8136414: Large performance penalty declaring a method strictfp on strict-only platforms

Reviewed-by: thartmann
This commit is contained in:
Vladimir Ivanov 2020-02-11 14:55:39 +03:00
parent c16040393c
commit 590f5996c6
23 changed files with 134 additions and 117 deletions

View File

@ -2257,8 +2257,7 @@ void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
Unimplemented();
}
// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.
// Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;
// Are floats converted to double when stored to stack during

View File

@ -61,7 +61,6 @@ define_pd_global(uint64_t,MaxRAM, 1ULL*G);
define_pd_global(bool, CICompileOSR, true );
#endif // !TIERED
define_pd_global(bool, UseTypeProfile, false);
define_pd_global(bool, RoundFPResults, true );
define_pd_global(bool, LIRFillDelaySlots, false);
define_pd_global(bool, OptimizeSinglePrecision, true );

View File

@ -1140,8 +1140,7 @@ const bool Matcher::misaligned_doubles_ok = false;
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
}
// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
// Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;
// Are floats converted to double when stored to stack during deoptimization?

View File

@ -62,8 +62,6 @@ define_pd_global(uint64_t, MaxRAM, 1ULL*G);
define_pd_global(bool, CICompileOSR, true );
#endif // COMPILER2
define_pd_global(bool, UseTypeProfile, false);
define_pd_global(bool, RoundFPResults, false);
define_pd_global(bool, LIRFillDelaySlots, false);
define_pd_global(bool, OptimizeSinglePrecision, true);

View File

@ -62,7 +62,6 @@ define_pd_global(uintx, InitialCodeCacheSize, 160*K);
#endif // !TIERED
define_pd_global(bool, UseTypeProfile, false);
define_pd_global(bool, RoundFPResults, false);
define_pd_global(bool, LIRFillDelaySlots, false);
define_pd_global(bool, OptimizeSinglePrecision, false);

View File

@ -2501,8 +2501,7 @@ void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
Unimplemented();
}
// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
// Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;
// Do floats take an entire double register or just half?

View File

@ -63,7 +63,6 @@ define_pd_global(uintx, InitialCodeCacheSize, 160*K);
#endif // !TIERED
define_pd_global(bool, UseTypeProfile, false);
define_pd_global(bool, RoundFPResults, false);
define_pd_global(bool, LIRFillDelaySlots, false);
define_pd_global(bool, OptimizeSinglePrecision, false);

View File

@ -1710,8 +1710,7 @@ const bool Matcher::rematerialize_float_constants = false;
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;
// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
// Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;
// Do floats take an entire double register or just half?

View File

@ -61,7 +61,6 @@ define_pd_global(uintx, InitialCodeCacheSize, 160*K);
#endif // !TIERED
define_pd_global(bool, UseTypeProfile, false);
define_pd_global(bool, RoundFPResults, false);
define_pd_global(bool, LIRFillDelaySlots, true );
define_pd_global(bool, OptimizeSinglePrecision, false);

View File

@ -1873,8 +1873,7 @@ const bool Matcher::misaligned_doubles_ok = true;
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
}
// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
// Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;
// Are floats converted to double when stored to stack during deoptimization?

View File

@ -33,7 +33,7 @@ enum {
// explicit rounding operations are required to implement the strictFP mode
enum {
pd_strict_fp_requires_explicit_rounding = true
pd_strict_fp_requires_explicit_rounding = LP64_ONLY( false ) NOT_LP64 ( true )
};

View File

@ -60,7 +60,6 @@ define_pd_global(uint64_t, MaxRAM, 1ULL*G);
define_pd_global(bool, CICompileOSR, true );
#endif // !TIERED
define_pd_global(bool, UseTypeProfile, false);
define_pd_global(bool, RoundFPResults, true );
define_pd_global(bool, LIRFillDelaySlots, false);
define_pd_global(bool, OptimizeSinglePrecision, true );

View File

@ -1516,8 +1516,7 @@ void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
node->_opnds[opcnt] = new_memory;
}
// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
// Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;
// Are floats converted to double when stored to stack during deoptimization?

View File

@ -1700,9 +1700,8 @@ const bool Matcher::misaligned_doubles_ok = true;
// No-op on amd64
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {}
// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;
// Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;
// Are floats converted to double when stored to stack during deoptimization?
// On x64 it is stored without conversion so we can use normal access.
@ -10521,24 +10520,6 @@ instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
//----------Arithmetic Conversion Instructions---------------------------------
instruct roundFloat_nop(regF dst)
%{
match(Set dst (RoundFloat dst));
ins_cost(0);
ins_encode();
ins_pipe(empty);
%}
instruct roundDouble_nop(regD dst)
%{
match(Set dst (RoundDouble dst));
ins_cost(0);
ins_encode();
ins_pipe(empty);
%}
instruct convF2D_reg_reg(regD dst, regF src)
%{
match(Set dst (ConvF2D src));

View File

@ -607,9 +607,15 @@ class MemoryBuffer: public CompilationResourceObj {
return load;
}
if (RoundFPResults && UseSSE < 2 && load->type()->is_float_kind()) {
// can't skip load since value might get rounded as a side effect
return load;
if (strict_fp_requires_explicit_rounding && load->type()->is_float_kind()) {
#ifdef IA32
if (UseSSE < 2) {
// can't skip load since value might get rounded as a side effect
return load;
}
#else
Unimplemented();
#endif // IA32
}
ciField* field = load->field();
@ -2272,17 +2278,23 @@ void GraphBuilder::throw_op(int bci) {
Value GraphBuilder::round_fp(Value fp_value) {
// no rounding needed if SSE2 is used
if (RoundFPResults && UseSSE < 2) {
// Must currently insert rounding node for doubleword values that
// are results of expressions (i.e., not loads from memory or
// constants)
if (fp_value->type()->tag() == doubleTag &&
fp_value->as_Constant() == NULL &&
fp_value->as_Local() == NULL && // method parameters need no rounding
fp_value->as_RoundFP() == NULL) {
return append(new RoundFP(fp_value));
if (strict_fp_requires_explicit_rounding) {
#ifdef IA32
// no rounding needed if SSE2 is used
if (UseSSE < 2) {
// Must currently insert rounding node for doubleword values that
// are results of expressions (i.e., not loads from memory or
// constants)
if (fp_value->type()->tag() == doubleTag &&
fp_value->as_Constant() == NULL &&
fp_value->as_Local() == NULL && // method parameters need no rounding
fp_value->as_RoundFP() == NULL) {
return append(new RoundFP(fp_value));
}
}
#else
Unimplemented();
#endif // IA32
}
return fp_value;
}
@ -3766,11 +3778,17 @@ bool GraphBuilder::try_inline_full(ciMethod* callee, bool holder_known, bool ign
// Proper inlining of methods with jsrs requires a little more work.
if (callee->has_jsrs() ) INLINE_BAILOUT("jsrs not handled properly by inliner yet");
// When SSE2 is used on intel, then no special handling is needed
// for strictfp because the enum-constant is fixed at compile time,
// the check for UseSSE2 is needed here
if (strict_fp_requires_explicit_rounding && UseSSE < 2 && method()->is_strict() != callee->is_strict()) {
INLINE_BAILOUT("caller and callee have different strict fp requirements");
if (strict_fp_requires_explicit_rounding &&
method()->is_strict() != callee->is_strict()) {
#ifdef IA32
// If explicit rounding is required, do not inline strict code into non-strict code (or the reverse).
// When SSE2 is present, no special handling is needed.
if (UseSSE < 2) {
INLINE_BAILOUT("caller and callee have different strict fp requirements");
}
#else
Unimplemented();
#endif // IA32
}
if (is_profiling() && !callee->ensure_method_data()) {

View File

@ -778,6 +778,7 @@ void LIR_Assembler::build_frame() {
void LIR_Assembler::roundfp_op(LIR_Opr src, LIR_Opr tmp, LIR_Opr dest, bool pop_fpu_stack) {
assert(strict_fp_requires_explicit_rounding, "not required");
assert((src->is_single_fpu() && dest->is_single_stack()) ||
(src->is_double_fpu() && dest->is_double_stack()),
"round_fp: rounds register -> stack location");

View File

@ -899,13 +899,19 @@ void LIRGenerator::arraycopy_helper(Intrinsic* x, int* flagsp, ciArrayKlass** ex
LIR_Opr LIRGenerator::round_item(LIR_Opr opr) {
assert(opr->is_register(), "why spill if item is not register?");
if (RoundFPResults && UseSSE < 1 && opr->is_single_fpu()) {
LIR_Opr result = new_register(T_FLOAT);
set_vreg_flag(result, must_start_in_memory);
assert(opr->is_register(), "only a register can be spilled");
assert(opr->value_type()->is_float(), "rounding only for floats available");
__ roundfp(opr, LIR_OprFact::illegalOpr, result);
return result;
if (strict_fp_requires_explicit_rounding) {
#ifdef IA32
if (UseSSE < 1 && opr->is_single_fpu()) {
LIR_Opr result = new_register(T_FLOAT);
set_vreg_flag(result, must_start_in_memory);
assert(opr->is_register(), "only a register can be spilled");
assert(opr->value_type()->is_float(), "rounding only for floats available");
__ roundfp(opr, LIR_OprFact::illegalOpr, result);
return result;
}
#else
Unimplemented();
#endif // IA32
}
return opr;
}
@ -1951,6 +1957,8 @@ void LIRGenerator::do_Throw(Throw* x) {
void LIRGenerator::do_RoundFP(RoundFP* x) {
assert(strict_fp_requires_explicit_rounding, "not required");
LIRItem input(x->input(), this);
input.load_item();
LIR_Opr input_opr = input.result();

View File

@ -170,9 +170,6 @@
develop(bool, UseTableRanges, true, \
"Faster versions of lookup table using ranges") \
\
develop_pd(bool, RoundFPResults, \
"Indicates whether rounding is needed for floating point results")\
\
develop(intx, NestedInliningSizeRatio, 90, \
"Percentage of prev. allowed inline size in recursive inlining") \
range(0, 100) \

View File

@ -150,8 +150,9 @@ CallGenerator* Compile::call_generator(ciMethod* callee, int vtable_index, bool
return cg;
}
// Do not inline strict fp into non-strict code, or the reverse
if (caller->is_strict() ^ callee->is_strict()) {
// If explicit rounding is required, do not inline strict into non-strict code (or the reverse).
if (Matcher::strict_fp_requires_explicit_rounding &&
caller->is_strict() != callee->is_strict()) {
allow_inline = false;
}

View File

@ -2142,22 +2142,6 @@ Node* GraphKit::just_allocated_object(Node* current_control) {
}
void GraphKit::round_double_arguments(ciMethod* dest_method) {
// (Note: TypeFunc::make has a cache that makes this fast.)
const TypeFunc* tf = TypeFunc::make(dest_method);
int nargs = tf->domain()->cnt() - TypeFunc::Parms;
for (int j = 0; j < nargs; j++) {
const Type *targ = tf->domain()->field_at(j + TypeFunc::Parms);
if( targ->basic_type() == T_DOUBLE ) {
// If any parameters are doubles, they must be rounded before
// the call, dstore_rounding does gvn.transform
Node *arg = argument(j);
arg = dstore_rounding(arg);
set_argument(j, arg);
}
}
}
/**
* Record profiling data exact_kls for Node n with the type system so
* that it can propagate it (speculation)
@ -2323,43 +2307,80 @@ void GraphKit::record_profiled_return_for_speculation() {
}
void GraphKit::round_double_result(ciMethod* dest_method) {
// A non-strict method may return a double value which has an extended
// exponent, but this must not be visible in a caller which is 'strict'
// If a strict caller invokes a non-strict callee, round a double result
if (Matcher::strict_fp_requires_explicit_rounding) {
// If a strict caller invokes a non-strict callee, round a double result.
// A non-strict method may return a double value which has an extended exponent,
// but this must not be visible in a caller which is strict.
BasicType result_type = dest_method->return_type()->basic_type();
assert(method() != NULL, "must have caller context");
if( result_type == T_DOUBLE && method()->is_strict() && !dest_method->is_strict() ) {
// Destination method's return value is on top of stack
// dstore_rounding() does gvn.transform
Node *result = pop_pair();
result = dstore_rounding(result);
push_pair(result);
}
}
}
BasicType result_type = dest_method->return_type()->basic_type();
assert( method() != NULL, "must have caller context");
if( result_type == T_DOUBLE && method()->is_strict() && !dest_method->is_strict() ) {
// Destination method's return value is on top of stack
// dstore_rounding() does gvn.transform
Node *result = pop_pair();
result = dstore_rounding(result);
push_pair(result);
void GraphKit::round_double_arguments(ciMethod* dest_method) {
if (Matcher::strict_fp_requires_explicit_rounding) {
// (Note: TypeFunc::make has a cache that makes this fast.)
const TypeFunc* tf = TypeFunc::make(dest_method);
int nargs = tf->domain()->cnt() - TypeFunc::Parms;
for (int j = 0; j < nargs; j++) {
const Type *targ = tf->domain()->field_at(j + TypeFunc::Parms);
if (targ->basic_type() == T_DOUBLE) {
// If any parameters are doubles, they must be rounded before
// the call, dstore_rounding does gvn.transform
Node *arg = argument(j);
arg = dstore_rounding(arg);
set_argument(j, arg);
}
}
}
}
// rounding for strict float precision conformance
Node* GraphKit::precision_rounding(Node* n) {
return UseStrictFP && _method->flags().is_strict()
&& UseSSE == 0 && Matcher::strict_fp_requires_explicit_rounding
? _gvn.transform( new RoundFloatNode(0, n) )
: n;
if (Matcher::strict_fp_requires_explicit_rounding) {
#ifdef IA32
if (_method->flags().is_strict() && UseSSE == 0) {
return _gvn.transform(new RoundFloatNode(0, n));
}
#else
Unimplemented();
#endif // IA32
}
return n;
}
// rounding for strict double precision conformance
Node* GraphKit::dprecision_rounding(Node *n) {
return UseStrictFP && _method->flags().is_strict()
&& UseSSE <= 1 && Matcher::strict_fp_requires_explicit_rounding
? _gvn.transform( new RoundDoubleNode(0, n) )
: n;
if (Matcher::strict_fp_requires_explicit_rounding) {
#ifdef IA32
if (_method->flags().is_strict() && UseSSE < 2) {
return _gvn.transform(new RoundDoubleNode(0, n));
}
#else
Unimplemented();
#endif // IA32
}
return n;
}
// rounding for non-strict double stores
Node* GraphKit::dstore_rounding(Node* n) {
return Matcher::strict_fp_requires_explicit_rounding
&& UseSSE <= 1
? _gvn.transform( new RoundDoubleNode(0, n) )
: n;
if (Matcher::strict_fp_requires_explicit_rounding) {
#ifdef IA32
if (UseSSE < 2) {
return _gvn.transform(new RoundDoubleNode(0, n));
}
#else
Unimplemented();
#endif // IA32
}
return n;
}
//=============================================================================

View File

@ -1785,8 +1785,15 @@ bool LibraryCallKit::inline_string_char_access(bool is_store) {
//--------------------------round_double_node--------------------------------
// Round a double node if necessary.
Node* LibraryCallKit::round_double_node(Node* n) {
if (Matcher::strict_fp_requires_explicit_rounding && UseSSE <= 1)
n = _gvn.transform(new RoundDoubleNode(0, n));
if (Matcher::strict_fp_requires_explicit_rounding) {
#ifdef IA32
if (UseSSE < 2) {
n = _gvn.transform(new RoundDoubleNode(NULL, n));
}
#else
Unimplemented();
#endif // IA32
}
return n;
}

View File

@ -534,8 +534,7 @@ public:
// on windows95 to take care of some unusual register constraints.
void pd_implicit_null_fixup(MachNode *load, uint idx);
// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
// Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
static const bool strict_fp_requires_explicit_rounding;
// Are floats converted to double when stored to stack during deoptimization?

View File

@ -994,9 +994,6 @@ const size_t minimumSymbolTableSize = 1024;
"proper StackOverflow handling; disable only to measure cost " \
"of stackbanging)") \
\
develop(bool, UseStrictFP, true, \
"use strict fp if modifier strictfp is set") \
\
develop(bool, GenerateSynchronizationCode, true, \
"generate locking/unlocking code for synchronized methods and " \
"monitors") \