8136414: Large performance penalty declaring a method strictfp on strict-only platforms
Reviewed-by: thartmann
parent c16040393c
commit 590f5996c6
@@ -2257,8 +2257,7 @@ void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
   Unimplemented();
 }
 
-// Advertise here if the CPU requires explicit rounding operations to
-// implement the UseStrictFP mode.
+// Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
 const bool Matcher::strict_fp_requires_explicit_rounding = false;
 
 // Are floats converted to double when stored to stack during
@@ -61,7 +61,6 @@ define_pd_global(uint64_t,MaxRAM, 1ULL*G);
 define_pd_global(bool, CICompileOSR, true );
 #endif // !TIERED
 define_pd_global(bool, UseTypeProfile, false);
-define_pd_global(bool, RoundFPResults, true );
 
 define_pd_global(bool, LIRFillDelaySlots, false);
 define_pd_global(bool, OptimizeSinglePrecision, true );
@@ -1140,8 +1140,7 @@ const bool Matcher::misaligned_doubles_ok = false;
 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
 }
 
-// Advertise here if the CPU requires explicit rounding operations
-// to implement the UseStrictFP mode.
+// Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
 const bool Matcher::strict_fp_requires_explicit_rounding = false;
 
 // Are floats converted to double when stored to stack during deoptimization?
@@ -62,8 +62,6 @@ define_pd_global(uint64_t, MaxRAM, 1ULL*G);
 define_pd_global(bool, CICompileOSR, true );
 #endif // COMPILER2
 define_pd_global(bool, UseTypeProfile, false);
-define_pd_global(bool, RoundFPResults, false);
-
 
 define_pd_global(bool, LIRFillDelaySlots, false);
 define_pd_global(bool, OptimizeSinglePrecision, true);
@@ -62,7 +62,6 @@ define_pd_global(uintx, InitialCodeCacheSize, 160*K);
 #endif // !TIERED
 
 define_pd_global(bool, UseTypeProfile, false);
-define_pd_global(bool, RoundFPResults, false);
 
 define_pd_global(bool, LIRFillDelaySlots, false);
 define_pd_global(bool, OptimizeSinglePrecision, false);
@@ -2501,8 +2501,7 @@ void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
   Unimplemented();
 }
 
-// Advertise here if the CPU requires explicit rounding operations
-// to implement the UseStrictFP mode.
+// Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
 const bool Matcher::strict_fp_requires_explicit_rounding = false;
 
 // Do floats take an entire double register or just half?
@@ -63,7 +63,6 @@ define_pd_global(uintx, InitialCodeCacheSize, 160*K);
 #endif // !TIERED
 
 define_pd_global(bool, UseTypeProfile, false);
-define_pd_global(bool, RoundFPResults, false);
 
 define_pd_global(bool, LIRFillDelaySlots, false);
 define_pd_global(bool, OptimizeSinglePrecision, false);
@@ -1710,8 +1710,7 @@ const bool Matcher::rematerialize_float_constants = false;
 // Java calling convention forces doubles to be aligned.
 const bool Matcher::misaligned_doubles_ok = true;
 
-// Advertise here if the CPU requires explicit rounding operations
-// to implement the UseStrictFP mode.
+// Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
 const bool Matcher::strict_fp_requires_explicit_rounding = false;
 
 // Do floats take an entire double register or just half?
@@ -61,7 +61,6 @@ define_pd_global(uintx, InitialCodeCacheSize, 160*K);
 #endif // !TIERED
 
 define_pd_global(bool, UseTypeProfile, false);
-define_pd_global(bool, RoundFPResults, false);
 
 define_pd_global(bool, LIRFillDelaySlots, true );
 define_pd_global(bool, OptimizeSinglePrecision, false);
@@ -1873,8 +1873,7 @@ const bool Matcher::misaligned_doubles_ok = true;
 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
 }
 
-// Advertise here if the CPU requires explicit rounding operations
-// to implement the UseStrictFP mode.
+// Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
 const bool Matcher::strict_fp_requires_explicit_rounding = false;
 
 // Are floats converted to double when stored to stack during deoptimization?
@@ -33,7 +33,7 @@ enum {
 
 // explicit rounding operations are required to implement the strictFP mode
 enum {
-  pd_strict_fp_requires_explicit_rounding = true
+  pd_strict_fp_requires_explicit_rounding = LP64_ONLY( false ) NOT_LP64 ( true )
 };
 
 
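For readers outside HotSpot, this is roughly how the LP64_ONLY / NOT_LP64 selection above behaves. The sketch below assumes the usual shape of the convenience macros in globalDefinitions.hpp (it is not copied from them), and the enum name in the last line is illustrative only.

// Sketch of the word-size selection macros (assumed shape): a 64-bit build
// keeps the LP64_ONLY argument and drops the NOT_LP64 one, and vice versa.
#ifdef _LP64
  #define LP64_ONLY(code) code
  #define NOT_LP64(code)
#else
  #define LP64_ONLY(code)
  #define NOT_LP64(code) code
#endif

// With these definitions, the value above becomes false on 64-bit x86 (SSE2 is
// always available, so double results are already strict) and true on 32-bit
// x86, where x87 registers may hold extended-precision intermediates.
enum { example_strict_fp_requires_explicit_rounding = LP64_ONLY(false) NOT_LP64(true) };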
@@ -60,7 +60,6 @@ define_pd_global(uint64_t, MaxRAM, 1ULL*G);
 define_pd_global(bool, CICompileOSR, true );
 #endif // !TIERED
 define_pd_global(bool, UseTypeProfile, false);
-define_pd_global(bool, RoundFPResults, true );
 
 define_pd_global(bool, LIRFillDelaySlots, false);
 define_pd_global(bool, OptimizeSinglePrecision, true );
@@ -1516,8 +1516,7 @@ void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
   node->_opnds[opcnt] = new_memory;
 }
 
-// Advertise here if the CPU requires explicit rounding operations
-// to implement the UseStrictFP mode.
+// Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
 const bool Matcher::strict_fp_requires_explicit_rounding = true;
 
 // Are floats conerted to double when stored to stack during deoptimization?
@@ -1700,9 +1700,8 @@ const bool Matcher::misaligned_doubles_ok = true;
 // No-op on amd64
 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {}
 
-// Advertise here if the CPU requires explicit rounding operations to
-// implement the UseStrictFP mode.
-const bool Matcher::strict_fp_requires_explicit_rounding = true;
+// Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
+const bool Matcher::strict_fp_requires_explicit_rounding = false;
 
 // Are floats conerted to double when stored to stack during deoptimization?
 // On x64 it is stored without convertion so we can use normal access.
@@ -10521,24 +10520,6 @@ instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
 
 //----------Arithmetic Conversion Instructions---------------------------------
 
-instruct roundFloat_nop(regF dst)
-%{
-  match(Set dst (RoundFloat dst));
-
-  ins_cost(0);
-  ins_encode();
-  ins_pipe(empty);
-%}
-
-instruct roundDouble_nop(regD dst)
-%{
-  match(Set dst (RoundDouble dst));
-
-  ins_cost(0);
-  ins_encode();
-  ins_pipe(empty);
-%}
-
 instruct convF2D_reg_reg(regD dst, regF src)
 %{
   match(Set dst (ConvF2D src));
@@ -607,9 +607,15 @@ class MemoryBuffer: public CompilationResourceObj {
       return load;
     }
 
-    if (RoundFPResults && UseSSE < 2 && load->type()->is_float_kind()) {
-      // can't skip load since value might get rounded as a side effect
-      return load;
+    if (strict_fp_requires_explicit_rounding && load->type()->is_float_kind()) {
+#ifdef IA32
+      if (UseSSE < 2) {
+        // can't skip load since value might get rounded as a side effect
+        return load;
+      }
+#else
+      Unimplemented();
+#endif // IA32
     }
 
     ciField* field = load->field();
@@ -2272,17 +2278,23 @@ void GraphBuilder::throw_op(int bci) {
 
 
 Value GraphBuilder::round_fp(Value fp_value) {
-  // no rounding needed if SSE2 is used
-  if (RoundFPResults && UseSSE < 2) {
-    // Must currently insert rounding node for doubleword values that
-    // are results of expressions (i.e., not loads from memory or
-    // constants)
-    if (fp_value->type()->tag() == doubleTag &&
-        fp_value->as_Constant() == NULL &&
-        fp_value->as_Local() == NULL &&       // method parameters need no rounding
-        fp_value->as_RoundFP() == NULL) {
-      return append(new RoundFP(fp_value));
-    }
+  if (strict_fp_requires_explicit_rounding) {
+#ifdef IA32
+    // no rounding needed if SSE2 is used
+    if (UseSSE < 2) {
+      // Must currently insert rounding node for doubleword values that
+      // are results of expressions (i.e., not loads from memory or
+      // constants)
+      if (fp_value->type()->tag() == doubleTag &&
+          fp_value->as_Constant() == NULL &&
+          fp_value->as_Local() == NULL &&       // method parameters need no rounding
+          fp_value->as_RoundFP() == NULL) {
+        return append(new RoundFP(fp_value));
+      }
+    }
+#else
+    Unimplemented();
+#endif // IA32
   }
   return fp_value;
 }
@@ -3766,11 +3778,17 @@ bool GraphBuilder::try_inline_full(ciMethod* callee, bool holder_known, bool ign
   // Proper inlining of methods with jsrs requires a little more work.
   if (callee->has_jsrs() ) INLINE_BAILOUT("jsrs not handled properly by inliner yet");
 
-  // When SSE2 is used on intel, then no special handling is needed
-  // for strictfp because the enum-constant is fixed at compile time,
-  // the check for UseSSE2 is needed here
-  if (strict_fp_requires_explicit_rounding && UseSSE < 2 && method()->is_strict() != callee->is_strict()) {
-    INLINE_BAILOUT("caller and callee have different strict fp requirements");
+  if (strict_fp_requires_explicit_rounding &&
+      method()->is_strict() != callee->is_strict()) {
+#ifdef IA32
+    // If explicit rounding is required, do not inline strict code into non-strict code (or the reverse).
+    // When SSE2 is present, no special handling is needed.
+    if (UseSSE < 2) {
+      INLINE_BAILOUT("caller and callee have different strict fp requirements");
+    }
+#else
+    Unimplemented();
+#endif // IA32
   }
 
   if (is_profiling() && !callee->ensure_method_data()) {
@@ -778,6 +778,7 @@ void LIR_Assembler::build_frame() {
 
 
 void LIR_Assembler::roundfp_op(LIR_Opr src, LIR_Opr tmp, LIR_Opr dest, bool pop_fpu_stack) {
+  assert(strict_fp_requires_explicit_rounding, "not required");
   assert((src->is_single_fpu() && dest->is_single_stack()) ||
          (src->is_double_fpu() && dest->is_double_stack()),
          "round_fp: rounds register -> stack location");
@@ -899,13 +899,19 @@ void LIRGenerator::arraycopy_helper(Intrinsic* x, int* flagsp, ciArrayKlass** ex
 LIR_Opr LIRGenerator::round_item(LIR_Opr opr) {
   assert(opr->is_register(), "why spill if item is not register?");
 
-  if (RoundFPResults && UseSSE < 1 && opr->is_single_fpu()) {
-    LIR_Opr result = new_register(T_FLOAT);
-    set_vreg_flag(result, must_start_in_memory);
-    assert(opr->is_register(), "only a register can be spilled");
-    assert(opr->value_type()->is_float(), "rounding only for floats available");
-    __ roundfp(opr, LIR_OprFact::illegalOpr, result);
-    return result;
+  if (strict_fp_requires_explicit_rounding) {
+#ifdef IA32
+    if (UseSSE < 1 && opr->is_single_fpu()) {
+      LIR_Opr result = new_register(T_FLOAT);
+      set_vreg_flag(result, must_start_in_memory);
+      assert(opr->is_register(), "only a register can be spilled");
+      assert(opr->value_type()->is_float(), "rounding only for floats available");
+      __ roundfp(opr, LIR_OprFact::illegalOpr, result);
+      return result;
+    }
+#else
+    Unimplemented();
+#endif // IA32
   }
   return opr;
 }
@@ -1951,6 +1957,8 @@ void LIRGenerator::do_Throw(Throw* x) {
 
 
 void LIRGenerator::do_RoundFP(RoundFP* x) {
+  assert(strict_fp_requires_explicit_rounding, "not required");
+
   LIRItem input(x->input(), this);
   input.load_item();
   LIR_Opr input_opr = input.result();
@@ -170,9 +170,6 @@
   develop(bool, UseTableRanges, true,                                       \
           "Faster versions of lookup table using ranges")                   \
                                                                             \
-  develop_pd(bool, RoundFPResults,                                          \
-          "Indicates whether rounding is needed for floating point results")\
-                                                                            \
   develop(intx, NestedInliningSizeRatio, 90,                                \
           "Percentage of prev. allowed inline size in recursive inlining")  \
           range(0, 100)                                                     \
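Background note for the flag removed above (a rough sketch, not the actual HotSpot macro text): a develop_pd flag has no default in the shared flag table; each platform's c1_globals_<cpu>.hpp supplied one through define_pd_global, which expands approximately like this.

// Assumed shape of the platform-default macro (simplified):
#define define_pd_global(type, name, value) const type pd_##name = value;

define_pd_global(bool, RoundFPResults, true)   // e.g. what the x86 header provided

// The shared develop_pd(bool, RoundFPResults, ...) entry then picked up
// pd_RoundFPResults as its default. After this commit the flag is gone and C1
// keys the same decision off strict_fp_requires_explicit_rounding instead.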
@@ -150,8 +150,9 @@ CallGenerator* Compile::call_generator(ciMethod* callee, int vtable_index, bool
     return cg;
   }
 
-  // Do not inline strict fp into non-strict code, or the reverse
-  if (caller->is_strict() ^ callee->is_strict()) {
+  // If explicit rounding is required, do not inline strict into non-strict code (or the reverse).
+  if (Matcher::strict_fp_requires_explicit_rounding &&
+      caller->is_strict() != callee->is_strict()) {
     allow_inline = false;
   }
 
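The same policy now appears in both compilers: inlining across a strictfp boundary is only refused when the platform actually needs explicit rounding. Written as a hypothetical stand-alone predicate (illustration only, not code from the patch), the condition is:

// Returns true when inlining should be refused: the platform needs explicit
// rounding AND exactly one side of the call site is declared strictfp.
static bool block_inline_for_strictfp(bool platform_needs_explicit_rounding,
                                      bool caller_is_strict,
                                      bool callee_is_strict) {
  return platform_needs_explicit_rounding && (caller_is_strict != callee_is_strict);
}

On strict-only platforms the first operand is a constant false, so declaring a method strictfp no longer disables inlining there.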
@@ -2142,22 +2142,6 @@ Node* GraphKit::just_allocated_object(Node* current_control) {
 }
 
 
-void GraphKit::round_double_arguments(ciMethod* dest_method) {
-  // (Note: TypeFunc::make has a cache that makes this fast.)
-  const TypeFunc* tf = TypeFunc::make(dest_method);
-  int nargs = tf->domain()->cnt() - TypeFunc::Parms;
-  for (int j = 0; j < nargs; j++) {
-    const Type *targ = tf->domain()->field_at(j + TypeFunc::Parms);
-    if( targ->basic_type() == T_DOUBLE ) {
-      // If any parameters are doubles, they must be rounded before
-      // the call, dstore_rounding does gvn.transform
-      Node *arg = argument(j);
-      arg = dstore_rounding(arg);
-      set_argument(j, arg);
-    }
-  }
-}
-
 /**
  * Record profiling data exact_kls for Node n with the type system so
  * that it can propagate it (speculation)
@@ -2323,43 +2307,80 @@ void GraphKit::record_profiled_return_for_speculation() {
 }
 
 void GraphKit::round_double_result(ciMethod* dest_method) {
-  // A non-strict method may return a double value which has an extended
-  // exponent, but this must not be visible in a caller which is 'strict'
-  // If a strict caller invokes a non-strict callee, round a double result
-
-  BasicType result_type = dest_method->return_type()->basic_type();
-  assert( method() != NULL, "must have caller context");
-  if( result_type == T_DOUBLE && method()->is_strict() && !dest_method->is_strict() ) {
-    // Destination method's return value is on top of stack
-    // dstore_rounding() does gvn.transform
-    Node *result = pop_pair();
-    result = dstore_rounding(result);
-    push_pair(result);
-  }
-}
+  if (Matcher::strict_fp_requires_explicit_rounding) {
+    // If a strict caller invokes a non-strict callee, round a double result.
+    // A non-strict method may return a double value which has an extended exponent,
+    // but this must not be visible in a caller which is strict.
+    BasicType result_type = dest_method->return_type()->basic_type();
+    assert(method() != NULL, "must have caller context");
+    if( result_type == T_DOUBLE && method()->is_strict() && !dest_method->is_strict() ) {
+      // Destination method's return value is on top of stack
+      // dstore_rounding() does gvn.transform
+      Node *result = pop_pair();
+      result = dstore_rounding(result);
+      push_pair(result);
+    }
+  }
+}
+
+void GraphKit::round_double_arguments(ciMethod* dest_method) {
+  if (Matcher::strict_fp_requires_explicit_rounding) {
+    // (Note: TypeFunc::make has a cache that makes this fast.)
+    const TypeFunc* tf = TypeFunc::make(dest_method);
+    int nargs = tf->domain()->cnt() - TypeFunc::Parms;
+    for (int j = 0; j < nargs; j++) {
+      const Type *targ = tf->domain()->field_at(j + TypeFunc::Parms);
+      if (targ->basic_type() == T_DOUBLE) {
+        // If any parameters are doubles, they must be rounded before
+        // the call, dstore_rounding does gvn.transform
+        Node *arg = argument(j);
+        arg = dstore_rounding(arg);
+        set_argument(j, arg);
+      }
+    }
+  }
+}
 
 // rounding for strict float precision conformance
 Node* GraphKit::precision_rounding(Node* n) {
-  return UseStrictFP && _method->flags().is_strict()
-    && UseSSE == 0 && Matcher::strict_fp_requires_explicit_rounding
-    ? _gvn.transform( new RoundFloatNode(0, n) )
-    : n;
+  if (Matcher::strict_fp_requires_explicit_rounding) {
+#ifdef IA32
+    if (_method->flags().is_strict() && UseSSE == 0) {
+      return _gvn.transform(new RoundFloatNode(0, n));
+    }
+#else
+    Unimplemented();
+#endif // IA32
+  }
+  return n;
 }
 
 // rounding for strict double precision conformance
 Node* GraphKit::dprecision_rounding(Node *n) {
-  return UseStrictFP && _method->flags().is_strict()
-    && UseSSE <= 1 && Matcher::strict_fp_requires_explicit_rounding
-    ? _gvn.transform( new RoundDoubleNode(0, n) )
-    : n;
+  if (Matcher::strict_fp_requires_explicit_rounding) {
+#ifdef IA32
+    if (_method->flags().is_strict() && UseSSE < 2) {
+      return _gvn.transform(new RoundDoubleNode(0, n));
+    }
+#else
+    Unimplemented();
+#endif // IA32
+  }
+  return n;
 }
 
 // rounding for non-strict double stores
 Node* GraphKit::dstore_rounding(Node* n) {
-  return Matcher::strict_fp_requires_explicit_rounding
-    && UseSSE <= 1
-    ? _gvn.transform( new RoundDoubleNode(0, n) )
-    : n;
+  if (Matcher::strict_fp_requires_explicit_rounding) {
+#ifdef IA32
+    if (UseSSE < 2) {
+      return _gvn.transform(new RoundDoubleNode(0, n));
+    }
+#else
+    Unimplemented();
+#endif // IA32
+  }
+  return n;
 }
 
 //=============================================================================
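A minimal sketch of why this removes the penalty named in the bug title (illustrative names, not HotSpot code): the guard is a per-port const bool supplied by the .ad file, so on a strict-only platform the rounding helpers above collapse to an identity function and no RoundFloat/RoundDouble nodes are ever appended to the ideal graph.

namespace sketch {                       // hypothetical, for illustration only
  struct Matcher {
    static const bool strict_fp_requires_explicit_rounding = false;  // e.g. the aarch64 value above
  };

  struct Node;

  Node* dstore_rounding(Node* n) {
    if (Matcher::strict_fp_requires_explicit_rounding) {
      // x87-specific rounding would be inserted here (IA32 only);
      // unreachable on a strict-only port
    }
    return n;   // fast path: no extra IR node, no spill/reload of the double
  }
}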
@@ -1785,8 +1785,15 @@ bool LibraryCallKit::inline_string_char_access(bool is_store) {
 //--------------------------round_double_node--------------------------------
 // Round a double node if necessary.
 Node* LibraryCallKit::round_double_node(Node* n) {
-  if (Matcher::strict_fp_requires_explicit_rounding && UseSSE <= 1)
-    n = _gvn.transform(new RoundDoubleNode(0, n));
+  if (Matcher::strict_fp_requires_explicit_rounding) {
+#ifdef IA32
+    if (UseSSE < 2) {
+      n = _gvn.transform(new RoundDoubleNode(NULL, n));
+    }
+#else
+    Unimplemented();
+#endif // IA32
+  }
   return n;
 }
 
@@ -534,8 +534,7 @@ public:
   // on windows95 to take care of some unusual register constraints.
   void pd_implicit_null_fixup(MachNode *load, uint idx);
 
-  // Advertise here if the CPU requires explicit rounding operations
-  // to implement the UseStrictFP mode.
+  // Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
   static const bool strict_fp_requires_explicit_rounding;
 
   // Are floats conerted to double when stored to stack during deoptimization?
@@ -994,9 +994,6 @@ const size_t minimumSymbolTableSize = 1024;
           "proper StackOverflow handling; disable only to measure cost "   \
           "of stackbanging)")                                               \
                                                                            \
-  develop(bool, UseStrictFP, true,                                         \
-          "use strict fp if modifier strictfp is set")                     \
-                                                                           \
   develop(bool, GenerateSynchronizationCode, true,                         \
           "generate locking/unlocking code for synchronized methods and "  \
           "monitors")                                                      \