8238681: Make -XX:UseSSE flag x86-specific
Reviewed-by: dholmes, kvn
This commit is contained in:
parent
8208b9ce32
commit
072cfd2e48
src/hotspot
cpu
ppc
sparc
x86
share
c1
compiler
jvmci
runtime
test/hotspot/jtreg/compiler/c1
@ -192,8 +192,6 @@ void VM_Version::initialize() {
|
||||
_supports_atomic_getset8 = true;
|
||||
_supports_atomic_getadd8 = true;
|
||||
|
||||
UseSSE = 0; // Only on x86 and x64
|
||||
|
||||
intx cache_line_size = L1_data_cache_line_size();
|
||||
|
||||
if (PowerArchitecturePPC64 >= 9) {
|
||||
|
@ -68,8 +68,6 @@ void VM_Version::initialize() {
|
||||
}
|
||||
}
|
||||
|
||||
UseSSE = false; // Only used on x86 and x64.
|
||||
|
||||
_supports_cx8 = true; // All SPARC V9 implementations.
|
||||
_supports_atomic_getset4 = true; // Using the 'swap' instruction.
|
||||
|
||||
|
@ -93,9 +93,13 @@ LIR_Opr LIRGenerator::result_register_for(ValueType* type, bool callee) {
|
||||
case intTag: opr = FrameMap::rax_opr; break;
|
||||
case objectTag: opr = FrameMap::rax_oop_opr; break;
|
||||
case longTag: opr = FrameMap::long0_opr; break;
|
||||
#ifdef _LP64
|
||||
case floatTag: opr = FrameMap::xmm0_float_opr; break;
|
||||
case doubleTag: opr = FrameMap::xmm0_double_opr; break;
|
||||
#else
|
||||
case floatTag: opr = UseSSE >= 1 ? FrameMap::xmm0_float_opr : FrameMap::fpu0_float_opr; break;
|
||||
case doubleTag: opr = UseSSE >= 2 ? FrameMap::xmm0_double_opr : FrameMap::fpu0_double_opr; break;
|
||||
|
||||
#endif // _LP64
|
||||
case addressTag:
|
||||
default: ShouldNotReachHere(); return LIR_OprFact::illegalOpr;
|
||||
}
|
||||
@ -356,6 +360,7 @@ void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) {
|
||||
left.dont_load_item();
|
||||
}
|
||||
|
||||
#ifndef _LP64
|
||||
// do not load right operand if it is a constant. only 0 and 1 are
|
||||
// loaded because there are special instructions for loading them
|
||||
// without memory access (not needed for SSE2 instructions)
|
||||
@ -371,13 +376,18 @@ void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) {
|
||||
must_load_right = UseSSE < 2 && (c->is_one_double() || c->is_zero_double());
|
||||
}
|
||||
}
|
||||
#endif // !LP64
|
||||
|
||||
if (must_load_both) {
|
||||
// frem and drem destroy also right operand, so move it to a new register
|
||||
right.set_destroys_register();
|
||||
right.load_item();
|
||||
} else if (right.is_register() || must_load_right) {
|
||||
} else if (right.is_register()) {
|
||||
right.load_item();
|
||||
#ifndef _LP64
|
||||
} else if (must_load_right) {
|
||||
right.load_item();
|
||||
#endif // !LP64
|
||||
} else {
|
||||
right.dont_load_item();
|
||||
}
|
||||
@ -788,9 +798,11 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
|
||||
LIRItem value(x->argument_at(0), this);
|
||||
|
||||
bool use_fpu = false;
|
||||
#ifndef _LP64
|
||||
if (UseSSE < 2) {
|
||||
value.set_destroys_register();
|
||||
}
|
||||
#endif // !LP64
|
||||
value.load_item();
|
||||
|
||||
LIR_Opr calc_input = value.result();
|
||||
@ -1552,10 +1564,12 @@ void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result,
|
||||
LIR_Opr temp_double = new_register(T_DOUBLE);
|
||||
__ volatile_move(LIR_OprFact::address(address), temp_double, T_LONG, info);
|
||||
__ volatile_move(temp_double, result, T_LONG);
|
||||
#ifndef _LP64
|
||||
if (UseSSE < 2) {
|
||||
// no spill slot needed in SSE2 mode because xmm->cpu register move is possible
|
||||
set_vreg_flag(result, must_start_in_memory);
|
||||
}
|
||||
#endif // !LP64
|
||||
} else {
|
||||
__ load(address, result, info);
|
||||
}
|
||||
|
@ -367,6 +367,7 @@ static OopMap* generate_oop_map(StubAssembler* sasm, int num_rt_args,
|
||||
#endif
|
||||
|
||||
if (save_fpu_registers) {
|
||||
#ifndef _LP64
|
||||
if (UseSSE < 2) {
|
||||
int fpu_off = float_regs_as_doubles_off;
|
||||
for (int n = 0; n < FrameMap::nof_fpu_regs; n++) {
|
||||
@ -379,7 +380,18 @@ static OopMap* generate_oop_map(StubAssembler* sasm, int num_rt_args,
|
||||
fpu_off += 2;
|
||||
}
|
||||
assert(fpu_off == fpu_state_off, "incorrect number of fpu stack slots");
|
||||
|
||||
if (UseSSE == 1) {
|
||||
int xmm_off = xmm_regs_as_doubles_off;
|
||||
for (int n = 0; n < FrameMap::nof_fpu_regs; n++) {
|
||||
VMReg xmm_name_0 = as_XMMRegister(n)->as_VMReg();
|
||||
map->set_callee_saved(VMRegImpl::stack2reg(xmm_off + num_rt_args), xmm_name_0);
|
||||
xmm_off += 2;
|
||||
}
|
||||
assert(xmm_off == float_regs_as_doubles_off, "incorrect number of xmm registers");
|
||||
}
|
||||
}
|
||||
#endif // !LP64
|
||||
|
||||
if (UseSSE >= 2) {
|
||||
int xmm_off = xmm_regs_as_doubles_off;
|
||||
@ -395,15 +407,6 @@ static OopMap* generate_oop_map(StubAssembler* sasm, int num_rt_args,
|
||||
xmm_off += 2;
|
||||
}
|
||||
assert(xmm_off == float_regs_as_doubles_off, "incorrect number of xmm registers");
|
||||
|
||||
} else if (UseSSE == 1) {
|
||||
int xmm_off = xmm_regs_as_doubles_off;
|
||||
for (int n = 0; n < FrameMap::nof_fpu_regs; n++) {
|
||||
VMReg xmm_name_0 = as_XMMRegister(n)->as_VMReg();
|
||||
map->set_callee_saved(VMRegImpl::stack2reg(xmm_off + num_rt_args), xmm_name_0);
|
||||
xmm_off += 2;
|
||||
}
|
||||
assert(xmm_off == float_regs_as_doubles_off, "incorrect number of xmm registers");
|
||||
}
|
||||
}
|
||||
|
||||
@ -454,6 +457,16 @@ void C1_MacroAssembler::save_live_registers_no_oop_map(bool save_fpu_registers)
|
||||
__ fstp_d(Address(rsp, float_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset));
|
||||
offset += 8;
|
||||
}
|
||||
|
||||
if (UseSSE == 1) {
|
||||
// save XMM registers as float because double not supported without SSE2(num MMX == num fpu)
|
||||
int offset = 0;
|
||||
for (int n = 0; n < FrameMap::nof_fpu_regs; n++) {
|
||||
XMMRegister xmm_name = as_XMMRegister(n);
|
||||
__ movflt(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset), xmm_name);
|
||||
offset += 8;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // !_LP64
|
||||
|
||||
@ -475,16 +488,6 @@ void C1_MacroAssembler::save_live_registers_no_oop_map(bool save_fpu_registers)
|
||||
__ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset), xmm_name);
|
||||
offset += 8;
|
||||
}
|
||||
#ifndef _LP64
|
||||
} else if (UseSSE == 1) {
|
||||
// save XMM registers as float because double not supported without SSE2(num MMX == num fpu)
|
||||
int offset = 0;
|
||||
for (int n = 0; n < FrameMap::nof_fpu_regs; n++) {
|
||||
XMMRegister xmm_name = as_XMMRegister(n);
|
||||
__ movflt(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset), xmm_name);
|
||||
offset += 8;
|
||||
}
|
||||
#endif // !_LP64
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -103,6 +103,10 @@ define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
|
||||
product(bool, UseStoreImmI16, true, \
|
||||
"Use store immediate 16-bits value instruction on x86") \
|
||||
\
|
||||
product(intx, UseSSE, 99, \
|
||||
"Highest supported SSE instructions set on x86/x64") \
|
||||
range(0, 99) \
|
||||
\
|
||||
product(intx, UseAVX, 3, \
|
||||
"Highest supported AVX instructions set on x86/x64") \
|
||||
range(0, 99) \
|
||||
|
@ -2724,17 +2724,6 @@ void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) {
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef _LP64
|
||||
void MacroAssembler::empty_FPU_stack() {
|
||||
if (VM_Version::supports_mmx()) {
|
||||
emms();
|
||||
} else {
|
||||
for (int i = 8; i-- > 0; ) ffree(i);
|
||||
}
|
||||
}
|
||||
#endif // !LP64
|
||||
|
||||
|
||||
void MacroAssembler::enter() {
|
||||
push(rbp);
|
||||
mov(rbp, rsp);
|
||||
@ -2753,7 +2742,7 @@ void MacroAssembler::fat_nop() {
|
||||
}
|
||||
}
|
||||
|
||||
#if !defined(_LP64)
|
||||
#ifndef _LP64
|
||||
void MacroAssembler::fcmp(Register tmp) {
|
||||
fcmp(tmp, 1, true, true);
|
||||
}
|
||||
@ -2856,6 +2845,14 @@ void MacroAssembler::fremr(Register tmp) {
|
||||
fxch(1);
|
||||
fpop();
|
||||
}
|
||||
|
||||
void MacroAssembler::empty_FPU_stack() {
|
||||
if (VM_Version::supports_mmx()) {
|
||||
emms();
|
||||
} else {
|
||||
for (int i = 8; i-- > 0; ) ffree(i);
|
||||
}
|
||||
}
|
||||
#endif // !LP64
|
||||
|
||||
void MacroAssembler::mulpd(XMMRegister dst, AddressLiteral src) {
|
||||
@ -2868,39 +2865,51 @@ void MacroAssembler::mulpd(XMMRegister dst, AddressLiteral src) {
|
||||
}
|
||||
|
||||
void MacroAssembler::load_float(Address src) {
|
||||
#ifdef _LP64
|
||||
movflt(xmm0, src);
|
||||
#else
|
||||
if (UseSSE >= 1) {
|
||||
movflt(xmm0, src);
|
||||
} else {
|
||||
LP64_ONLY(ShouldNotReachHere());
|
||||
NOT_LP64(fld_s(src));
|
||||
fld_s(src);
|
||||
}
|
||||
#endif // LP64
|
||||
}
|
||||
|
||||
void MacroAssembler::store_float(Address dst) {
|
||||
#ifdef _LP64
|
||||
movflt(dst, xmm0);
|
||||
#else
|
||||
if (UseSSE >= 1) {
|
||||
movflt(dst, xmm0);
|
||||
} else {
|
||||
LP64_ONLY(ShouldNotReachHere());
|
||||
NOT_LP64(fstp_s(dst));
|
||||
fstp_s(dst);
|
||||
}
|
||||
#endif // LP64
|
||||
}
|
||||
|
||||
void MacroAssembler::load_double(Address src) {
|
||||
#ifdef _LP64
|
||||
movdbl(xmm0, src);
|
||||
#else
|
||||
if (UseSSE >= 2) {
|
||||
movdbl(xmm0, src);
|
||||
} else {
|
||||
LP64_ONLY(ShouldNotReachHere());
|
||||
NOT_LP64(fld_d(src));
|
||||
fld_d(src);
|
||||
}
|
||||
#endif // LP64
|
||||
}
|
||||
|
||||
void MacroAssembler::store_double(Address dst) {
|
||||
#ifdef _LP64
|
||||
movdbl(dst, xmm0);
|
||||
#else
|
||||
if (UseSSE >= 2) {
|
||||
movdbl(dst, xmm0);
|
||||
} else {
|
||||
LP64_ONLY(ShouldNotReachHere());
|
||||
NOT_LP64(fstp_d(dst));
|
||||
fstp_d(dst);
|
||||
}
|
||||
#endif // LP64
|
||||
}
|
||||
|
||||
// dst = c = a * b + c
|
||||
|
@ -1295,14 +1295,6 @@ const bool Matcher::match_rule_supported(int opcode) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case Op_AddReductionVF:
|
||||
case Op_AddReductionVD:
|
||||
case Op_MulReductionVF:
|
||||
case Op_MulReductionVD:
|
||||
if (UseSSE < 1) { // requires at least SSE
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case Op_SqrtVD:
|
||||
case Op_SqrtVF:
|
||||
if (UseAVX < 1) { // enabled for AVX only
|
||||
@ -1338,14 +1330,6 @@ const bool Matcher::match_rule_supported(int opcode) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case Op_MulAddVS2VI:
|
||||
case Op_RShiftVL:
|
||||
case Op_AbsVD:
|
||||
case Op_NegVD:
|
||||
if (UseSSE < 2) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case Op_MulVB:
|
||||
case Op_LShiftVB:
|
||||
case Op_RShiftVB:
|
||||
@ -1381,6 +1365,24 @@ const bool Matcher::match_rule_supported(int opcode) {
|
||||
return false; // 128bit vroundpd is not available
|
||||
}
|
||||
break;
|
||||
#ifndef _LP64
|
||||
case Op_AddReductionVF:
|
||||
case Op_AddReductionVD:
|
||||
case Op_MulReductionVF:
|
||||
case Op_MulReductionVD:
|
||||
if (UseSSE < 1) { // requires at least SSE
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case Op_MulAddVS2VI:
|
||||
case Op_RShiftVL:
|
||||
case Op_AbsVD:
|
||||
case Op_NegVD:
|
||||
if (UseSSE < 2) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
#endif // !LP64
|
||||
}
|
||||
return true; // Match rules are supported by default.
|
||||
}
|
||||
|
@ -1092,7 +1092,7 @@ IntervalUseKind LinearScan::use_kind_of_input_operand(LIR_Op* op, LIR_Opr opr) {
|
||||
// this operand is allowed to be on the stack in some cases
|
||||
BasicType opr_type = opr->type_register();
|
||||
if (opr_type == T_FLOAT || opr_type == T_DOUBLE) {
|
||||
if ((UseSSE == 1 && opr_type == T_FLOAT) || UseSSE >= 2 S390_ONLY(|| true)) {
|
||||
if (IA32_ONLY( (UseSSE == 1 && opr_type == T_FLOAT) || UseSSE >= 2 ) NOT_IA32( true )) {
|
||||
// SSE float instruction (T_DOUBLE only supported with SSE2)
|
||||
switch (op->code()) {
|
||||
case lir_cmp:
|
||||
@ -1154,7 +1154,7 @@ IntervalUseKind LinearScan::use_kind_of_input_operand(LIR_Op* op, LIR_Opr opr) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
#endif // X86 S390
|
||||
#endif // X86 || S390
|
||||
|
||||
// all other operands require a register
|
||||
return mustHaveRegister;
|
||||
@ -1291,7 +1291,7 @@ void LinearScan::build_intervals() {
|
||||
if (has_fpu_registers()) {
|
||||
#ifdef X86
|
||||
if (UseSSE < 2) {
|
||||
#endif
|
||||
#endif // X86
|
||||
for (i = 0; i < FrameMap::nof_caller_save_fpu_regs; i++) {
|
||||
LIR_Opr opr = FrameMap::caller_save_fpu_reg_at(i);
|
||||
assert(opr->is_valid() && opr->is_register(), "FrameMap should not return invalid operands");
|
||||
@ -1300,6 +1300,9 @@ void LinearScan::build_intervals() {
|
||||
}
|
||||
#ifdef X86
|
||||
}
|
||||
#endif // X86
|
||||
|
||||
#ifdef X86
|
||||
if (UseSSE > 0) {
|
||||
int num_caller_save_xmm_regs = FrameMap::get_num_caller_save_xmms();
|
||||
for (i = 0; i < num_caller_save_xmm_regs; i ++) {
|
||||
@ -1309,7 +1312,7 @@ void LinearScan::build_intervals() {
|
||||
caller_save_registers[num_caller_save_registers++] = reg_num(opr);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif // X86
|
||||
}
|
||||
assert(num_caller_save_registers <= LinearScan::nof_regs, "out of bounds");
|
||||
|
||||
@ -2147,12 +2150,12 @@ LIR_Opr LinearScan::calc_operand_for_interval(const Interval* interval) {
|
||||
if (UseAVX < 3) {
|
||||
last_xmm_reg = pd_first_xmm_reg + (pd_nof_xmm_regs_frame_map / 2) - 1;
|
||||
}
|
||||
#endif
|
||||
#endif // LP64
|
||||
assert(assigned_reg >= pd_first_xmm_reg && assigned_reg <= last_xmm_reg, "no xmm register");
|
||||
assert(interval->assigned_regHi() == any_reg, "must not have hi register");
|
||||
return LIR_OprFact::single_xmm(assigned_reg - pd_first_xmm_reg);
|
||||
}
|
||||
#endif
|
||||
#endif // X86
|
||||
|
||||
assert(assigned_reg >= pd_first_fpu_reg && assigned_reg <= pd_last_fpu_reg, "no fpu register");
|
||||
assert(interval->assigned_regHi() == any_reg, "must not have hi register");
|
||||
@ -2167,12 +2170,12 @@ LIR_Opr LinearScan::calc_operand_for_interval(const Interval* interval) {
|
||||
if (UseAVX < 3) {
|
||||
last_xmm_reg = pd_first_xmm_reg + (pd_nof_xmm_regs_frame_map / 2) - 1;
|
||||
}
|
||||
#endif
|
||||
#endif // LP64
|
||||
assert(assigned_reg >= pd_first_xmm_reg && assigned_reg <= last_xmm_reg, "no xmm register");
|
||||
assert(interval->assigned_regHi() == any_reg, "must not have hi register (double xmm values are stored in one register)");
|
||||
return LIR_OprFact::double_xmm(assigned_reg - pd_first_xmm_reg);
|
||||
}
|
||||
#endif
|
||||
#endif // X86
|
||||
|
||||
#ifdef SPARC
|
||||
assert(assigned_reg >= pd_first_fpu_reg && assigned_reg <= pd_last_fpu_reg, "no fpu register");
|
||||
|
@ -1328,6 +1328,7 @@ nmethod* CompileBroker::compile_method(const methodHandle& method, int osr_bci,
|
||||
// do the compilation
|
||||
if (method->is_native()) {
|
||||
if (!PreferInterpreterNativeStubs || method->is_method_handle_intrinsic()) {
|
||||
#ifdef X86
|
||||
// The following native methods:
|
||||
//
|
||||
// java.lang.Float.intBitsToFloat
|
||||
@ -1349,6 +1350,7 @@ nmethod* CompileBroker::compile_method(const methodHandle& method, int osr_bci,
|
||||
method->intrinsic_id() == vmIntrinsics::_doubleToRawLongBits))) {
|
||||
return NULL;
|
||||
}
|
||||
#endif // X86
|
||||
|
||||
// To properly handle the appendix argument for out-of-line calls we are using a small trampoline that
|
||||
// pops off the appendix argument and jumps to the target (see gen_special_dispatch in SharedRuntime).
|
||||
|
@ -241,7 +241,7 @@ JVMCIObjectArray CompilerToVM::initialize_intrinsics(JVMCI_TRAPS) {
|
||||
do_bool_flag(UseSHA1Intrinsics) \
|
||||
do_bool_flag(UseSHA256Intrinsics) \
|
||||
do_bool_flag(UseSHA512Intrinsics) \
|
||||
do_intx_flag(UseSSE) \
|
||||
X86_ONLY(do_intx_flag(UseSSE)) \
|
||||
COMPILER2_PRESENT(do_bool_flag(UseSquareToLenIntrinsic)) \
|
||||
do_bool_flag(UseStackBanging) \
|
||||
do_bool_flag(UseTLAB) \
|
||||
|
@ -545,6 +545,9 @@ static SpecialFlag const special_jvm_flags[] = {
|
||||
{ "UseParallelOldGC", JDK_Version::jdk(14), JDK_Version::jdk(15), JDK_Version::jdk(16) },
|
||||
{ "CompactFields", JDK_Version::jdk(14), JDK_Version::jdk(15), JDK_Version::jdk(16) },
|
||||
{ "FieldsAllocationStyle", JDK_Version::jdk(14), JDK_Version::jdk(15), JDK_Version::jdk(16) },
|
||||
#ifndef X86
|
||||
{ "UseSSE", JDK_Version::undefined(), JDK_Version::jdk(15), JDK_Version::jdk(16) },
|
||||
#endif // !X86
|
||||
|
||||
#ifdef TEST_VERIFY_SPECIAL_JVM_FLAGS
|
||||
// These entries will generate build errors. Their purpose is to test the macros.
|
||||
|
@ -214,10 +214,6 @@ const size_t minimumSymbolTableSize = 1024;
|
||||
"Maximum number of pages to include in the page scan procedure") \
|
||||
range(0, max_uintx) \
|
||||
\
|
||||
product(intx, UseSSE, 99, \
|
||||
"Highest supported SSE instructions set on x86/x64") \
|
||||
range(0, 99) \
|
||||
\
|
||||
product(bool, UseAES, false, \
|
||||
"Control whether AES instructions are used when available") \
|
||||
\
|
||||
|
@ -26,7 +26,7 @@
|
||||
* @bug 6579789
|
||||
* @summary Internal error "c1_LinearScan.cpp:1429 Error: assert(false,"")" in debuggee with fastdebug VM
|
||||
*
|
||||
* @run main/othervm -Xcomp -XX:UseSSE=0
|
||||
* @run main/othervm -Xcomp -XX:+IgnoreUnrecognizedVMOptions -XX:UseSSE=0
|
||||
* -XX:CompileCommand=compileonly,compiler.c1.Test6579789::bug
|
||||
* compiler.c1.Test6579789
|
||||
*/
|
||||
|
@ -26,7 +26,7 @@
|
||||
* @bug 6855215
|
||||
* @summary Calculation error (NaN) after about 1500 calculations
|
||||
*
|
||||
* @run main/othervm -Xbatch -XX:UseSSE=0 compiler.c1.Test6855215
|
||||
* @run main/othervm -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:UseSSE=0 compiler.c1.Test6855215
|
||||
*/
|
||||
|
||||
package compiler.c1;
|
||||
|
Loading…
x
Reference in New Issue
Block a user