8238681: Make -XX:UseSSE flag x86-specific

Reviewed-by: dholmes, kvn
2020-03-10 20:51:07 +03:00 · 2020-03-10 20:51:07 +03:00 · 072cfd2e48
commit 072cfd2e48
parent 8208b9ce32
14 changed files with 108 additions and 76 deletions
--- a/src/hotspot/cpu/ppc/vm_version_ppc.cpp
+++ b/src/hotspot/cpu/ppc/vm_version_ppc.cpp
@@ -192,8 +192,6 @@ void VM_Version::initialize() {
  _supports_atomic_getset8 = true;
  _supports_atomic_getadd8 = true;

-  UseSSE = 0; // Only on x86 and x64
-
  intx cache_line_size = L1_data_cache_line_size();

  if (PowerArchitecturePPC64 >= 9) {
--- a/src/hotspot/cpu/sparc/vm_version_sparc.cpp
+++ b/src/hotspot/cpu/sparc/vm_version_sparc.cpp
@@ -68,8 +68,6 @@ void VM_Version::initialize() {
    }
  }

-  UseSSE = false;                   // Only used on x86 and x64.
-
  _supports_cx8 = true;             // All SPARC V9 implementations.
  _supports_atomic_getset4 = true;  // Using the 'swap' instruction.

--- a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp
+++ b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp
@@ -93,9 +93,13 @@ LIR_Opr LIRGenerator::result_register_for(ValueType* type, bool callee) {
    case intTag:     opr = FrameMap::rax_opr;          break;
    case objectTag:  opr = FrameMap::rax_oop_opr;      break;
    case longTag:    opr = FrameMap::long0_opr;        break;
+#ifdef _LP64
+    case floatTag:   opr = FrameMap::xmm0_float_opr;   break;
+    case doubleTag:  opr = FrameMap::xmm0_double_opr;  break;
+#else
    case floatTag:   opr = UseSSE >= 1 ? FrameMap::xmm0_float_opr  : FrameMap::fpu0_float_opr;  break;
    case doubleTag:  opr = UseSSE >= 2 ? FrameMap::xmm0_double_opr : FrameMap::fpu0_double_opr;  break;
-
+#endif // _LP64
    case addressTag:
    default: ShouldNotReachHere(); return LIR_OprFact::illegalOpr;
  }
@@ -356,6 +360,7 @@ void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) {
    left.dont_load_item();
  }

+#ifndef _LP64
  // do not load right operand if it is a constant.  only 0 and 1 are
  // loaded because there are special instructions for loading them
  // without memory access (not needed for SSE2 instructions)
@@ -371,13 +376,18 @@ void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) {
      must_load_right = UseSSE < 2 && (c->is_one_double() || c->is_zero_double());
    }
  }
+#endif // !LP64

  if (must_load_both) {
    // frem and drem destroy also right operand, so move it to a new register
    right.set_destroys_register();
    right.load_item();
-  } else if (right.is_register() || must_load_right) {
+  } else if (right.is_register()) {
    right.load_item();
+#ifndef _LP64
+  } else if (must_load_right) {
+    right.load_item();
+#endif // !LP64
  } else {
    right.dont_load_item();
  }
@@ -788,9 +798,11 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
  LIRItem value(x->argument_at(0), this);

  bool use_fpu = false;
+#ifndef _LP64
  if (UseSSE < 2) {
    value.set_destroys_register();
  }
+#endif // !LP64
  value.load_item();

  LIR_Opr calc_input = value.result();
@@ -1552,10 +1564,12 @@ void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result,
    LIR_Opr temp_double = new_register(T_DOUBLE);
    __ volatile_move(LIR_OprFact::address(address), temp_double, T_LONG, info);
    __ volatile_move(temp_double, result, T_LONG);
+#ifndef _LP64
    if (UseSSE < 2) {
      // no spill slot needed in SSE2 mode because xmm->cpu register move is possible
      set_vreg_flag(result, must_start_in_memory);
    }
+#endif // !LP64
  } else {
    __ load(address, result, info);
  }
--- a/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp
+++ b/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp
@@ -367,6 +367,7 @@ static OopMap* generate_oop_map(StubAssembler* sasm, int num_rt_args,
 #endif

  if (save_fpu_registers) {
+#ifndef _LP64
    if (UseSSE < 2) {
      int fpu_off = float_regs_as_doubles_off;
      for (int n = 0; n < FrameMap::nof_fpu_regs; n++) {
@@ -379,7 +380,18 @@ static OopMap* generate_oop_map(StubAssembler* sasm, int num_rt_args,
        fpu_off += 2;
      }
      assert(fpu_off == fpu_state_off, "incorrect number of fpu stack slots");
+
+      if (UseSSE == 1) {
+        int xmm_off = xmm_regs_as_doubles_off;
+        for (int n = 0; n < FrameMap::nof_fpu_regs; n++) {
+          VMReg xmm_name_0 = as_XMMRegister(n)->as_VMReg();
+          map->set_callee_saved(VMRegImpl::stack2reg(xmm_off + num_rt_args), xmm_name_0);
+          xmm_off += 2;
+        }
+        assert(xmm_off == float_regs_as_doubles_off, "incorrect number of xmm registers");
+      }
    }
+#endif // !LP64

    if (UseSSE >= 2) {
      int xmm_off = xmm_regs_as_doubles_off;
@@ -395,15 +407,6 @@ static OopMap* generate_oop_map(StubAssembler* sasm, int num_rt_args,
        xmm_off += 2;
      }
      assert(xmm_off == float_regs_as_doubles_off, "incorrect number of xmm registers");
-
-    } else if (UseSSE == 1) {
-      int xmm_off = xmm_regs_as_doubles_off;
-      for (int n = 0; n < FrameMap::nof_fpu_regs; n++) {
-        VMReg xmm_name_0 = as_XMMRegister(n)->as_VMReg();
-        map->set_callee_saved(VMRegImpl::stack2reg(xmm_off + num_rt_args), xmm_name_0);
-        xmm_off += 2;
-      }
-      assert(xmm_off == float_regs_as_doubles_off, "incorrect number of xmm registers");
    }
  }

@@ -454,6 +457,16 @@ void C1_MacroAssembler::save_live_registers_no_oop_map(bool save_fpu_registers)
        __ fstp_d(Address(rsp, float_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset));
        offset += 8;
      }
+
+      if (UseSSE == 1) {
+        // save XMM registers as float because double not supported without SSE2(num MMX == num fpu)
+        int offset = 0;
+        for (int n = 0; n < FrameMap::nof_fpu_regs; n++) {
+          XMMRegister xmm_name = as_XMMRegister(n);
+          __ movflt(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset), xmm_name);
+          offset += 8;
+        }
+      }
    }
 #endif // !_LP64

@@ -475,16 +488,6 @@ void C1_MacroAssembler::save_live_registers_no_oop_map(bool save_fpu_registers)
        __ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset), xmm_name);
        offset += 8;
      }
-#ifndef _LP64
-    } else if (UseSSE == 1) {
-      // save XMM registers as float because double not supported without SSE2(num MMX == num fpu)
-      int offset = 0;
-      for (int n = 0; n < FrameMap::nof_fpu_regs; n++) {
-        XMMRegister xmm_name = as_XMMRegister(n);
-        __ movflt(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset), xmm_name);
-        offset += 8;
-      }
-#endif // !_LP64
    }
  }

--- a/src/hotspot/cpu/x86/globals_x86.hpp
+++ b/src/hotspot/cpu/x86/globals_x86.hpp
@@ -103,6 +103,10 @@ define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
  product(bool, UseStoreImmI16, true,                                       \
          "Use store immediate 16-bits value instruction on x86")           \
                                                                            \
+  product(intx, UseSSE, 99,                                                 \
+          "Highest supported SSE instructions set on x86/x64")              \
+          range(0, 99)                                                      \
+                                                                            \
  product(intx, UseAVX, 3,                                                  \
          "Highest supported AVX instructions set on x86/x64")              \
          range(0, 99)                                                      \
--- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp
@@ -2724,17 +2724,6 @@ void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) {
  }
 }

-#ifndef _LP64
-void MacroAssembler::empty_FPU_stack() {
-  if (VM_Version::supports_mmx()) {
-    emms();
-  } else {
-    for (int i = 8; i-- > 0; ) ffree(i);
-  }
-}
-#endif // !LP64
-
-
 void MacroAssembler::enter() {
  push(rbp);
  mov(rbp, rsp);
@@ -2753,7 +2742,7 @@ void MacroAssembler::fat_nop() {
  }
 }

-#if !defined(_LP64)
+#ifndef _LP64
 void MacroAssembler::fcmp(Register tmp) {
  fcmp(tmp, 1, true, true);
 }
@@ -2856,6 +2845,14 @@ void MacroAssembler::fremr(Register tmp) {
  fxch(1);
  fpop();
 }
+
+void MacroAssembler::empty_FPU_stack() {
+  if (VM_Version::supports_mmx()) {
+    emms();
+  } else {
+    for (int i = 8; i-- > 0; ) ffree(i);
+  }
+}
 #endif // !LP64

 void MacroAssembler::mulpd(XMMRegister dst, AddressLiteral src) {
@@ -2868,39 +2865,51 @@ void MacroAssembler::mulpd(XMMRegister dst, AddressLiteral src) {
 }

 void MacroAssembler::load_float(Address src) {
+#ifdef _LP64
+  movflt(xmm0, src);
+#else
  if (UseSSE >= 1) {
    movflt(xmm0, src);
  } else {
-    LP64_ONLY(ShouldNotReachHere());
-    NOT_LP64(fld_s(src));
+    fld_s(src);
  }
+#endif // LP64
 }

 void MacroAssembler::store_float(Address dst) {
+#ifdef _LP64
+  movflt(dst, xmm0);
+#else
  if (UseSSE >= 1) {
    movflt(dst, xmm0);
  } else {
-    LP64_ONLY(ShouldNotReachHere());
-    NOT_LP64(fstp_s(dst));
+    fstp_s(dst);
  }
+#endif // LP64
 }

 void MacroAssembler::load_double(Address src) {
+#ifdef _LP64
+  movdbl(xmm0, src);
+#else
  if (UseSSE >= 2) {
    movdbl(xmm0, src);
  } else {
-    LP64_ONLY(ShouldNotReachHere());
-    NOT_LP64(fld_d(src));
+    fld_d(src);
  }
+#endif // LP64
 }

 void MacroAssembler::store_double(Address dst) {
+#ifdef _LP64
+  movdbl(dst, xmm0);
+#else
  if (UseSSE >= 2) {
    movdbl(dst, xmm0);
  } else {
-    LP64_ONLY(ShouldNotReachHere());
-    NOT_LP64(fstp_d(dst));
+    fstp_d(dst);
  }
+#endif // LP64
 }

 // dst = c = a * b + c
--- a/src/hotspot/cpu/x86/x86.ad
+++ b/src/hotspot/cpu/x86/x86.ad
@@ -1295,14 +1295,6 @@ const bool Matcher::match_rule_supported(int opcode) {
        return false;
      }
      break;
-    case Op_AddReductionVF:
-    case Op_AddReductionVD:
-    case Op_MulReductionVF:
-    case Op_MulReductionVD:
-      if (UseSSE < 1) { // requires at least SSE
-        return false;
-      }
-      break;
    case Op_SqrtVD:
    case Op_SqrtVF:
      if (UseAVX < 1) { // enabled for AVX only
@@ -1338,14 +1330,6 @@ const bool Matcher::match_rule_supported(int opcode) {
        return false;
      }
      break;
-    case Op_MulAddVS2VI:
-    case Op_RShiftVL:
-    case Op_AbsVD:
-    case Op_NegVD:
-      if (UseSSE < 2) {
-        return false;
-      }
-      break;
    case Op_MulVB:
    case Op_LShiftVB:
    case Op_RShiftVB:
@@ -1381,6 +1365,24 @@ const bool Matcher::match_rule_supported(int opcode) {
        return false; // 128bit vroundpd is not available
      }
      break;
+#ifndef _LP64
+    case Op_AddReductionVF:
+    case Op_AddReductionVD:
+    case Op_MulReductionVF:
+    case Op_MulReductionVD:
+      if (UseSSE < 1) { // requires at least SSE
+        return false;
+      }
+      break;
+    case Op_MulAddVS2VI:
+    case Op_RShiftVL:
+    case Op_AbsVD:
+    case Op_NegVD:
+      if (UseSSE < 2) {
+        return false;
+      }
+      break;
+#endif // !LP64
  }
  return true;  // Match rules are supported by default.
 }
--- a/src/hotspot/share/c1/c1_LinearScan.cpp
+++ b/src/hotspot/share/c1/c1_LinearScan.cpp
@@ -1092,7 +1092,7 @@ IntervalUseKind LinearScan::use_kind_of_input_operand(LIR_Op* op, LIR_Opr opr) {
  // this operand is allowed to be on the stack in some cases
  BasicType opr_type = opr->type_register();
  if (opr_type == T_FLOAT || opr_type == T_DOUBLE) {
-    if ((UseSSE == 1 && opr_type == T_FLOAT) || UseSSE >= 2 S390_ONLY(|| true)) {
+    if (IA32_ONLY( (UseSSE == 1 && opr_type == T_FLOAT) || UseSSE >= 2 ) NOT_IA32( true )) {
      // SSE float instruction (T_DOUBLE only supported with SSE2)
      switch (op->code()) {
        case lir_cmp:
@@ -1154,7 +1154,7 @@ IntervalUseKind LinearScan::use_kind_of_input_operand(LIR_Op* op, LIR_Opr opr) {
        break;
    }
  }
-#endif // X86 S390
+#endif // X86 || S390

  // all other operands require a register
  return mustHaveRegister;
@@ -1291,7 +1291,7 @@ void LinearScan::build_intervals() {
  if (has_fpu_registers()) {
 #ifdef X86
    if (UseSSE < 2) {
-#endif
+#endif // X86
      for (i = 0; i < FrameMap::nof_caller_save_fpu_regs; i++) {
        LIR_Opr opr = FrameMap::caller_save_fpu_reg_at(i);
        assert(opr->is_valid() && opr->is_register(), "FrameMap should not return invalid operands");
@@ -1300,6 +1300,9 @@ void LinearScan::build_intervals() {
      }
 #ifdef X86
    }
+#endif // X86
+
+#ifdef X86
    if (UseSSE > 0) {
      int num_caller_save_xmm_regs = FrameMap::get_num_caller_save_xmms();
      for (i = 0; i < num_caller_save_xmm_regs; i ++) {
@@ -1309,7 +1312,7 @@ void LinearScan::build_intervals() {
        caller_save_registers[num_caller_save_registers++] = reg_num(opr);
      }
    }
-#endif
+#endif // X86
  }
  assert(num_caller_save_registers <= LinearScan::nof_regs, "out of bounds");

@@ -2147,12 +2150,12 @@ LIR_Opr LinearScan::calc_operand_for_interval(const Interval* interval) {
          if (UseAVX < 3) {
            last_xmm_reg = pd_first_xmm_reg + (pd_nof_xmm_regs_frame_map / 2) - 1;
          }
-#endif
+#endif // LP64
          assert(assigned_reg >= pd_first_xmm_reg && assigned_reg <= last_xmm_reg, "no xmm register");
          assert(interval->assigned_regHi() == any_reg, "must not have hi register");
          return LIR_OprFact::single_xmm(assigned_reg - pd_first_xmm_reg);
        }
-#endif
+#endif // X86

        assert(assigned_reg >= pd_first_fpu_reg && assigned_reg <= pd_last_fpu_reg, "no fpu register");
        assert(interval->assigned_regHi() == any_reg, "must not have hi register");
@@ -2167,12 +2170,12 @@ LIR_Opr LinearScan::calc_operand_for_interval(const Interval* interval) {
          if (UseAVX < 3) {
            last_xmm_reg = pd_first_xmm_reg + (pd_nof_xmm_regs_frame_map / 2) - 1;
          }
-#endif
+#endif // LP64
          assert(assigned_reg >= pd_first_xmm_reg && assigned_reg <= last_xmm_reg, "no xmm register");
          assert(interval->assigned_regHi() == any_reg, "must not have hi register (double xmm values are stored in one register)");
          return LIR_OprFact::double_xmm(assigned_reg - pd_first_xmm_reg);
        }
-#endif
+#endif // X86

 #ifdef SPARC
        assert(assigned_reg >= pd_first_fpu_reg && assigned_reg <= pd_last_fpu_reg, "no fpu register");
--- a/src/hotspot/share/compiler/compileBroker.cpp
+++ b/src/hotspot/share/compiler/compileBroker.cpp
@@ -1328,6 +1328,7 @@ nmethod* CompileBroker::compile_method(const methodHandle& method, int osr_bci,
  // do the compilation
  if (method->is_native()) {
    if (!PreferInterpreterNativeStubs || method->is_method_handle_intrinsic()) {
+#ifdef X86
      // The following native methods:
      //
      // java.lang.Float.intBitsToFloat
@@ -1349,6 +1350,7 @@ nmethod* CompileBroker::compile_method(const methodHandle& method, int osr_bci,
            method->intrinsic_id() == vmIntrinsics::_doubleToRawLongBits))) {
        return NULL;
      }
+#endif // X86

      // To properly handle the appendix argument for out-of-line calls we are using a small trampoline that
      // pops off the appendix argument and jumps to the target (see gen_special_dispatch in SharedRuntime).
--- a/src/hotspot/share/jvmci/jvmciCompilerToVMInit.cpp
+++ b/src/hotspot/share/jvmci/jvmciCompilerToVMInit.cpp
@@ -241,7 +241,7 @@ JVMCIObjectArray CompilerToVM::initialize_intrinsics(JVMCI_TRAPS) {
  do_bool_flag(UseSHA1Intrinsics)                                          \
  do_bool_flag(UseSHA256Intrinsics)                                        \
  do_bool_flag(UseSHA512Intrinsics)                                        \
-  do_intx_flag(UseSSE)                                                     \
+  X86_ONLY(do_intx_flag(UseSSE))                                           \
  COMPILER2_PRESENT(do_bool_flag(UseSquareToLenIntrinsic))                 \
  do_bool_flag(UseStackBanging)                                            \
  do_bool_flag(UseTLAB)                                                    \
--- a/src/hotspot/share/runtime/arguments.cpp
+++ b/src/hotspot/share/runtime/arguments.cpp
@@ -545,6 +545,9 @@ static SpecialFlag const special_jvm_flags[] = {
  { "UseParallelOldGC",              JDK_Version::jdk(14),     JDK_Version::jdk(15), JDK_Version::jdk(16) },
  { "CompactFields",                 JDK_Version::jdk(14),     JDK_Version::jdk(15), JDK_Version::jdk(16) },
  { "FieldsAllocationStyle",         JDK_Version::jdk(14),     JDK_Version::jdk(15), JDK_Version::jdk(16) },
+#ifndef X86
+  { "UseSSE",                        JDK_Version::undefined(), JDK_Version::jdk(15), JDK_Version::jdk(16) },
+#endif // !X86

 #ifdef TEST_VERIFY_SPECIAL_JVM_FLAGS
  // These entries will generate build errors.  Their purpose is to test the macros.
--- a/src/hotspot/share/runtime/globals.hpp
+++ b/src/hotspot/share/runtime/globals.hpp
@@ -214,10 +214,6 @@ const size_t minimumSymbolTableSize = 1024;
          "Maximum number of pages to include in the page scan procedure")  \
          range(0, max_uintx)                                               \
                                                                            \
-  product(intx, UseSSE, 99,                                                 \
-          "Highest supported SSE instructions set on x86/x64")              \
-          range(0, 99)                                                      \
-                                                                            \
  product(bool, UseAES, false,                                              \
          "Control whether AES instructions are used when available")       \
                                                                            \
--- a/test/hotspot/jtreg/compiler/c1/Test6579789.java
+++ b/test/hotspot/jtreg/compiler/c1/Test6579789.java
@@ -26,7 +26,7 @@
 * @bug 6579789
 * @summary Internal error "c1_LinearScan.cpp:1429 Error: assert(false,"")" in debuggee with fastdebug VM
 *
- * @run main/othervm -Xcomp -XX:UseSSE=0
+ * @run main/othervm -Xcomp -XX:+IgnoreUnrecognizedVMOptions -XX:UseSSE=0
 *      -XX:CompileCommand=compileonly,compiler.c1.Test6579789::bug
 *      compiler.c1.Test6579789
 */
--- a/test/hotspot/jtreg/compiler/c1/Test6855215.java
+++ b/test/hotspot/jtreg/compiler/c1/Test6855215.java
@@ -26,7 +26,7 @@
 * @bug 6855215
 * @summary Calculation error (NaN) after about 1500 calculations
 *
- * @run main/othervm -Xbatch -XX:UseSSE=0 compiler.c1.Test6855215
+ * @run main/othervm -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:UseSSE=0 compiler.c1.Test6855215
 */

 package compiler.c1;