8129551: aarch64: some regressions introduced by addition of vectorisation code

Fix regressions Reviewed-by: kvn
2015-06-23 18:56:17 +00:00 · 2015-06-23 18:56:17 +00:00 · d17ff6b63d
commit d17ff6b63d
parent 9ced0d90d5
4 changed files with 54 additions and 40 deletions
--- a/hotspot/src/cpu/aarch64/vm/aarch64.ad
+++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad
@ -13276,7 +13276,7 @@ instruct replicate16B_imm(vecX dst, immI con)
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(16B)" %}
  ins_encode %{
-    __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant);
+    __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
  %}
  ins_pipe(pipe_class_default);
 %}
@ -13298,7 +13298,7 @@ instruct replicate8S_imm(vecX dst, immI con)
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8H)" %}
  ins_encode %{
-    __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant);
+    __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
  %}
  ins_pipe(pipe_class_default);
 %}
--- a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp
+++ b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp
@ -491,6 +491,11 @@ class Address VALUE_OBJ_CLASS_SPEC {
        i->rf(_index, 16);
        i->f(_ext.option(), 15, 13);
        unsigned size = i->get(31, 30);
+        if (i->get(26, 26) && i->get(23, 23)) {
+          // SIMD Q Type - Size = 128 bits
+          assert(size == 0, "bad size");
+          size = 0b100;
+        }
        if (size == 0) // It's a byte
          i->f(_ext.shift() >= 0, 12);
        else {
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
@ -1408,6 +1408,52 @@ void MacroAssembler::movptr(Register r, uintptr_t imm64) {
  movk(r, imm64 & 0xffff, 32);
 }

+// Macro to mov replicated immediate to vector register.
+//  Vd will get the following values for different arrangements in T
+//   imm32 == hex 000000gh  T8B:  Vd = ghghghghghghghgh
+//   imm32 == hex 000000gh  T16B: Vd = ghghghghghghghghghghghghghghghgh
+//   imm32 == hex 0000efgh  T4H:  Vd = efghefghefghefgh
+//   imm32 == hex 0000efgh  T8H:  Vd = efghefghefghefghefghefghefghefgh
+//   imm32 == hex abcdefgh  T2S:  Vd = abcdefghabcdefgh
+//   imm32 == hex abcdefgh  T4S:  Vd = abcdefghabcdefghabcdefghabcdefgh
+//   T1D/T2D: invalid
+void MacroAssembler::mov(FloatRegister Vd, SIMD_Arrangement T, u_int32_t imm32) {
+  assert(T != T1D && T != T2D, "invalid arrangement");
+  if (T == T8B || T == T16B) {
+    assert((imm32 & ~0xff) == 0, "extraneous bits in unsigned imm32 (T8B/T16B)");
+    movi(Vd, T, imm32 & 0xff, 0);
+    return;
+  }
+  u_int32_t nimm32 = ~imm32;
+  if (T == T4H || T == T8H) {
+    assert((imm32  & ~0xffff) == 0, "extraneous bits in unsigned imm32 (T4H/T8H)");
+    imm32 &= 0xffff;
+    nimm32 &= 0xffff;
+  }
+  u_int32_t x = imm32;
+  int movi_cnt = 0;
+  int movn_cnt = 0;
+  while (x) { if (x & 0xff) movi_cnt++; x >>= 8; }
+  x = nimm32;
+  while (x) { if (x & 0xff) movn_cnt++; x >>= 8; }
+  if (movn_cnt < movi_cnt) imm32 = nimm32;
+  unsigned lsl = 0;
+  while (imm32 && (imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; }
+  if (movn_cnt < movi_cnt)
+    mvni(Vd, T, imm32 & 0xff, lsl);
+  else
+    movi(Vd, T, imm32 & 0xff, lsl);
+  imm32 >>= 8; lsl += 8;
+  while (imm32) {
+    while ((imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; }
+    if (movn_cnt < movi_cnt)
+      bici(Vd, T, imm32 & 0xff, lsl);
+    else
+      orri(Vd, T, imm32 & 0xff, lsl);
+    lsl += 8; imm32 >>= 8;
+  }
+}
+
 void MacroAssembler::mov_immediate64(Register dst, u_int64_t imm64)
 {
 #ifndef PRODUCT
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
@ -465,44 +465,7 @@ public:

  void movptr(Register r, uintptr_t imm64);

-  // Macro to mov replicated immediate to vector register.
-  // Where imm32 == hex abcdefgh, Vd will get the following values
-  // for different arrangements in T
-  //   T8B:  Vd = ghghghghghghghgh
-  //   T16B: Vd = ghghghghghghghghghghghghghghghgh
-  //   T4H:  Vd = efghefghefghefgh
-  //   T8H:  Vd = efghefghefghefghefghefghefghefgh
-  //   T2S:  Vd = abcdefghabcdefgh
-  //   T4S:  Vd = abcdefghabcdefghabcdefghabcdefgh
-  //   T1D/T2D: invalid
-  void mov(FloatRegister Vd, SIMD_Arrangement T, u_int32_t imm32) {
-    assert(T != T1D && T != T2D, "invalid arrangement");
-    u_int32_t nimm32 = ~imm32;
-    if (T == T8B || T == T16B) { imm32 &= 0xff; nimm32 &= 0xff; }
-    if (T == T4H || T == T8H) { imm32 &= 0xffff; nimm32 &= 0xffff; }
-    u_int32_t x = imm32;
-    int movi_cnt = 0;
-    int movn_cnt = 0;
-    while (x) { if (x & 0xff) movi_cnt++; x >>= 8; }
-    x = nimm32;
-    while (x) { if (x & 0xff) movn_cnt++; x >>= 8; }
-    if (movn_cnt < movi_cnt) imm32 = nimm32;
-    unsigned lsl = 0;
-    while (imm32 && (imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; }
-    if (movn_cnt < movi_cnt)
-      mvni(Vd, T, imm32 & 0xff, lsl);
-    else
-      movi(Vd, T, imm32 & 0xff, lsl);
-    imm32 >>= 8; lsl += 8;
-    while (imm32) {
-      while ((imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; }
-      if (movn_cnt < movi_cnt)
-        bici(Vd, T, imm32 & 0xff, lsl);
-      else
-        orri(Vd, T, imm32 & 0xff, lsl);
-      lsl += 8; imm32 >>= 8;
-    }
-  }
+  void mov(FloatRegister Vd, SIMD_Arrangement T, u_int32_t imm32);

  // macro instructions for accessing and updating floating point
  // status register