8129551: aarch64: some regressions introduced by addition of vectorisation code

Fix regressions. Reviewed-by: kvn
2015-06-23 18:56:17 +00:00 · 2015-06-23 18:56:17 +00:00 · d17ff6b63d
commit d17ff6b63d
parent 9ced0d90d5
4 changed files with 54 additions and 40 deletions
--- a/hotspot/src/cpu/aarch64/vm/aarch64.ad
+++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad
@ -13276,7 +13276,7 @@ instruct replicate16B_imm(vecX dst, immI con)
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(16B)" %}
  ins_encode %{
-    __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant);
+    __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
  %}
  ins_pipe(pipe_class_default);
 %}
@ -13298,7 +13298,7 @@ instruct replicate8S_imm(vecX dst, immI con)
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8H)" %}
  ins_encode %{
-    __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant);
+    __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
  %}
  ins_pipe(pipe_class_default);
 %}
--- a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp
+++ b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp
@ -491,6 +491,11 @@ class Address VALUE_OBJ_CLASS_SPEC {
        i->rf(_index, 16);
        i->f(_ext.option(), 15, 13);
        unsigned size = i->get(31, 30);
        if (i->get(26, 26) && i->get(23, 23)) {
          // SIMD Q Type - Size = 128 bits
          assert(size == 0, "bad size");
          size = 0b100;
        }
        if (size == 0) // It's a byte
          i->f(_ext.shift() >= 0, 12);
        else {
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
@ -1408,6 +1408,52 @@ void MacroAssembler::movptr(Register r, uintptr_t imm64) {
  movk(r, imm64 & 0xffff, 32);
 }
 // Macro to mov replicated immediate to vector register.
 //  Vd will get the following values for different arrangements in T
 //   imm32 == hex 000000gh  T8B:  Vd = ghghghghghghghgh
 //   imm32 == hex 000000gh  T16B: Vd = ghghghghghghghghghghghghghghghgh
 //   imm32 == hex 0000efgh  T4H:  Vd = efghefghefghefgh
 //   imm32 == hex 0000efgh  T8H:  Vd = efghefghefghefghefghefghefghefgh
 //   imm32 == hex abcdefgh  T2S:  Vd = abcdefghabcdefgh
 //   imm32 == hex abcdefgh  T4S:  Vd = abcdefghabcdefghabcdefghabcdefgh
 //   T1D/T2D: invalid
 //
 // Parameters:
 //   Vd    - destination vector register.
 //   T     - arrangement; T1D and T2D are rejected by assertion.
 //   imm32 - element value to replicate. It must have no bits set above the
 //           element width: bits 8..31 must be zero for T8B/T16B and bits
 //           16..31 must be zero for T4H/T8H (both checked by assertion).
 //
 // Emission strategy:
 //   * T8B/T16B: exactly one movi with a zero shift.
 //   * Other arrangements: the non-zero bytes of imm32 and of its bitwise
 //     complement are counted. If the complement has strictly fewer non-zero
 //     bytes, the complement is encoded with one mvni followed by one bici
 //     per remaining non-zero byte; otherwise imm32 is encoded with one movi
 //     followed by one orri per remaining non-zero byte. Zero bytes are
 //     skipped by advancing the shift amount in steps of 8.
 //   (This relies on the usual AArch64 MOVI/MVNI/ORR/BIC vector-immediate
 //   semantics of the movi/mvni/orri/bici emitters, which are defined
 //   elsewhere.)
 void MacroAssembler::mov(FloatRegister Vd, SIMD_Arrangement T, u_int32_t imm32) {
  assert(T != T1D && T != T2D, "invalid arrangement");
  if (T == T8B || T == T16B) {
    // Byte elements: a single movi suffices. The mask below is redundant
    // whenever the assertion holds.
    assert((imm32 & ~0xff) == 0, "extraneous bits in unsigned imm32 (T8B/T16B)");
    movi(Vd, T, imm32 & 0xff, 0);
    return;
  }
  u_int32_t nimm32 = ~imm32;
  if (T == T4H || T == T8H) {
    // Halfword elements: restrict both the value and its complement to the
    // low 16 bits so that only two bytes take part in the counting below.
    assert((imm32  & ~0xffff) == 0, "extraneous bits in unsigned imm32 (T4H/T8H)");
    imm32 &= 0xffff;
    nimm32 &= 0xffff;
  }
  u_int32_t x = imm32;
  int movi_cnt = 0;
  int movn_cnt = 0;
  // movi_cnt = number of non-zero bytes in imm32.
  while (x) { if (x & 0xff) movi_cnt++; x >>= 8; }
  x = nimm32;
  // movn_cnt = number of non-zero bytes in the complement.
  while (x) { if (x & 0xff) movn_cnt++; x >>= 8; }
  // Encode whichever form has fewer non-zero bytes; ties keep imm32.
  if (movn_cnt < movi_cnt) imm32 = nimm32;
  unsigned lsl = 0;
  // Skip low-order zero bytes. The `imm32 &&` guard stops the loop when the
  // value to encode is zero, in which case byte 0 is emitted with lsl == 0.
  while (imm32 && (imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; }
  // First instruction carries the lowest non-zero byte (or zero).
  if (movn_cnt < movi_cnt)
    mvni(Vd, T, imm32 & 0xff, lsl);
  else
    movi(Vd, T, imm32 & 0xff, lsl);
  imm32 >>= 8; lsl += 8;
  // One further instruction for each remaining non-zero byte.
  while (imm32) {
    // imm32 is non-zero here, so this scan for the next non-zero byte
    // terminates.
    while ((imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; }
    if (movn_cnt < movi_cnt)
      bici(Vd, T, imm32 & 0xff, lsl);
    else
      orri(Vd, T, imm32 & 0xff, lsl);
    lsl += 8; imm32 >>= 8;
  }
 }
 void MacroAssembler::mov_immediate64(Register dst, u_int64_t imm64)
 {
 #ifndef PRODUCT
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
@ -465,44 +465,7 @@ public:
  void movptr(Register r, uintptr_t imm64);
-  // Macro to mov replicated immediate to vector register.
+  void mov(FloatRegister Vd, SIMD_Arrangement T, u_int32_t imm32);
  // Where imm32 == hex abcdefgh, Vd will get the following values
  // for different arrangements in T
  //   T8B:  Vd = ghghghghghghghgh
  //   T16B: Vd = ghghghghghghghghghghghghghghghgh
  //   T4H:  Vd = efghefghefghefgh
  //   T8H:  Vd = efghefghefghefghefghefghefghefgh
  //   T2S:  Vd = abcdefghabcdefgh
  //   T4S:  Vd = abcdefghabcdefghabcdefghabcdefgh
  //   T1D/T2D: invalid
  //
  // Inline form of the replicated-immediate move. Bits of imm32 above the
  // element width are silently discarded by masking (low 8 bits for
  // T8B/T16B, low 16 bits for T4H/T8H); nothing is asserted about them.
  //
  // The value is emitted either as movi followed by orri for each further
  // non-zero byte, or, when the masked bitwise complement has strictly fewer
  // non-zero bytes, as mvni followed by bici for each further non-zero byte
  // of the complement.
  //
  // NOTE(review): the byte arrangements go through the same selection as the
  // wider ones. For example imm32 == 0xff with T8B/T16B gives a masked
  // complement of 0, so movn_cnt (0) < movi_cnt (1) and mvni is emitted with
  // a byte arrangement. Confirm that mvni accepts T8B/T16B; the AArch64 MVNI
  // encoding presumably has no 8-bit form.
  void mov(FloatRegister Vd, SIMD_Arrangement T, u_int32_t imm32) {
    assert(T != T1D && T != T2D, "invalid arrangement");
    u_int32_t nimm32 = ~imm32;
    // Restrict the value and its complement to the element width.
    if (T == T8B || T == T16B) { imm32 &= 0xff; nimm32 &= 0xff; }
    if (T == T4H || T == T8H) { imm32 &= 0xffff; nimm32 &= 0xffff; }
    u_int32_t x = imm32;
    int movi_cnt = 0;
    int movn_cnt = 0;
    // movi_cnt = number of non-zero bytes in imm32.
    while (x) { if (x & 0xff) movi_cnt++; x >>= 8; }
    x = nimm32;
    // movn_cnt = number of non-zero bytes in the complement.
    while (x) { if (x & 0xff) movn_cnt++; x >>= 8; }
    // Encode whichever form has fewer non-zero bytes; ties keep imm32.
    if (movn_cnt < movi_cnt) imm32 = nimm32;
    unsigned lsl = 0;
    // Skip low-order zero bytes; the `imm32 &&` guard stops the loop when
    // the value to encode is zero.
    while (imm32 && (imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; }
    // First instruction carries the lowest non-zero byte (or zero).
    if (movn_cnt < movi_cnt)
      mvni(Vd, T, imm32 & 0xff, lsl);
    else
      movi(Vd, T, imm32 & 0xff, lsl);
    imm32 >>= 8; lsl += 8;
    // One further instruction for each remaining non-zero byte.
    while (imm32) {
      // imm32 is non-zero here, so this scan terminates.
      while ((imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; }
      if (movn_cnt < movi_cnt)
        bici(Vd, T, imm32 & 0xff, lsl);
      else
        orri(Vd, T, imm32 & 0xff, lsl);
      lsl += 8; imm32 >>= 8;
    }
  }
  // macro instructions for accessing and updating floating point
  // status register