8153310: AArch64: JEP 254: Implement byte_array_inflate

Reviewed-by: roland
2016-04-20 11:05:28 +00:00 · 2016-04-20 11:05:28 +00:00 · cdcd378bd6
commit cdcd378bd6
parent 66208f1fca
4 changed files with 159 additions and 13 deletions
--- a/hotspot/src/cpu/aarch64/vm/aarch64.ad
+++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad
@ -14930,6 +14930,40 @@ instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
 %}


+// fast char[] to byte[] compression
+instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
+                         vRegD_V0 tmp1, vRegD_V1 tmp2,
+                         vRegD_V2 tmp3, vRegD_V3 tmp4,
+                         iRegI_R0 result, rFlagsReg cr)
+%{
+  match(Set result (StrCompressedCopy src (Binary dst len)));
+  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
+
+  format %{ "String Compress $src,$dst -> $result    // KILL R1, R2, R3, R4" %}
+  ins_encode %{
+    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
+                           $tmp1$$FloatRegister, $tmp2$$FloatRegister,
+                           $tmp3$$FloatRegister, $tmp4$$FloatRegister,
+                           $result$$Register);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// fast byte[] to char[] inflation
+instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len,
+                        vRegD tmp1, vRegD tmp2, vRegD tmp3, iRegP_R3 tmp4, rFlagsReg cr)
+%{
+  match(Set dummy (StrInflatedCopy src (Binary dst len)));
+  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
+
+  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
+  ins_encode %{
+    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
+                          $tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister, $tmp4$$Register);
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
 // encode char[] to byte[] in ISO_8859_1
 instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                          vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
--- a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp
+++ b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp
@ -2245,18 +2245,18 @@ public:
    rf(Vn, 5), rf(Rd, 0);
  }

-#define INSN(NAME, opc, opc2) \
-  void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, int shift){         \
-    starti;                                                                             \
-    /* The encodings for the immh:immb fields (bits 22:16) are                          \
-     *   0001 xxx       8B/16B, shift = xxx                                             \
-     *   001x xxx       4H/8H,  shift = xxxx                                            \
-     *   01xx xxx       2S/4S,  shift = xxxxx                                           \
-     *   1xxx xxx       1D/2D,  shift = xxxxxx (1D is RESERVED)                         \
-     */                                                                                 \
-    assert((1 << ((T>>1)+3)) > shift, "Invalid Shift value");                           \
-    f(0, 31), f(T & 1, 30), f(opc, 29), f(0b011110, 28, 23),                            \
-    f((1 << ((T>>1)+3))|shift, 22, 16); f(opc2, 15, 10), rf(Vn, 5), rf(Vd, 0);          \
+#define INSN(NAME, opc, opc2)                                           \
+  void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, int shift){ \
+    starti;                                                             \
+    /* The encodings for the immh:immb fields (bits 22:16) are          \
+     *   0001 xxx       8B/16B, shift = xxx                             \
+     *   001x xxx       4H/8H,  shift = xxxx                            \
+     *   01xx xxx       2S/4S,  shift = xxxxx                           \
+     *   1xxx xxx       1D/2D,  shift = xxxxxx (1D is RESERVED)         \
+     */                                                                 \
+    assert((1 << ((T>>1)+3)) > shift, "Invalid Shift value");           \
+    f(0, 31), f(T & 1, 30), f(opc, 29), f(0b011110, 28, 23),            \
+    f((1 << ((T>>1)+3))|shift, 22, 16); f(opc2, 15, 10), rf(Vn, 5), rf(Vd, 0); \
  }

  INSN(shl,  0, 0b010101);
@ -2347,6 +2347,24 @@ public:
    f(0b000001, 15, 10), rf(Vn, 5), rf(Vd, 0);
  }

+  // AdvSIMD ZIP/UZP/TRN
+#define INSN(NAME, opcode)                                              \
+  void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
+    starti;                                                             \
+    f(0, 31), f(0b001110, 29, 24), f(0, 21), f(0b001110, 15, 10);       \
+    rf(Vm, 16), rf(Vn, 5), rf(Vd, 0);                                   \
+    f(T & 1, 30), f(T >> 1, 23, 22);                                    \
+  }
+
+  INSN(uzp1, 0b001);
+  INSN(trn1, 0b010);
+  INSN(zip1, 0b011);
+  INSN(uzp2, 0b101);
+  INSN(trn2, 0b110);
+  INSN(zip2, 0b111);
+
+#undef INSN
+
  // CRC32 instructions
 #define INSN(NAME, c, sf, sz)                                             \
  void NAME(Register Rd, Register Rn, Register Rm) {                      \
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
@ -4680,7 +4680,8 @@ void MacroAssembler::arrays_equals(Register a1, Register a2,
 }


-// encode char[] to byte[] in ISO_8859_1
+// Intrinsic for sun/nio/cs/ISO_8859_1$Encoder.implEncodeISOArray and
+// java/lang/StringUTF16.compress.
 void MacroAssembler::encode_iso_array(Register src, Register dst,
                      Register len, Register result,
                      FloatRegister Vtmp1, FloatRegister Vtmp2,
@ -4743,6 +4744,90 @@ void MacroAssembler::encode_iso_array(Register src, Register dst,

    BIND(DONE);
      sub(result, result, len); // Return index where we stopped
+                                // Return len == 0 if we processed all
+                                // characters
+}
+
+
+// Inflate byte[] array to char[].
+void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,
+                                        FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3,
+                                        Register tmp4) {
+  Label big, done;
+
+  assert_different_registers(src, dst, len, tmp4, rscratch1);
+
+  fmovd(vtmp1 , zr);
+  lsrw(rscratch1, len, 3);
+
+  cbnzw(rscratch1, big);
+
+  // Short string: less than 8 bytes.
+  {
+    Label loop, around, tiny;
+
+    subsw(len, len, 4);
+    andw(len, len, 3);
+    br(LO, tiny);
+
+    // Use SIMD to do 4 bytes.
+    ldrs(vtmp2, post(src, 4));
+    zip1(vtmp3, T8B, vtmp2, vtmp1);
+    strd(vtmp3, post(dst, 8));
+
+    cbzw(len, done);
+
+    // Do the remaining bytes by steam.
+    bind(loop);
+    ldrb(tmp4, post(src, 1));
+    strh(tmp4, post(dst, 2));
+    subw(len, len, 1);
+
+    bind(tiny);
+    cbnz(len, loop);
+
+    bind(around);
+    b(done);
+  }
+
+  // Unpack the bytes 8 at a time.
+  bind(big);
+  andw(len, len, 7);
+
+  {
+    Label loop, around;
+
+    bind(loop);
+    ldrd(vtmp2, post(src, 8));
+    sub(rscratch1, rscratch1, 1);
+    zip1(vtmp3, T16B, vtmp2, vtmp1);
+    st1(vtmp3, T8H, post(dst, 16));
+    cbnz(rscratch1, loop);
+
+    bind(around);
+  }
+
+  // Do the tail of up to 8 bytes.
+  sub(src, src, 8);
+  add(src, src, len, ext::uxtw, 0);
+  ldrd(vtmp2, Address(src));
+  sub(dst, dst, 16);
+  add(dst, dst, len, ext::uxtw, 1);
+  zip1(vtmp3, T16B, vtmp2, vtmp1);
+  st1(vtmp3, T8H, Address(dst));
+
+  bind(done);
+}
+
+// Compress char[] array to byte[].
+void MacroAssembler::char_array_compress(Register src, Register dst, Register len,
+                                         FloatRegister tmp1Reg, FloatRegister tmp2Reg,
+                                         FloatRegister tmp3Reg, FloatRegister tmp4Reg,
+                                         Register result) {
+  encode_iso_array(src, dst, len, result,
+                   tmp1Reg, tmp2Reg, tmp3Reg, tmp4Reg);
+  cmp(len, zr);
+  csel(result, result, zr, EQ);
 }

 // get_thread() can be called anywhere inside generated code so we
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
@ -1184,6 +1184,15 @@ public:
                     Register result, Register cnt1,
                     int elem_size, bool is_string);

+  void byte_array_inflate(Register src, Register dst, Register len,
+                          FloatRegister vtmp1, FloatRegister vtmp2,
+                          FloatRegister vtmp3, Register tmp4);
+
+  void char_array_compress(Register src, Register dst, Register len,
+                           FloatRegister tmp1Reg, FloatRegister tmp2Reg,
+                           FloatRegister tmp3Reg, FloatRegister tmp4Reg,
+                           Register result);
+
  void encode_iso_array(Register src, Register dst,
                        Register len, Register result,
                        FloatRegister Vtmp1, FloatRegister Vtmp2,