8153310: AArch64: JEP 254: Implement byte_array_inflate

Reviewed-by: roland
This commit is contained in:
Andrew Haley 2016-04-20 11:05:28 +00:00
parent 66208f1fca
commit cdcd378bd6
4 changed files with 159 additions and 13 deletions

View File

@ -14930,6 +14930,40 @@ instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
%}
// fast char[] to byte[] compression
//
// Matches a StrCompressedCopy node and expands it into a call to
// MacroAssembler::char_array_compress. The operand classes pin the inputs
// and the result to specific registers (src in R2, dst in R1, len in R3,
// result in R0) and reserve V0-V3 as vector temporaries.
instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
vRegD_V0 tmp1, vRegD_V1 tmp2,
vRegD_V2 tmp3, vRegD_V3 tmp4,
iRegI_R0 result, rFlagsReg cr)
%{
match(Set result (StrCompressedCopy src (Binary dst len)));
// src, dst and len are consumed and overwritten by the expansion; the
// condition flags are clobbered as well.
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
// NOTE(review): the format text mentions R4, but no operand of this
// instruct is bound to R4 -- confirm whether the text is stale.
format %{ "String Compress $src,$dst -> $result // KILL R1, R2, R3, R4" %}
ins_encode %{
__ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
$tmp1$$FloatRegister, $tmp2$$FloatRegister,
$tmp3$$FloatRegister, $tmp4$$FloatRegister,
$result$$Register);
%}
ins_pipe( pipe_slow );
%}
// fast byte[] to char[] inflation
//
// Matches a StrInflatedCopy node and expands it into a call to
// MacroAssembler::byte_array_inflate. src, dst and len are pinned to R0, R1
// and R2; tmp4 is the general-purpose scratch register R3; tmp1-tmp3 are
// vector temporaries chosen by the register allocator.
// The match rule sets a Universe operand -- presumably because the node
// produces no value that later code consumes; confirm against the ideal
// graph definition of StrInflatedCopy.
instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len,
vRegD tmp1, vRegD tmp2, vRegD tmp3, iRegP_R3 tmp4, rFlagsReg cr)
%{
match(Set dummy (StrInflatedCopy src (Binary dst len)));
// src, dst and len are consumed and overwritten by the expansion; the
// condition flags are clobbered as well.
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
// NOTE(review): the format text lists only $tmp1 and $tmp2 although four
// temporaries are declared TEMP above.
format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
ins_encode %{
__ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
$tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister, $tmp4$$Register);
%}
ins_pipe(pipe_class_memory);
%}
// encode char[] to byte[] in ISO_8859_1
instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,

View File

@ -2245,18 +2245,18 @@ public:
rf(Vn, 5), rf(Rd, 0);
}
#define INSN(NAME, opc, opc2) \
void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, int shift){ \
starti; \
/* The encodings for the immh:immb fields (bits 22:16) are \
* 0001 xxx 8B/16B, shift = xxx \
* 001x xxx 4H/8H, shift = xxxx \
* 01xx xxx 2S/4S, shift = xxxxx \
* 1xxx xxx 1D/2D, shift = xxxxxx (1D is RESERVED) \
*/ \
assert((1 << ((T>>1)+3)) > shift, "Invalid Shift value"); \
f(0, 31), f(T & 1, 30), f(opc, 29), f(0b011110, 28, 23), \
f((1 << ((T>>1)+3))|shift, 22, 16); f(opc2, 15, 10), rf(Vn, 5), rf(Vd, 0); \
// Generates one emitter per AdvSIMD shift-by-immediate instruction.
// NAME is the emitter's name; opc goes into bit 29 and opc2 into bits 15:10.
// Bit 30 is taken from the low bit of the arrangement T. Bits 22:16
// (immh:immb) are formed by OR-ing the shift amount with a single marker
// bit, 1 << ((T>>1)+3), i.e. 8, 16, 32 or 64 depending on the element size
// selected by T. The assert rejects shift amounts that would collide with
// or exceed that marker bit.
#define INSN(NAME, opc, opc2) \
void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, int shift){ \
starti; \
/* The encodings for the immh:immb fields (bits 22:16) are \
* 0001 xxx 8B/16B, shift = xxx \
* 001x xxx 4H/8H, shift = xxxx \
* 01xx xxx 2S/4S, shift = xxxxx \
* 1xxx xxx 1D/2D, shift = xxxxxx (1D is RESERVED) \
*/ \
assert((1 << ((T>>1)+3)) > shift, "Invalid Shift value"); \
f(0, 31), f(T & 1, 30), f(opc, 29), f(0b011110, 28, 23), \
f((1 << ((T>>1)+3))|shift, 22, 16); f(opc2, 15, 10), rf(Vn, 5), rf(Vd, 0); \
}
INSN(shl, 0, 0b010101);
@ -2347,6 +2347,24 @@ public:
f(0b000001, 15, 10), rf(Vn, 5), rf(Vd, 0);
}
// AdvSIMD ZIP/UZP/TRN
//
// Generates one emitter per AdvSIMD permute instruction. Instruction layout:
//
//   31 | 30 | 29..24 | 23..22 | 21 | 20..16 | 15 | 14..12 | 11..10 | 9..5 | 4..0
//    0 |  Q | 001110 |  size  |  0 |   Rm   |  0 | opcode |   10   |  Rn  |  Rd
//
// Q (bit 30) is the low bit of the arrangement T and size (bits 23:22) is
// T >> 1. The three opcode bits are what distinguish the six instructions,
// so they must come from the macro argument: with those bits hard-coded,
// every emitter generated here would assemble the same instruction (ZIP1,
// whose opcode is 0b011).
#define INSN(NAME, opcode) \
  void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
    starti; \
    f(0, 31), f(0b001110, 29, 24), f(0, 21), f(0, 15); \
    f(opcode, 14, 12), f(0b10, 11, 10); \
    rf(Vm, 16), rf(Vn, 5), rf(Vd, 0); \
    f(T & 1, 30), f(T >> 1, 23, 22); \
  }
INSN(uzp1, 0b001);
INSN(trn1, 0b010);
INSN(zip1, 0b011);
INSN(uzp2, 0b101);
INSN(trn2, 0b110);
INSN(zip2, 0b111);
#undef INSN
// CRC32 instructions
#define INSN(NAME, c, sf, sz) \
void NAME(Register Rd, Register Rn, Register Rm) { \

View File

@ -4680,7 +4680,8 @@ void MacroAssembler::arrays_equals(Register a1, Register a2,
}
// encode char[] to byte[] in ISO_8859_1
// Intrinsic for sun/nio/cs/ISO_8859_1$Encoder.implEncodeISOArray and
// java/lang/StringUTF16.compress.
void MacroAssembler::encode_iso_array(Register src, Register dst,
Register len, Register result,
FloatRegister Vtmp1, FloatRegister Vtmp2,
@ -4743,6 +4744,90 @@ void MacroAssembler::encode_iso_array(Register src, Register dst,
BIND(DONE);
sub(result, result, len); // Return index where we stopped
// Return len == 0 if we processed all
// characters
}
// Inflate byte[] array to char[].
//
// Emits code that reads len bytes starting at src and writes each of them
// as a 16-bit element starting at dst, with the upper byte of every element
// zero.
//
//   src, dst      - source / destination addresses; both are advanced and
//                   therefore destroyed by the emitted code.
//   len           - number of bytes to inflate; destroyed.
//   vtmp1         - vector scratch, held at zero for the whole sequence.
//   vtmp2, vtmp3  - vector scratch for the loaded bytes and the widened
//                   result.
//   tmp4          - general-purpose scratch used by the byte-at-a-time loop.
//
// rscratch1 is used as the 8-byte group counter, and the condition flags are
// modified (subsw).
void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,
FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3,
Register tmp4) {
Label big, done;
assert_different_registers(src, dst, len, tmp4, rscratch1);
// vtmp1 = 0. Interleaving (zip1) the loaded bytes with this register is
// what widens each byte to a 16-bit element with a zero upper byte.
fmovd(vtmp1 , zr);
// rscratch1 = len / 8: the number of complete 8-byte groups.
lsrw(rscratch1, len, 3);
cbnzw(rscratch1, big);
// Short string: less than 8 bytes.
{
Label loop, around, tiny;
// The flags come from the subtraction; andw leaves them untouched, so the
// branch below still tests "original len < 4". Subtracting 4 does not
// change the low two bits, so after the andw len holds (original len) & 3,
// which is the number of bytes left for the byte loop in either case.
subsw(len, len, 4);
andw(len, len, 3);
br(LO, tiny);
// Use SIMD to do 4 bytes.
ldrs(vtmp2, post(src, 4));
zip1(vtmp3, T8B, vtmp2, vtmp1);
strd(vtmp3, post(dst, 8));
cbzw(len, done);
// Do the remaining (at most 3) bytes one at a time.
bind(loop);
ldrb(tmp4, post(src, 1));
strh(tmp4, post(dst, 2));
subw(len, len, 1);
// Entry point for inputs shorter than 4 bytes; also the loop test.
bind(tiny);
cbnz(len, loop);
// 'around' is bound here but nothing in this function branches to it.
bind(around);
b(done);
}
// Unpack the bytes 8 at a time.
bind(big);
// len = number of bytes left over after the complete 8-byte groups.
andw(len, len, 7);
{
Label loop, around;
bind(loop);
// 8 bytes in, 16 bytes out per iteration; rscratch1 counts groups down.
ldrd(vtmp2, post(src, 8));
sub(rscratch1, rscratch1, 1);
zip1(vtmp3, T16B, vtmp2, vtmp1);
st1(vtmp3, T8H, post(dst, 16));
cbnz(rscratch1, loop);
// 'around' is bound here but nothing in this function branches to it.
bind(around);
}
// Do the tail of up to 8 bytes.
// Rather than looping over the leftover bytes, step src back by 8 and
// forward by len (and dst back by 16 and forward by 2 * len) so that one
// more full 8-byte group ends exactly at the end of the input. That group
// overlaps data already converted (entirely so when len is 0) and rewrites
// the same values; this path is only reached when at least 8 bytes were
// present, so the access stays within the source range.
sub(src, src, 8);
add(src, src, len, ext::uxtw, 0);
ldrd(vtmp2, Address(src));
sub(dst, dst, 16);
add(dst, dst, len, ext::uxtw, 1);
zip1(vtmp3, T16B, vtmp2, vtmp1);
st1(vtmp3, T8H, Address(dst));
bind(done);
}
// Compress char[] array to byte[].
//
// Emits encode_iso_array() with the same registers and then post-processes
// its outcome: result is kept unchanged when len is zero afterwards and is
// replaced by zero otherwise. According to the comments at the end of
// encode_iso_array, len == 0 there means every character was processed.
// src, dst, len and the four vector temporaries are simply forwarded; the
// compare also modifies the condition flags.
void MacroAssembler::char_array_compress(Register src, Register dst, Register len,
FloatRegister tmp1Reg, FloatRegister tmp2Reg,
FloatRegister tmp3Reg, FloatRegister tmp4Reg,
Register result) {
encode_iso_array(src, dst, len, result,
tmp1Reg, tmp2Reg, tmp3Reg, tmp4Reg);
// result = (len == 0) ? result : 0
cmp(len, zr);
csel(result, result, zr, EQ);
}
// get_thread() can be called anywhere inside generated code so we

View File

@ -1184,6 +1184,15 @@ public:
Register result, Register cnt1,
int elem_size, bool is_string);
// Emits code that inflates len bytes at src into 16-bit elements at dst.
// vtmp1-vtmp3 are vector scratch registers and tmp4 is a general-purpose
// scratch register.
void byte_array_inflate(Register src, Register dst, Register len,
FloatRegister vtmp1, FloatRegister vtmp2,
FloatRegister vtmp3, Register tmp4);
// Emits code that compresses the char[] data at src into bytes at dst.
// tmp1Reg-tmp4Reg are vector scratch registers; the outcome is left in
// result.
void char_array_compress(Register src, Register dst, Register len,
FloatRegister tmp1Reg, FloatRegister tmp2Reg,
FloatRegister tmp3Reg, FloatRegister tmp4Reg,
Register result);
void encode_iso_array(Register src, Register dst,
Register len, Register result,
FloatRegister Vtmp1, FloatRegister Vtmp2,