8153310: AArch64: JEP 254: Implement byte_array_inflate
Reviewed-by: roland
This commit is contained in:
parent
66208f1fca
commit
cdcd378bd6
@ -14930,6 +14930,40 @@ instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
|
||||
%}
|
||||
|
||||
|
||||
// Fast char[] to byte[] compression (matches StrCompressedCopy).
//
// The operand classes name specific registers: src/dst/len use the R2/R1/R3
// classes, result uses the R0 class and the four vector temporaries use the
// V0..V3 classes.  src, dst and len are consumed and clobbered (USE_KILL),
// the vector temporaries are TEMPs, and the condition flags are killed.
instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                         vRegD_V0 tmp1, vRegD_V1 tmp2,
                         vRegD_V2 tmp3, vRegD_V3 tmp4,
                         iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4,
         USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL cr);

  // NOTE(review): the text below mentions R4, which is not an operand of this
  // rule, and omits the vector temporaries -- confirm against effect() above.
  format %{ "String Compress $src,$dst -> $result // KILL R1, R2, R3, R4" %}

  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$FloatRegister, $tmp2$$FloatRegister,
                           $tmp3$$FloatRegister, $tmp4$$FloatRegister,
                           $result$$Register);
  %}

  ins_pipe(pipe_slow);
%}
|
||||
|
||||
// Fast byte[] to char[] inflation (matches StrInflatedCopy).
//
// The matched node is assigned to the Universe operand 'dummy', which the
// encoding never references.  src, dst and len are consumed and clobbered
// (USE_KILL); tmp1..tmp3 (vector) and tmp4 (integer) are scratch TEMPs, and
// the condition flags are killed.
instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len,
                        vRegD tmp1, vRegD tmp2, vRegD tmp3, iRegP_R3 tmp4, rFlagsReg cr)
%{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  // Name every TEMP operand so the printed form agrees with effect() above.
  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}

  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister, $tmp4$$Register);
  %}

  ins_pipe(pipe_class_memory);
%}
|
||||
|
||||
// encode char[] to byte[] in ISO_8859_1
|
||||
instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
|
||||
vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
|
||||
|
@ -2245,18 +2245,18 @@ public:
|
||||
rf(Vn, 5), rf(Rd, 0);
|
||||
}
|
||||
|
||||
// AdvSIMD shift-by-immediate emitter generator.
//
// INSN(NAME, opc, opc2) defines NAME(Vd, T, Vn, shift).  opc goes to bit 29,
// opc2 to bits 15:10, and the arrangement T supplies bit 30 (T & 1) and the
// element-size marker that is OR-ed with the shift amount into bits 22:16.
// There must be exactly one definition of this macro before its uses; every
// continuation line ends in a backslash and the body is closed by '}'.
#define INSN(NAME, opc, opc2)                                                   \
  void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, int shift){ \
    starti;                                                                     \
    /* The encodings for the immh:immb fields (bits 22:16) are                  \
     *   0001 xxx       8B/16B, shift = xxx                                     \
     *   001x xxx       4H/8H,  shift = xxxx                                    \
     *   01xx xxx       2S/4S,  shift = xxxxx                                   \
     *   1xxx xxx       1D/2D,  shift = xxxxxx (1D is RESERVED)                 \
     */                                                                         \
    assert((1 << ((T>>1)+3)) > shift, "Invalid Shift value");                   \
    f(0, 31), f(T & 1, 30), f(opc, 29), f(0b011110, 28, 23),                    \
    f((1 << ((T>>1)+3))|shift, 22, 16), f(opc2, 15, 10), rf(Vn, 5), rf(Vd, 0);  \
  }
|
||||
|
||||
INSN(shl, 0, 0b010101);
|
||||
@ -2347,6 +2347,24 @@ public:
|
||||
f(0b000001, 15, 10), rf(Vn, 5), rf(Vd, 0);
|
||||
}
|
||||
|
||||
// AdvSIMD ZIP/UZP/TRN
//
// INSN(NAME, opcode) defines NAME(Vd, T, Vn, Vm), the three-register permute
// emitter.  Field layout written by the body:
//   bit  31     0
//   bit  30     Q          (T & 1)
//   bits 29:24  0b001110
//   bits 23:22  size       (T >> 1)
//   bit  21     0
//   bits 20:16  Vm
//   bit  15     0
//   bits 14:12  opcode     (selects UZP1/TRN1/ZIP1/UZP2/TRN2/ZIP2)
//   bits 11:10  0b10
//   bits  9:5   Vn
//   bits  4:0   Vd
//
// The opcode argument must be written into bits 14:12.  Hard-wiring bits
// 15:10 to 0b001110 happens to spell opcode 0b011, so every generated
// mnemonic would emit ZIP1 regardless of the opcode passed in.
#define INSN(NAME, opcode)                                                              \
  void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
    starti;                                                                             \
    f(0, 31), f(0b001110, 29, 24), f(0, 21), f(0, 15);                                  \
    f(opcode, 14, 12), f(0b10, 11, 10);                                                 \
    rf(Vm, 16), rf(Vn, 5), rf(Vd, 0);                                                   \
    f(T & 1, 30), f(T >> 1, 23, 22);                                                    \
  }
|
||||
|
||||
INSN(uzp1, 0b001);
|
||||
INSN(trn1, 0b010);
|
||||
INSN(zip1, 0b011);
|
||||
INSN(uzp2, 0b101);
|
||||
INSN(trn2, 0b110);
|
||||
INSN(zip2, 0b111);
|
||||
|
||||
#undef INSN
|
||||
|
||||
// CRC32 instructions
|
||||
#define INSN(NAME, c, sf, sz) \
|
||||
void NAME(Register Rd, Register Rn, Register Rm) { \
|
||||
|
@ -4680,7 +4680,8 @@ void MacroAssembler::arrays_equals(Register a1, Register a2,
|
||||
}
|
||||
|
||||
|
||||
// encode char[] to byte[] in ISO_8859_1
|
||||
// Intrinsic for sun/nio/cs/ISO_8859_1$Encoder.implEncodeISOArray and
|
||||
// java/lang/StringUTF16.compress.
|
||||
void MacroAssembler::encode_iso_array(Register src, Register dst,
|
||||
Register len, Register result,
|
||||
FloatRegister Vtmp1, FloatRegister Vtmp2,
|
||||
@ -4743,6 +4744,90 @@ void MacroAssembler::encode_iso_array(Register src, Register dst,
|
||||
|
||||
BIND(DONE);
|
||||
sub(result, result, len); // Return index where we stopped
|
||||
// Return len == 0 if we processed all
|
||||
// characters
|
||||
}
|
||||
|
||||
|
||||
// Inflate byte[] array to char[].
|
||||
void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,
|
||||
FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3,
|
||||
Register tmp4) {
|
||||
Label big, done;
|
||||
|
||||
assert_different_registers(src, dst, len, tmp4, rscratch1);
|
||||
|
||||
fmovd(vtmp1 , zr);
|
||||
lsrw(rscratch1, len, 3);
|
||||
|
||||
cbnzw(rscratch1, big);
|
||||
|
||||
// Short string: less than 8 bytes.
|
||||
{
|
||||
Label loop, around, tiny;
|
||||
|
||||
subsw(len, len, 4);
|
||||
andw(len, len, 3);
|
||||
br(LO, tiny);
|
||||
|
||||
// Use SIMD to do 4 bytes.
|
||||
ldrs(vtmp2, post(src, 4));
|
||||
zip1(vtmp3, T8B, vtmp2, vtmp1);
|
||||
strd(vtmp3, post(dst, 8));
|
||||
|
||||
cbzw(len, done);
|
||||
|
||||
// Do the remaining bytes by steam.
|
||||
bind(loop);
|
||||
ldrb(tmp4, post(src, 1));
|
||||
strh(tmp4, post(dst, 2));
|
||||
subw(len, len, 1);
|
||||
|
||||
bind(tiny);
|
||||
cbnz(len, loop);
|
||||
|
||||
bind(around);
|
||||
b(done);
|
||||
}
|
||||
|
||||
// Unpack the bytes 8 at a time.
|
||||
bind(big);
|
||||
andw(len, len, 7);
|
||||
|
||||
{
|
||||
Label loop, around;
|
||||
|
||||
bind(loop);
|
||||
ldrd(vtmp2, post(src, 8));
|
||||
sub(rscratch1, rscratch1, 1);
|
||||
zip1(vtmp3, T16B, vtmp2, vtmp1);
|
||||
st1(vtmp3, T8H, post(dst, 16));
|
||||
cbnz(rscratch1, loop);
|
||||
|
||||
bind(around);
|
||||
}
|
||||
|
||||
// Do the tail of up to 8 bytes.
|
||||
sub(src, src, 8);
|
||||
add(src, src, len, ext::uxtw, 0);
|
||||
ldrd(vtmp2, Address(src));
|
||||
sub(dst, dst, 16);
|
||||
add(dst, dst, len, ext::uxtw, 1);
|
||||
zip1(vtmp3, T16B, vtmp2, vtmp1);
|
||||
st1(vtmp3, T8H, Address(dst));
|
||||
|
||||
bind(done);
|
||||
}
|
||||
|
||||
// Compress char[] array to byte[].
|
||||
void MacroAssembler::char_array_compress(Register src, Register dst, Register len,
|
||||
FloatRegister tmp1Reg, FloatRegister tmp2Reg,
|
||||
FloatRegister tmp3Reg, FloatRegister tmp4Reg,
|
||||
Register result) {
|
||||
encode_iso_array(src, dst, len, result,
|
||||
tmp1Reg, tmp2Reg, tmp3Reg, tmp4Reg);
|
||||
cmp(len, zr);
|
||||
csel(result, result, zr, EQ);
|
||||
}
|
||||
|
||||
// get_thread() can be called anywhere inside generated code so we
|
||||
|
@ -1184,6 +1184,15 @@ public:
|
||||
Register result, Register cnt1,
|
||||
int elem_size, bool is_string);
|
||||
|
||||
// Emit code that inflates len bytes at src into 16-bit elements at dst.
// vtmp1..vtmp3 are vector scratch registers and tmp4 is an integer scratch
// register.
void byte_array_inflate(Register src, Register dst, Register len,
                        FloatRegister vtmp1,
                        FloatRegister vtmp2,
                        FloatRegister vtmp3,
                        Register tmp4);
|
||||
|
||||
// Emit code that compresses the char[] at src into the byte[] at dst.
// tmp1Reg..tmp4Reg are vector scratch registers; the outcome is left in
// result.
void char_array_compress(Register src, Register dst, Register len,
                         FloatRegister tmp1Reg,
                         FloatRegister tmp2Reg,
                         FloatRegister tmp3Reg,
                         FloatRegister tmp4Reg,
                         Register result);
|
||||
|
||||
void encode_iso_array(Register src, Register dst,
|
||||
Register len, Register result,
|
||||
FloatRegister Vtmp1, FloatRegister Vtmp2,
|
||||
|
Loading…
Reference in New Issue
Block a user