8334474: RISC-V: verify perf of ExpandBits/CompressBits (rvv)
Reviewed-by: fyang, rehn, luhenry
This commit is contained in:
parent
e29b0edc97
commit
133419177d
@ -2339,83 +2339,6 @@ void C2_MacroAssembler::signum_fp_v(VectorRegister dst, VectorRegister one, Basi
|
||||
vfsgnj_vv(dst, one, dst, v0_t);
|
||||
}
|
||||
|
||||
// Scalar bit-compress (j.l.Integer/Long::compress) implemented with RVV.
// Each bit of 'src' selected by a set bit of 'mask' is moved to the next
// contiguous low-order bit position of 'dst'. Strategy: widen the src bits
// to one byte per bit, vcompress the bytes under the mask, then narrow the
// surviving bytes back to bits. 'is_long' selects the 64-bit (Long) vs the
// 32-bit (Integer) flavor.
// Clobbers t0, v0 and the v4/v8 vector register groups (m2 or m4 wide,
// matching the TEMP lists of the compressBitsI/compressBitsL instructs).
void C2_MacroAssembler::compress_bits_v(Register dst, Register src, Register mask, bool is_long) {
  Assembler::SEW sew = is_long ? Assembler::e64 : Assembler::e32;
  // intrinsic is enabled when MaxVectorSize >= 16
  // (so an LMUL of m2/m4 always provides the 32/64 bytes needed below).
  Assembler::LMUL lmul = is_long ? Assembler::m4 : Assembler::m2;
  long len = is_long ? 64 : 32;

  // load the src data(in bits) to be compressed.
  vsetivli(x0, 1, sew, Assembler::m1);
  vmv_s_x(v0, src);
  // reset the src data(in bytes) to zero.
  mv(t0, len);
  vsetvli(x0, t0, Assembler::e8, lmul);
  vmv_v_i(v4, 0);
  // convert the src data from bits to bytes: byte i becomes 1 iff bit i of src is set.
  vmerge_vim(v4, v4, 1); // v0 as the implicit mask register
  // reset the dst data(in bytes) to zero.
  vmv_v_i(v8, 0);
  // load the mask data(in bits).
  vsetivli(x0, 1, sew, Assembler::m1);
  vmv_s_x(v0, mask);
  // compress the src data(in bytes) to dst(in bytes): bytes at mask-selected
  // positions are packed contiguously into the low elements of v8.
  vsetvli(x0, t0, Assembler::e8, lmul);
  vcompress_vm(v8, v4, v0);
  // convert the dst data from bytes to bits: mask bit i of v0 is set iff byte i == 1.
  vmseq_vi(v0, v8, 1);
  // store result back.
  vsetivli(x0, 1, sew, Assembler::m1);
  vmv_x_s(dst, v0);
}
|
||||
|
||||
// Intrinsic entry for j.l.Integer::compress — 32-bit flavor of compress_bits_v.
void C2_MacroAssembler::compress_bits_i_v(Register dst, Register src, Register mask) {
  constexpr bool is_long = false; // Integer variant operates on 32 bits
  compress_bits_v(dst, src, mask, is_long);
}
|
||||
|
||||
// Intrinsic entry for j.l.Long::compress — 64-bit flavor of compress_bits_v.
void C2_MacroAssembler::compress_bits_l_v(Register dst, Register src, Register mask) {
  constexpr bool is_long = true; // Long variant operates on 64 bits
  compress_bits_v(dst, src, mask, is_long);
}
|
||||
|
||||
// Scalar bit-expand (j.l.Integer/Long::expand) implemented with RVV.
// The contiguous low-order bits of 'src' are scattered to the bit positions
// selected by 'mask'. Strategy: widen the src bits to one byte per bit,
// compute per-element gather indices with viota.m over the mask (element i
// gets the count of set mask bits below i), gather under the mask, then
// narrow the bytes back to bits. 'is_long' selects the 64-bit (Long) vs the
// 32-bit (Integer) flavor.
// Clobbers t0, v0 and the v4/v8/v12 vector register groups (m2 or m4 wide,
// matching the TEMP lists of the expandBitsI/expandBitsL instructs).
void C2_MacroAssembler::expand_bits_v(Register dst, Register src, Register mask, bool is_long) {
  Assembler::SEW sew = is_long ? Assembler::e64 : Assembler::e32;
  // intrinsic is enabled when MaxVectorSize >= 16
  // (so an LMUL of m2/m4 always provides the 32/64 bytes needed below).
  Assembler::LMUL lmul = is_long ? Assembler::m4 : Assembler::m2;
  long len = is_long ? 64 : 32;

  // load the src data(in bits) to be expanded.
  vsetivli(x0, 1, sew, Assembler::m1);
  vmv_s_x(v0, src);
  // reset the src data(in bytes) to zero.
  mv(t0, len);
  vsetvli(x0, t0, Assembler::e8, lmul);
  vmv_v_i(v4, 0);
  // convert the src data from bits to bytes: byte i becomes 1 iff bit i of src is set.
  vmerge_vim(v4, v4, 1); // v0 as implicit mask register
  // reset the dst data(in bytes) to zero.
  vmv_v_i(v12, 0);
  // load the mask data(in bits).
  vsetivli(x0, 1, sew, Assembler::m1);
  vmv_s_x(v0, mask);
  // expand the src data(in bytes) to dst(in bytes): only mask-active elements
  // are written, each gathering src byte number "popcount of mask below i".
  vsetvli(x0, t0, Assembler::e8, lmul);
  viota_m(v8, v0);
  vrgather_vv(v12, v4, v8, VectorMask::v0_t); // v0 as implicit mask register
  // convert the dst data from bytes to bits: mask bit i of v0 is set iff byte i == 1.
  vmseq_vi(v0, v12, 1);
  // store result back.
  vsetivli(x0, 1, sew, Assembler::m1);
  vmv_x_s(dst, v0);
}
|
||||
|
||||
// Intrinsic entry for j.l.Integer::expand — 32-bit flavor of expand_bits_v.
void C2_MacroAssembler::expand_bits_i_v(Register dst, Register src, Register mask) {
  constexpr bool is_long = false; // Integer variant operates on 32 bits
  expand_bits_v(dst, src, mask, is_long);
}
|
||||
|
||||
// Intrinsic entry for j.l.Long::expand — 64-bit flavor of expand_bits_v.
void C2_MacroAssembler::expand_bits_l_v(Register dst, Register src, Register mask) {
  constexpr bool is_long = true; // Long variant operates on 64 bits
  expand_bits_v(dst, src, mask, is_long);
}
|
||||
|
||||
// j.l.Math.round(float)
|
||||
// Returns the closest int to the argument, with ties rounding to positive infinity.
|
||||
// We need to handle 3 special cases defined by java api spec:
|
||||
|
@ -39,9 +39,6 @@
|
||||
VectorRegister vrs,
|
||||
bool is_latin, Label& DONE, Assembler::LMUL lmul);
|
||||
|
||||
  // Shared emitters for the j.l.Integer/Long compress/expand intrinsics;
  // is_long selects the 64-bit (Long) vs 32-bit (Integer) variant.
  void compress_bits_v(Register dst, Register src, Register mask, bool is_long);
  void expand_bits_v(Register dst, Register src, Register mask, bool is_long);
|
||||
|
||||
public:
|
||||
// Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
|
||||
void fast_lock(Register object, Register box,
|
||||
@ -184,13 +181,6 @@
|
||||
|
||||
  // intrinsic methods implemented by rvv instructions
  // (only reachable when UseRVV is on; each takes scalar GP registers and
  // clobbers t0 plus vector temporaries — see the definitions).

  // compress bits, i.e. j.l.Integer/Long::compress.
  void compress_bits_i_v(Register dst, Register src, Register mask);
  void compress_bits_l_v(Register dst, Register src, Register mask);
  // expand bits, i.e. j.l.Integer/Long::expand.
  void expand_bits_i_v(Register dst, Register src, Register mask);
  void expand_bits_l_v(Register dst, Register src, Register mask);
|
||||
|
||||
void java_round_float_v(VectorRegister dst, VectorRegister src, FloatRegister ftmp, BasicType bt, uint vector_length);
|
||||
void java_round_double_v(VectorRegister dst, VectorRegister src, FloatRegister ftmp, BasicType bt, uint vector_length);
|
||||
|
||||
|
@ -942,26 +942,6 @@ reg_class v11_reg(
|
||||
V11, V11_H, V11_J, V11_K
|
||||
);
|
||||
|
||||
// Single-register allocation classes: each names one vector register together
// with its V*_H/_J/_K sub-slots, so an instruct can pin a specific temp
// (presumably the slots cover the full VecA width — confirm against the
// register definitions above).

// class for vector register v12
reg_class v12_reg(
    V12, V12_H, V12_J, V12_K
);

// class for vector register v13
reg_class v13_reg(
    V13, V13_H, V13_J, V13_K
);

// class for vector register v14
reg_class v14_reg(
    V14, V14_H, V14_J, V14_K
);

// class for vector register v15
reg_class v15_reg(
    V15, V15_H, V15_J, V15_K
);

// class for condition codes
reg_class reg_flags(RFLAGS);
|
||||
|
||||
@ -1896,9 +1876,6 @@ bool Matcher::match_rule_supported(int opcode) {
|
||||
}
|
||||
break;
|
||||
|
||||
case Op_ExpandBits: // fall through
|
||||
case Op_CompressBits: // fall through
|
||||
guarantee(UseRVV == (MaxVectorSize >= 16), "UseRVV and MaxVectorSize not matched");
|
||||
case Op_StrCompressedCopy: // fall through
|
||||
case Op_StrInflatedCopy: // fall through
|
||||
case Op_CountPositives: // fall through
|
||||
@ -3541,46 +3518,6 @@ operand vReg_V11()
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
// Operand pinned to vector register v12 (allocation class v12_reg).
operand vReg_V12()
%{
  constraint(ALLOC_IN_RC(v12_reg));
  match(VecA);
  match(vReg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Operand pinned to vector register v13 (allocation class v13_reg).
operand vReg_V13()
%{
  constraint(ALLOC_IN_RC(v13_reg));
  match(VecA);
  match(vReg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Operand pinned to vector register v14 (allocation class v14_reg).
operand vReg_V14()
%{
  constraint(ALLOC_IN_RC(v14_reg));
  match(VecA);
  match(vReg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Operand pinned to vector register v15 (allocation class v15_reg).
operand vReg_V15()
%{
  constraint(ALLOC_IN_RC(v15_reg));
  match(VecA);
  match(vReg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
|
||||
|
||||
operand vRegMask()
|
||||
%{
|
||||
constraint(ALLOC_IN_RC(vmask_reg));
|
||||
|
@ -3843,116 +3843,6 @@ instruct vclearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy,
|
||||
ins_pipe(pipe_class_memory);
|
||||
%}
|
||||
|
||||
// CompressBits of Long & Integer

// j.l.Integer::compress — delegates to C2_MacroAssembler::compress_bits_i_v.
// The format block mirrors the emitted rvv sequence; the TEMP list pins
// v0 plus the m2-wide v4 and v8 groups (hence the v5/v9 companions).
// t0 is also clobbered (holds the bit length, 32).
instruct compressBitsI(iRegINoSp dst, iRegIorL2I src, iRegIorL2I mask, vRegMask_V0 v0,
                       vReg_V4 v4, vReg_V5 v5, vReg_V8 v8, vReg_V9 v9) %{
  match(Set dst (CompressBits src mask));
  effect(TEMP v0, TEMP v4, TEMP v5, TEMP v8, TEMP v9);
  format %{ "vsetivli x0, 1, e32, m1, tu, mu\t#@compressBitsI\n\t"
            "vmv.s.x $v0, $src\n\t"
            "mv t0, 32\n\t"
            "vsetvli x0, t0, e8, m2, tu, mu\n\t"
            "vmv.v.i $v4, 0\n\t"
            "vmerge.vim $v4, $v4, 1, $v0\n\t"
            "vmv.v.i $v8, 0\n\t"
            "vsetivli x0, 1, e32, m1, tu, mu\n\t"
            "vmv.s.x $v0, $mask\n\t"
            "vsetvli x0, t0, e8, m2, tu, mu\n\t"
            "vcompress.vm $v8, $v4, $v0\n\t"
            "vmseq.vi $v0, $v8, 1\n\t"
            "vsetivli x0, 1, e32, m1, tu, mu\n\t"
            "vmv.x.s $dst, $v0\t#@compressBitsI\n\t"
  %}
  ins_encode %{
    __ compress_bits_i_v(as_Register($dst$$reg), as_Register($src$$reg), as_Register($mask$$reg));
  %}
  ins_pipe(pipe_slow);
%}
|
||||
|
||||
// j.l.Long::compress — delegates to C2_MacroAssembler::compress_bits_l_v.
// The format block mirrors the emitted rvv sequence; the TEMP list pins
// v0 plus the m4-wide v4 and v8 groups (hence the v5..v7/v9..v11
// companions). t0 is also clobbered (holds the bit length, 64).
instruct compressBitsL(iRegLNoSp dst, iRegL src, iRegL mask, vRegMask_V0 v0,
                       vReg_V4 v4, vReg_V5 v5, vReg_V6 v6, vReg_V7 v7,
                       vReg_V8 v8, vReg_V9 v9, vReg_V10 v10, vReg_V11 v11) %{
  match(Set dst (CompressBits src mask));
  effect(TEMP v0, TEMP v4, TEMP v5, TEMP v6, TEMP v7, TEMP v8, TEMP v9, TEMP v10, TEMP v11);
  format %{ "vsetivli x0, 1, e64, m1, tu, mu\t#@compressBitsL\n\t"
            "vmv.s.x $v0, $src\n\t"
            "mv t0, 64\n\t"
            "vsetvli x0, t0, e8, m4, tu, mu\n\t"
            "vmv.v.i $v4, 0\n\t"
            "vmerge.vim $v4, $v4, 1, $v0\n\t"
            "vmv.v.i $v8, 0\n\t"
            "vsetivli x0, 1, e64, m1, tu, mu\n\t"
            "vmv.s.x $v0, $mask\n\t"
            "vsetvli x0, t0, e8, m4, tu, mu\n\t"
            "vcompress.vm $v8, $v4, $v0\n\t"
            "vmseq.vi $v0, $v8, 1\n\t"
            "vsetivli x0, 1, e64, m1, tu, mu\n\t"
            "vmv.x.s $dst, $v0\t#@compressBitsL\n\t"
  %}
  ins_encode %{
    __ compress_bits_l_v(as_Register($dst$$reg), as_Register($src$$reg), as_Register($mask$$reg));
  %}
  ins_pipe(pipe_slow);
%}
|
||||
|
||||
// ExpandBits of Long & Integer

// j.l.Integer::expand — delegates to C2_MacroAssembler::expand_bits_i_v.
// The format block mirrors the emitted rvv sequence; the TEMP list pins
// v0 plus the m2-wide v4, v8 and v12 groups (hence the v5/v9/v13
// companions). t0 is also clobbered (holds the bit length, 32).
instruct expandBitsI(iRegINoSp dst, iRegIorL2I src, iRegIorL2I mask, vRegMask_V0 v0,
                     vReg_V4 v4, vReg_V5 v5, vReg_V8 v8, vReg_V9 v9, vReg_V12 v12, vReg_V13 v13) %{
  match(Set dst (ExpandBits src mask));
  effect(TEMP v0, TEMP v4, TEMP v5, TEMP v8, TEMP v9, TEMP v12, TEMP v13);
  format %{ "vsetivli x0, 1, e32, m1, tu, mu\t#@expandBitsI\n\t"
            "vmv.s.x $v0, $src\n\t"
            "mv t0, 32\n\t"
            "vsetvli x0, t0, e8, m2, tu, mu\n\t"
            "vmv.v.i $v4, 0\n\t"
            "vmerge.vim $v4, $v4, 1, $v0\n\t"
            "vmv.v.i $v12, 0\n\t"
            "vsetivli x0, 1, e32, m1, tu, mu\n\t"
            "vmv.s.x $v0, $mask\n\t"
            "vsetvli x0, t0, e8, m2, tu, mu\n\t"
            "viota.m $v8, $v0\n\t"
            "vrgather.vv $v12, $v4, $v8, $v0.t\n\t"
            "vmseq.vi $v0, $v12, 1\n\t"
            "vsetivli x0, 1, e32, m1, tu, mu\n\t"
            "vmv.x.s $dst, $v0\t#@expandBitsI\n\t"
  %}
  ins_encode %{
    __ expand_bits_i_v(as_Register($dst$$reg), as_Register($src$$reg), as_Register($mask$$reg));
  %}
  ins_pipe(pipe_slow);
%}
|
||||
|
||||
// j.l.Long::expand — delegates to C2_MacroAssembler::expand_bits_l_v.
// The format block mirrors the emitted rvv sequence; the TEMP list pins
// v0 plus the m4-wide v4, v8 and v12 groups (hence the v5..v7/v9..v11/
// v13..v15 companions). t0 is also clobbered (holds the bit length, 64).
instruct expandBitsL(iRegLNoSp dst, iRegL src, iRegL mask, vRegMask_V0 v0,
                     vReg_V4 v4, vReg_V5 v5, vReg_V6 v6, vReg_V7 v7,
                     vReg_V8 v8, vReg_V9 v9, vReg_V10 v10, vReg_V11 v11,
                     vReg_V12 v12, vReg_V13 v13, vReg_V14 v14, vReg_V15 v15) %{
  match(Set dst (ExpandBits src mask));
  effect(TEMP v0, TEMP v4, TEMP v5, TEMP v6, TEMP v7, TEMP v8, TEMP v9, TEMP v10, TEMP v11,
         TEMP v12, TEMP v13, TEMP v14, TEMP v15);
  format %{ "vsetivli x0, 1, e64, m1, tu, mu\t#@expandBitsL\n\t"
            "vmv.s.x $v0, $src\n\t"
            "mv t0, 64\n\t"
            "vsetvli x0, t0, e8, m4, tu, mu\n\t"
            "vmv.v.i $v4, 0\n\t"
            "vmerge.vim $v4, $v4, 1, $v0\n\t"
            "vmv.v.i $v12, 0\n\t"
            "vsetivli x0, 1, e64, m1, tu, mu\n\t"
            "vmv.s.x $v0, $mask\n\t"
            "vsetvli x0, t0, e8, m4, tu, mu\n\t"
            "viota.m $v8, $v0\n\t"
            "vrgather.vv $v12, $v4, $v8, $v0.t\n\t"
            "vmseq.vi $v0, $v12, 1\n\t"
            "vsetivli x0, 1, e64, m1, tu, mu\n\t"
            "vmv.x.s $dst, $v0\t#@expandBitsL\n\t"
  %}
  ins_encode %{
    __ expand_bits_l_v(as_Register($dst$$reg), as_Register($src$$reg), as_Register($mask$$reg));
  %}
  ins_pipe(pipe_slow);
%}
|
||||
|
||||
// Vector Load Const
|
||||
instruct vloadcon(vReg dst, immI0 src) %{
|
||||
match(Set dst (VectorLoadConst src));
|
||||
|
@ -30,8 +30,7 @@
|
||||
* @requires (((os.arch=="x86" | os.arch=="amd64" | os.arch=="x86_64") &
|
||||
* (vm.cpu.features ~= ".*bmi2.*" & vm.cpu.features ~= ".*bmi1.*" &
|
||||
* vm.cpu.features ~= ".*sse2.*")) |
|
||||
* (os.arch=="aarch64" & vm.cpu.features ~= ".*svebitperm.*") |
|
||||
* (os.arch=="riscv64" & vm.cpu.features ~= ".*rvv.*"))
|
||||
* (os.arch=="aarch64" & vm.cpu.features ~= ".*svebitperm.*"))
|
||||
* @library /test/lib /
|
||||
* @run driver compiler.intrinsics.TestBitShuffleOpers
|
||||
*/
|
||||
|
Loading…
Reference in New Issue
Block a user