8307609: RISC-V: Added support for Extract, Compress, Expand and other nodes for Vector API

Co-authored-by: zifeihan <caogui@iscas.ac.cn>
Reviewed-by: fyang, fjiang
This commit is contained in:
Dingli Zhang 2023-05-19 03:09:13 +00:00 committed by Fei Yang
parent e520cdc882
commit 97ade57fb2
6 changed files with 1570 additions and 262 deletions

View File

@ -1311,6 +1311,9 @@ enum VectorMask {
INSN(vsrl_vi, 0b1010111, 0b011, 0b101000);
INSN(vsll_vi, 0b1010111, 0b011, 0b100101);
// Vector Slide Instructions
INSN(vslidedown_vi, 0b1010111, 0b011, 0b001111);
#undef INSN
#define INSN(NAME, op, funct3, funct6) \
@ -1511,6 +1514,9 @@ enum VectorMask {
INSN(vadd_vx, 0b1010111, 0b100, 0b000000);
INSN(vrsub_vx, 0b1010111, 0b100, 0b000011);
// Vector Slide Instructions
INSN(vslidedown_vx, 0b1010111, 0b100, 0b001111);
#undef INSN
#define INSN(NAME, op, funct3, vm, funct6) \
@ -1523,6 +1529,16 @@ enum VectorMask {
#undef INSN
#define INSN(NAME, op, funct3, vm, funct6) \
void NAME(VectorRegister Vd, VectorRegister Vs2, FloatRegister Rs1) { \
patch_VArith(op, Vd, funct3, Rs1->raw_encoding(), Vs2, vm, funct6); \
}
// Vector Floating-Point Merge Instruction
INSN(vfmerge_vfm, 0b1010111, 0b101, 0b0, 0b010111);
#undef INSN
#define INSN(NAME, op, funct3, funct6) \
void NAME(VectorRegister Vd, VectorRegister Vs2, FloatRegister Rs1, VectorMask vm = unmasked) { \
patch_VArith(op, Vd, funct3, Rs1->raw_encoding(), Vs2, vm, funct6); \
@ -1761,16 +1777,11 @@ enum Nf {
}
// Vector unordered indexed load instructions
INSN(vluxei8_v, 0b0000111, 0b000, 0b01, 0b0);
INSN(vluxei16_v, 0b0000111, 0b101, 0b01, 0b0);
INSN(vluxei32_v, 0b0000111, 0b110, 0b01, 0b0);
INSN(vluxei64_v, 0b0000111, 0b111, 0b01, 0b0);
// Vector ordered indexed load instructions
INSN(vloxei8_v, 0b0000111, 0b000, 0b11, 0b0);
INSN(vloxei16_v, 0b0000111, 0b101, 0b11, 0b0);
INSN(vloxei32_v, 0b0000111, 0b110, 0b11, 0b0);
INSN(vloxei64_v, 0b0000111, 0b111, 0b11, 0b0);
// Vector unordered indexed store instructions
INSN(vsuxei32_v, 0b0100111, 0b110, 0b01, 0b0);
#undef INSN
#define INSN(NAME, op, width, mop, mew) \

View File

@ -1639,33 +1639,65 @@ void C2_MacroAssembler::minmax_fp_v(VectorRegister dst, VectorRegister src1, Vec
vfadd_vv(dst, src2, src2, Assembler::v0_t);
}
// Set dst to NaN if any NaN input.
// The destination vector register elements corresponding to masked-off elements
// are handled with a mask-undisturbed policy.
void C2_MacroAssembler::minmax_fp_masked_v(VectorRegister dst, VectorRegister src1, VectorRegister src2,
VectorRegister vmask, VectorRegister tmp1, VectorRegister tmp2,
bool is_double, bool is_min, int vector_length) {
assert_different_registers(src1, src2, tmp1, tmp2);
vsetvli_helper(is_double ? T_DOUBLE : T_FLOAT, vector_length);
// Check vector elements of src1 and src2 for NaN.
// A self-compare with vmfeq yields 1 only for non-NaN lanes, so
// tmp1/tmp2 become "element is not NaN" masks for src1/src2.
vmfeq_vv(tmp1, src1, src1);
vmfeq_vv(tmp2, src2, src2);
// v0 = active lanes (per vmask) where src1 IS NaN; for those lanes
// write src1 + src1 into dst, which propagates the NaN.
vmandn_mm(v0, vmask, tmp1);
vfadd_vv(dst, src1, src1, Assembler::v0_t);
// Same for lanes where src2 is NaN.
vmandn_mm(v0, vmask, tmp2);
vfadd_vv(dst, src2, src2, Assembler::v0_t);
// v0 = active lanes where BOTH inputs are non-NaN; only those lanes
// take the regular min/max result. Masked-off lanes keep dst's old value.
vmand_mm(tmp2, tmp1, tmp2);
vmand_mm(v0, vmask, tmp2);
is_min ? vfmin_vv(dst, src1, src2, Assembler::v0_t)
: vfmax_vv(dst, src1, src2, Assembler::v0_t);
}
// Set dst to NaN if any NaN input.
void C2_MacroAssembler::reduce_minmax_fp_v(FloatRegister dst,
FloatRegister src1, VectorRegister src2,
VectorRegister tmp1, VectorRegister tmp2,
bool is_double, bool is_min, int vector_length) {
bool is_double, bool is_min, int vector_length, VectorMask vm) {
assert_different_registers(dst, src1);
assert_different_registers(src2, tmp1, tmp2);
Label L_done, L_NaN;
Label L_done, L_NaN_1, L_NaN_2;
// Set dst to src1 if src1 is NaN
is_double ? feq_d(t0, src1, src1)
: feq_s(t0, src1, src1);
beqz(t0, L_NaN_2);
vsetvli_helper(is_double ? T_DOUBLE : T_FLOAT, vector_length);
vfmv_s_f(tmp2, src1);
is_min ? vfredmin_vs(tmp1, src2, tmp2)
: vfredmax_vs(tmp1, src2, tmp2);
is_min ? vfredmin_vs(tmp1, src2, tmp2, vm)
: vfredmax_vs(tmp1, src2, tmp2, vm);
vfmv_f_s(dst, tmp1);
fsflags(zr);
// Checking NaNs
vmflt_vf(tmp2, src2, src1);
frflags(t0);
bnez(t0, L_NaN);
// Checking NaNs in src2
vmfne_vv(tmp1, src2, src2, vm);
vcpop_m(t0, tmp1, vm);
beqz(t0, L_done);
bind(L_NaN_1);
vfredusum_vs(tmp1, src2, tmp2, vm);
vfmv_f_s(dst, tmp1);
j(L_done);
bind(L_NaN);
vfmv_s_f(tmp2, src1);
vfredusum_vs(tmp1, src2, tmp2);
bind(L_NaN_2);
is_double ? fmv_d(dst, src1)
: fmv_s(dst, src1);
bind(L_done);
vfmv_f_s(dst, tmp1);
}
bool C2_MacroAssembler::in_scratch_emit_size() {
@ -1678,39 +1710,35 @@ bool C2_MacroAssembler::in_scratch_emit_size() {
return MacroAssembler::in_scratch_emit_size();
}
void C2_MacroAssembler::reduce_integral_v(Register dst, VectorRegister tmp,
Register src1, VectorRegister src2,
BasicType bt, int opc, int vector_length) {
void C2_MacroAssembler::reduce_integral_v(Register dst, Register src1,
VectorRegister src2, VectorRegister tmp,
int opc, BasicType bt, int vector_length, VectorMask vm) {
assert(bt == T_BYTE || bt == T_SHORT || bt == T_INT || bt == T_LONG, "unsupported element type");
vsetvli_helper(bt, vector_length);
vmv_s_x(tmp, src1);
switch (opc) {
case Op_AddReductionVI:
case Op_AddReductionVL:
vredsum_vs(tmp, src2, tmp);
vredsum_vs(tmp, src2, tmp, vm);
break;
case Op_AndReductionV:
vredand_vs(tmp, src2, tmp);
vredand_vs(tmp, src2, tmp, vm);
break;
case Op_OrReductionV:
vredor_vs(tmp, src2, tmp);
vredor_vs(tmp, src2, tmp, vm);
break;
case Op_XorReductionV:
vredxor_vs(tmp, src2, tmp);
vredxor_vs(tmp, src2, tmp, vm);
break;
case Op_MaxReductionV:
vredmax_vs(tmp, src2, tmp);
vredmax_vs(tmp, src2, tmp, vm);
break;
case Op_MinReductionV:
vredmin_vs(tmp, src2, tmp);
vredmin_vs(tmp, src2, tmp, vm);
break;
default:
ShouldNotReachHere();
}
vmv_x_s(dst, tmp);
}
@ -1728,8 +1756,8 @@ void C2_MacroAssembler::vsetvli_helper(BasicType bt, int vector_length, LMUL vlm
}
}
void C2_MacroAssembler::compare_integral_v(VectorRegister vd, BasicType bt, int vector_length,
VectorRegister src1, VectorRegister src2, int cond, VectorMask vm) {
void C2_MacroAssembler::compare_integral_v(VectorRegister vd, VectorRegister src1, VectorRegister src2,
int cond, BasicType bt, int vector_length, VectorMask vm) {
assert(is_integral_type(bt), "unsupported element type");
assert(vm == Assembler::v0_t ? vd != v0 : true, "should be different registers");
vsetvli_helper(bt, vector_length);
@ -1747,42 +1775,19 @@ void C2_MacroAssembler::compare_integral_v(VectorRegister vd, BasicType bt, int
}
}
void C2_MacroAssembler::compare_floating_point_v(VectorRegister vd, BasicType bt, int vector_length,
VectorRegister src1, VectorRegister src2,
VectorRegister tmp1, VectorRegister tmp2,
VectorRegister vmask, int cond, VectorMask vm) {
void C2_MacroAssembler::compare_fp_v(VectorRegister vd, VectorRegister src1, VectorRegister src2,
int cond, BasicType bt, int vector_length, VectorMask vm) {
assert(is_floating_point_type(bt), "unsupported element type");
assert(vd != v0, "should be different registers");
assert(vm == Assembler::v0_t ? vmask != v0 : true, "vmask should not be v0");
assert(vm == Assembler::v0_t ? vd != v0 : true, "should be different registers");
vsetvli_helper(bt, vector_length);
// Check vector elements of src1 and src2 for quiet and signaling NaN.
vfclass_v(tmp1, src1);
vfclass_v(tmp2, src2);
vsrl_vi(tmp1, tmp1, 8);
vsrl_vi(tmp2, tmp2, 8);
vmseq_vx(tmp1, tmp1, zr);
vmseq_vx(tmp2, tmp2, zr);
if (vm == Assembler::v0_t) {
vmand_mm(tmp2, tmp1, tmp2);
if (cond == BoolTest::ne) {
vmandn_mm(tmp1, vmask, tmp2);
}
vmand_mm(v0, vmask, tmp2);
} else {
vmand_mm(v0, tmp1, tmp2);
if (cond == BoolTest::ne) {
vmnot_m(tmp1, v0);
}
}
vmclr_m(vd);
switch (cond) {
case BoolTest::eq: vmfeq_vv(vd, src1, src2, Assembler::v0_t); break;
case BoolTest::ne: vmfne_vv(vd, src1, src2, Assembler::v0_t);
vmor_mm(vd, vd, tmp1); break;
case BoolTest::le: vmfle_vv(vd, src1, src2, Assembler::v0_t); break;
case BoolTest::ge: vmfge_vv(vd, src1, src2, Assembler::v0_t); break;
case BoolTest::lt: vmflt_vv(vd, src1, src2, Assembler::v0_t); break;
case BoolTest::gt: vmfgt_vv(vd, src1, src2, Assembler::v0_t); break;
case BoolTest::eq: vmfeq_vv(vd, src1, src2, vm); break;
case BoolTest::ne: vmfne_vv(vd, src1, src2, vm); break;
case BoolTest::le: vmfle_vv(vd, src1, src2, vm); break;
case BoolTest::ge: vmfge_vv(vd, src1, src2, vm); break;
case BoolTest::lt: vmflt_vv(vd, src1, src2, vm); break;
case BoolTest::gt: vmfgt_vv(vd, src1, src2, vm); break;
default:
assert(false, "unsupported compare condition");
ShouldNotReachHere();
@ -1863,10 +1868,8 @@ void C2_MacroAssembler::integer_narrow_v(VectorRegister dst, BasicType dst_bt, i
#define VFCVT_SAFE(VFLOATCVT) \
void C2_MacroAssembler::VFLOATCVT##_safe(VectorRegister dst, VectorRegister src) { \
assert_different_registers(dst, src); \
vfclass_v(v0, src); \
vxor_vv(dst, dst, dst); \
vsrl_vi(v0, v0, 8); \
vmseq_vx(v0, v0, zr); \
vmfeq_vv(v0, src, src); \
VFLOATCVT(dst, src, Assembler::v0_t); \
}
@ -1875,3 +1878,43 @@ VFCVT_SAFE(vfwcvt_rtz_x_f_v);
VFCVT_SAFE(vfncvt_rtz_x_f_w);
#undef VFCVT_SAFE
// Extract a scalar element from a vector at position 'idx'.
// The input elements in src are expected to be of integral type.
void C2_MacroAssembler::extract_v(Register dst, VectorRegister src, BasicType bt,
                                  int idx, VectorRegister tmp) {
  assert(is_integral_type(bt), "unsupported element type");
  assert(idx >= 0, "idx cannot be negative");
  // Only the first element is read after the slidedown, so vl = 1 suffices.
  vsetvli_helper(bt, 1);
  if (idx == 0) {
    // Element 0 is directly readable with vmv.x.s - no slidedown needed.
    vmv_x_s(dst, src);
    return;
  }
  if (idx <= 31) {
    // The offset fits in the 5-bit unsigned immediate of vslidedown.vi.
    vslidedown_vi(tmp, src, idx);
  } else {
    // Larger offsets go through a scratch register and vslidedown.vx.
    mv(t0, idx);
    vslidedown_vx(tmp, src, t0);
  }
  vmv_x_s(dst, tmp);
}
// Extract a scalar element from a vector at position 'idx'.
// The input elements in src are expected to be of floating point type.
void C2_MacroAssembler::extract_fp_v(FloatRegister dst, VectorRegister src, BasicType bt,
                                     int idx, VectorRegister tmp) {
  assert(is_floating_point_type(bt), "unsupported element type");
  assert(idx >= 0, "idx cannot be negative");
  // Only the first element is read after the slidedown, so vl = 1 suffices.
  vsetvli_helper(bt, 1);
  if (idx == 0) {
    // Element 0 is directly readable with vfmv.f.s - no slidedown needed.
    vfmv_f_s(dst, src);
    return;
  }
  if (idx <= 31) {
    // The offset fits in the 5-bit unsigned immediate of vslidedown.vi.
    vslidedown_vi(tmp, src, idx);
  } else {
    // Larger offsets go through a scratch register and vslidedown.vx.
    mv(t0, idx);
    vslidedown_vx(tmp, src, t0);
  }
  vfmv_f_s(dst, tmp);
}

View File

@ -189,23 +189,28 @@
VectorRegister src1, VectorRegister src2,
bool is_double, bool is_min, int vector_length);
void minmax_fp_masked_v(VectorRegister dst, VectorRegister src1, VectorRegister src2,
VectorRegister vmask, VectorRegister tmp1, VectorRegister tmp2,
bool is_double, bool is_min, int vector_length);
void reduce_minmax_fp_v(FloatRegister dst,
FloatRegister src1, VectorRegister src2,
VectorRegister tmp1, VectorRegister tmp2,
bool is_double, bool is_min, int vector_length);
bool is_double, bool is_min, int vector_length,
VectorMask vm = Assembler::unmasked);
void reduce_integral_v(Register dst, VectorRegister tmp,
Register src1, VectorRegister src2,
BasicType bt, int opc, int vector_length);
void reduce_integral_v(Register dst, Register src1,
VectorRegister src2, VectorRegister tmp,
int opc, BasicType bt, int vector_length,
VectorMask vm = Assembler::unmasked);
void vsetvli_helper(BasicType bt, int vector_length, LMUL vlmul = Assembler::m1, Register tmp = t0);
void compare_integral_v(VectorRegister dst, BasicType bt, int vector_length,
VectorRegister src1, VectorRegister src2, int cond, VectorMask vm = Assembler::unmasked);
void compare_integral_v(VectorRegister dst, VectorRegister src1, VectorRegister src2, int cond,
BasicType bt, int vector_length, VectorMask vm = Assembler::unmasked);
void compare_floating_point_v(VectorRegister dst, BasicType bt, int vector_length,
VectorRegister src1, VectorRegister src2, VectorRegister tmp1, VectorRegister tmp2,
VectorRegister vmask, int cond, VectorMask vm = Assembler::unmasked);
void compare_fp_v(VectorRegister dst, VectorRegister src1, VectorRegister src2, int cond,
BasicType bt, int vector_length, VectorMask vm = Assembler::unmasked);
// In Matcher::scalable_predicate_reg_slots,
// we assume each predicate register is one-eighth of the size of
@ -240,4 +245,7 @@
void vfwcvt_rtz_x_f_v_safe(VectorRegister dst, VectorRegister src);
void vfncvt_rtz_x_f_w_safe(VectorRegister dst, VectorRegister src);
void extract_v(Register dst, VectorRegister src, BasicType bt, int idx, VectorRegister tmp);
void extract_fp_v(FloatRegister dst, VectorRegister src, BasicType bt, int idx, VectorRegister tmp);
#endif // CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP

View File

@ -1273,12 +1273,12 @@ public:
vnsrl_wx(vd, vs, x0, vm);
}
inline void vneg_v(VectorRegister vd, VectorRegister vs) {
vrsub_vx(vd, vs, x0);
inline void vneg_v(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
vrsub_vx(vd, vs, x0, vm);
}
inline void vfneg_v(VectorRegister vd, VectorRegister vs) {
vfsgnjn_vv(vd, vs, vs);
inline void vfneg_v(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
vfsgnjn_vv(vd, vs, vs, vm);
}
inline void vmsgt_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {

View File

@ -1887,85 +1887,6 @@ const bool Matcher::match_rule_supported(int opcode) {
return true; // Per default match rules are supported.
}
// Superword (SLP) support delegates to the generic vector check: the same
// opcode/length/type combinations are accepted in both cases on this port.
const bool Matcher::match_rule_supported_superword(int opcode, int vlen, BasicType bt) {
return match_rule_supported_vector(opcode, vlen, bt);
}
// Identify extra cases that we might want to provide match rules for vector nodes and
// other intrinsics guarded with vector length (vlen) and element type (bt).
const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
  // The opcode must be generally supported and the (bt, vlen) shape must fit
  // the hardware before the vector-specific opcode table is consulted.
  return match_rule_supported(opcode) &&
         vector_size_supported(bt, vlen) &&
         op_vec_supported(opcode);
}
// Returns whether the masked (predicated) form of the given vector opcode is
// supported for the given vector length and element type.
const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
// Masked vector operations require the RVV extension.
if (!UseRVV) {
return false;
}
switch (opcode) {
// These opcodes have masked variants that share the unmasked match rule,
// so the opcode is passed through unchanged.
case Op_AddVB:
case Op_AddVS:
case Op_AddVI:
case Op_AddVL:
case Op_AddVF:
case Op_AddVD:
case Op_SubVB:
case Op_SubVS:
case Op_SubVI:
case Op_SubVL:
case Op_SubVF:
case Op_SubVD:
case Op_MulVB:
case Op_MulVS:
case Op_MulVI:
case Op_MulVL:
case Op_MulVF:
case Op_MulVD:
case Op_DivVF:
case Op_DivVD:
case Op_VectorLoadMask:
case Op_VectorMaskCmp:
case Op_AndVMask:
case Op_XorVMask:
case Op_OrVMask:
case Op_RShiftVB:
case Op_RShiftVS:
case Op_RShiftVI:
case Op_RShiftVL:
case Op_LShiftVB:
case Op_LShiftVS:
case Op_LShiftVI:
case Op_LShiftVL:
case Op_URShiftVB:
case Op_URShiftVS:
case Op_URShiftVI:
case Op_URShiftVL:
case Op_VectorBlend:
case Op_VectorReinterpret:
break;
// Plain loads/stores are checked via their explicitly-masked node variants.
case Op_LoadVector:
opcode = Op_LoadVectorMasked;
break;
case Op_StoreVector:
opcode = Op_StoreVectorMasked;
break;
// Any other opcode has no masked support on this port.
default:
return false;
}
// Finally apply the generic opcode/length/type check.
return match_rule_supported_vector(opcode, vlen, bt);
}
// This port never requests partial-vector lowering from C2; all nodes are
// handled at full vector length.
const bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
return false;
}
// No extra load-shuffle node is needed for any element type / length on this port.
const bool Matcher::vector_needs_load_shuffle(BasicType elem_bt, int vlen) {
return false;
}
// Predicate (vector mask) registers are allocated from _VMASK_REG_mask.
const RegMask* Matcher::predicate_reg_mask(void) {
return &_VMASK_REG_mask;
}

File diff suppressed because it is too large Load Diff