8306966: RISC-V: Support vector cast node for Vector API

Co-authored-by: Dingli Zhang <dingli@iscas.ac.cn>
Reviewed-by: fyang, fjiang
This commit is contained in:
Gui Cao 2023-05-08 01:15:12 +00:00 committed by Fei Yang
parent 0dca573ca5
commit 495f2688d6
5 changed files with 673 additions and 299 deletions

View File

@ -1230,13 +1230,19 @@ enum VectorMask {
INSN(viota_m, 0b1010111, 0b010, 0b10000, 0b010100);
// Vector Single-Width Floating-Point/Integer Type-Convert Instructions
INSN(vfcvt_xu_f_v, 0b1010111, 0b001, 0b00000, 0b010010);
INSN(vfcvt_x_f_v, 0b1010111, 0b001, 0b00001, 0b010010);
INSN(vfcvt_f_xu_v, 0b1010111, 0b001, 0b00010, 0b010010);
INSN(vfcvt_f_x_v, 0b1010111, 0b001, 0b00011, 0b010010);
INSN(vfcvt_rtz_xu_f_v, 0b1010111, 0b001, 0b00110, 0b010010);
INSN(vfcvt_f_x_v, 0b1010111, 0b001, 0b00011, 0b010010);
INSN(vfcvt_rtz_x_f_v, 0b1010111, 0b001, 0b00111, 0b010010);
// Vector Widening Floating-Point/Integer Type-Convert Instructions
INSN(vfwcvt_f_x_v, 0b1010111, 0b001, 0b01011, 0b010010);
INSN(vfwcvt_f_f_v, 0b1010111, 0b001, 0b01100, 0b010010);
INSN(vfwcvt_rtz_x_f_v, 0b1010111, 0b001, 0b01111, 0b010010);
// Vector Narrowing Floating-Point/Integer Type-Convert Instructions
INSN(vfncvt_f_x_w, 0b1010111, 0b001, 0b10011, 0b010010);
INSN(vfncvt_f_f_w, 0b1010111, 0b001, 0b10100, 0b010010);
INSN(vfncvt_rtz_x_f_w, 0b1010111, 0b001, 0b10111, 0b010010);
// Vector Floating-Point Instruction
INSN(vfsqrt_v, 0b1010111, 0b001, 0b00000, 0b010011);
INSN(vfclass_v, 0b1010111, 0b001, 0b10000, 0b010011);
@ -1431,6 +1437,9 @@ enum VectorMask {
INSN(vsub_vv, 0b1010111, 0b000, 0b000010);
INSN(vadd_vv, 0b1010111, 0b000, 0b000000);
// Vector Register Gather Instructions
INSN(vrgather_vv, 0b1010111, 0b000, 0b001100);
#undef INSN

View File

@ -1617,10 +1617,10 @@ void C2_MacroAssembler::string_indexof_char_v(Register str1, Register cnt1,
// Set dst to NaN if any NaN input.
void C2_MacroAssembler::minmax_fp_v(VectorRegister dst, VectorRegister src1, VectorRegister src2,
bool is_double, bool is_min, int length_in_bytes) {
bool is_double, bool is_min, int vector_length) {
assert_different_registers(dst, src1, src2);
rvv_vsetvli(is_double ? T_DOUBLE : T_FLOAT, length_in_bytes);
vsetvli_helper(is_double ? T_DOUBLE : T_FLOAT, vector_length);
is_min ? vfmin_vv(dst, src1, src2)
: vfmax_vv(dst, src1, src2);
@ -1635,11 +1635,11 @@ void C2_MacroAssembler::minmax_fp_v(VectorRegister dst, VectorRegister src1, Vec
void C2_MacroAssembler::reduce_minmax_fp_v(FloatRegister dst,
FloatRegister src1, VectorRegister src2,
VectorRegister tmp1, VectorRegister tmp2,
bool is_double, bool is_min, int length_in_bytes) {
bool is_double, bool is_min, int vector_length) {
assert_different_registers(src2, tmp1, tmp2);
Label L_done, L_NaN;
rvv_vsetvli(is_double ? T_DOUBLE : T_FLOAT, length_in_bytes);
vsetvli_helper(is_double ? T_DOUBLE : T_FLOAT, vector_length);
vfmv_s_f(tmp2, src1);
is_min ? vfredmin_vs(tmp1, src2, tmp2)
@ -1670,12 +1670,12 @@ bool C2_MacroAssembler::in_scratch_emit_size() {
return MacroAssembler::in_scratch_emit_size();
}
void C2_MacroAssembler::rvv_reduce_integral(Register dst, VectorRegister tmp,
Register src1, VectorRegister src2,
BasicType bt, int opc, int length_in_bytes) {
void C2_MacroAssembler::reduce_integral_v(Register dst, VectorRegister tmp,
Register src1, VectorRegister src2,
BasicType bt, int opc, int vector_length) {
assert(bt == T_BYTE || bt == T_SHORT || bt == T_INT || bt == T_LONG, "unsupported element type");
rvv_vsetvli(bt, length_in_bytes);
vsetvli_helper(bt, vector_length);
vmv_s_x(tmp, src1);
@ -1707,27 +1707,24 @@ void C2_MacroAssembler::rvv_reduce_integral(Register dst, VectorRegister tmp,
}
// Set vl and vtype for full and partial vector operations.
// (vlmul = m1, vma = mu, vta = tu, vill = false)
void C2_MacroAssembler::rvv_vsetvli(BasicType bt, int length_in_bytes, Register tmp) {
// (vma = mu, vta = tu, vill = false)
void C2_MacroAssembler::vsetvli_helper(BasicType bt, int vector_length, LMUL vlmul, Register tmp) {
Assembler::SEW sew = Assembler::elemtype_to_sew(bt);
if (length_in_bytes == MaxVectorSize) {
vsetvli(tmp, x0, sew);
if (vector_length <= 31) {
vsetivli(tmp, vector_length, sew, vlmul);
} else if (vector_length == (MaxVectorSize / type2aelembytes(bt))) {
vsetvli(tmp, x0, sew, vlmul);
} else {
int num_elements = length_in_bytes / type2aelembytes(bt);
if (num_elements <= 31) {
vsetivli(tmp, num_elements, sew);
} else {
mv(tmp, num_elements);
vsetvli(tmp, tmp, sew);
}
mv(tmp, vector_length);
vsetvli(tmp, tmp, sew, vlmul);
}
}
void C2_MacroAssembler::compare_integral_v(VectorRegister vd, BasicType bt, int length_in_bytes,
void C2_MacroAssembler::compare_integral_v(VectorRegister vd, BasicType bt, int vector_length,
VectorRegister src1, VectorRegister src2, int cond, VectorMask vm) {
assert(is_integral_type(bt), "unsupported element type");
assert(vm == Assembler::v0_t ? vd != v0 : true, "should be different registers");
rvv_vsetvli(bt, length_in_bytes);
vsetvli_helper(bt, vector_length);
vmclr_m(vd);
switch (cond) {
case BoolTest::eq: vmseq_vv(vd, src1, src2, vm); break;
@ -1742,15 +1739,15 @@ void C2_MacroAssembler::compare_integral_v(VectorRegister vd, BasicType bt, int
}
}
void C2_MacroAssembler::compare_floating_point_v(VectorRegister vd, BasicType bt, int length_in_bytes,
void C2_MacroAssembler::compare_floating_point_v(VectorRegister vd, BasicType bt, int vector_length,
VectorRegister src1, VectorRegister src2,
VectorRegister tmp1, VectorRegister tmp2,
VectorRegister vmask, int cond, VectorMask vm) {
assert(is_floating_point_type(bt), "unsupported element type");
assert(vd != v0, "should be different registers");
assert(vm == Assembler::v0_t ? vmask != v0 : true, "vmask should not be v0");
rvv_vsetvli(bt, length_in_bytes);
// Check vector elements of src1 and src2 for quiet or signaling NaN.
vsetvli_helper(bt, vector_length);
// Check vector elements of src1 and src2 for quiet and signaling NaN.
vfclass_v(tmp1, src1);
vfclass_v(tmp2, src2);
vsrl_vi(tmp1, tmp1, 8);
@ -1782,4 +1779,91 @@ void C2_MacroAssembler::compare_floating_point_v(VectorRegister vd, BasicType bt
assert(false, "unsupported compare condition");
ShouldNotReachHere();
}
}
}
// Sign-extend each integral element of src (element type src_bt) to the wider
// element type dst_bt, writing the result to dst.
//   dst, src      - must be different registers (asserted below)
//   vector_length - passed with dst_bt to vsetvli_helper before the extension
//                   instruction is emitted
// Only T_BYTE, T_SHORT and T_INT sources emit an extension instruction.
void C2_MacroAssembler::integer_extend_v(VectorRegister dst, BasicType dst_bt, int vector_length,
                                         VectorRegister src, BasicType src_bt) {
  assert(type2aelembytes(dst_bt) > type2aelembytes(src_bt) && type2aelembytes(dst_bt) <= 8 && type2aelembytes(src_bt) <= 4, "invalid element size");
  assert(dst_bt != T_FLOAT && dst_bt != T_DOUBLE && src_bt != T_FLOAT && src_bt != T_DOUBLE, "unsupported element type");

  // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#52-vector-operands
  // When the destination EEW exceeds the source EEW, register overlap is only
  // allowed if the source EMUL is at least 1 and the overlap lies in the
  // highest-numbered part of the destination group. With LMUL=1 that can never
  // hold, so dst and src have to be distinct.
  assert_different_registers(dst, src);

  vsetvli_helper(dst_bt, vector_length);

  // Pick the extension factor (x2, x4 or x8) from the source/destination pair.
  switch (src_bt) {
    case T_BYTE:
      if (dst_bt == T_SHORT) {
        vsext_vf2(dst, src);
      } else if (dst_bt == T_INT) {
        vsext_vf4(dst, src);
      } else if (dst_bt == T_LONG) {
        vsext_vf8(dst, src);
      } else {
        ShouldNotReachHere();
      }
      break;
    case T_SHORT:
      // Any destination other than T_INT is treated as the x4 case.
      if (dst_bt != T_INT) {
        vsext_vf4(dst, src);
      } else {
        vsext_vf2(dst, src);
      }
      break;
    case T_INT:
      vsext_vf2(dst, src);
      break;
    default:
      // Other source types emit nothing beyond the vsetvli above.
      break;
  }
}
// Narrow each integral element of src (element type src_bt) to the smaller
// element type dst_bt, writing the result to dst.
// vncvt_x_x_w halves the element width per step, so larger gaps are bridged by
// a chain of steps (64 -> 32 -> 16 -> 8); every step after the first narrows
// dst in place.
// t0 is loaded with vector_length and is used as both operand and result of
// each vsetvli, so its previous value is lost.
// NOTE(review): the earlier header stated that the high part of dst is filled
// with zero; nothing in this routine zeroes dst explicitly -- confirm.
void C2_MacroAssembler::integer_narrow_v(VectorRegister dst, BasicType dst_bt, int vector_length,
                                         VectorRegister src, BasicType src_bt) {
  assert(type2aelembytes(dst_bt) < type2aelembytes(src_bt) && type2aelembytes(dst_bt) <= 4 && type2aelembytes(src_bt) <= 8, "invalid element size");
  assert(dst_bt != T_FLOAT && dst_bt != T_DOUBLE && src_bt != T_FLOAT && src_bt != T_DOUBLE, "unsupported element type");

  mv(t0, vector_length);

  switch (src_bt) {
    case T_LONG: {
      // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#117-vector-narrowing-integer-right-shift-instructions
      // The spec only hints that future extensions might narrow to 1/4 of the
      // source width, so for now each step can only halve the width.
      vsetvli(t0, t0, Assembler::e32, Assembler::mf2);
      vncvt_x_x_w(dst, src);
      if (dst_bt != T_SHORT && dst_bt != T_BYTE) {
        break;  // 32-bit result is final
      }
      vsetvli(t0, t0, Assembler::e16, Assembler::mf2);
      vncvt_x_x_w(dst, dst);
      if (dst_bt != T_BYTE) {
        break;  // 16-bit result is final
      }
      vsetvli(t0, t0, Assembler::e8, Assembler::mf2);
      vncvt_x_x_w(dst, dst);
      break;
    }
    case T_INT:
      // First step always produces 16-bit elements.
      vsetvli(t0, t0, Assembler::e16, Assembler::mf2);
      vncvt_x_x_w(dst, src);
      if (dst_bt == T_BYTE) {
        vsetvli(t0, t0, Assembler::e8, Assembler::mf2);
        vncvt_x_x_w(dst, dst);
      }
      break;
    case T_SHORT:
      vsetvli(t0, t0, Assembler::e8, Assembler::mf2);
      vncvt_x_x_w(dst, src);
      break;
    default:
      // Other source types emit nothing beyond the mv above.
      break;
  }
}
// Generates a NaN-safe wrapper named <insn>_safe around each float-to-integer
// convert instruction listed below. Emitted sequence:
//   vfclass_v  v0, src         classify every element of src into v0
//   vxor_vv    dst, dst, dst   clear dst
//   vsrl_vi    v0, v0, 8       drop classification bits 0..7
//   vmseq_vx   v0, v0, zr      mask = elements with nothing left after the shift
//   <insn>     dst, src, v0.t  convert only the elements selected by the mask
// In the RISC-V fclass encoding bits 8 and 9 flag signaling and quiet NaN, so
// NaN elements are excluded from the conversion and keep the zero written by
// the vxor.
// v0 is overwritten. dst must differ from src (asserted).
// NOTE(review): no vsetvli is issued here; presumably the caller has already
// configured vl/vtype -- confirm at the call sites.
// NOTE(review): dst presumably must not be v0 either, since v0 carries the
// mask; this is not asserted here.
#define VFCVT_SAFE(VFLOATCVT) \
void C2_MacroAssembler::VFLOATCVT##_safe(VectorRegister dst, VectorRegister src) { \
assert_different_registers(dst, src); \
vfclass_v(v0, src); \
vxor_vv(dst, dst, dst); \
vsrl_vi(v0, v0, 8); \
vmseq_vx(v0, v0, zr); \
VFLOATCVT(dst, src, Assembler::v0_t); \
}
// Same-width, widening and narrowing round-towards-zero conversions.
VFCVT_SAFE(vfcvt_rtz_x_f_v);
VFCVT_SAFE(vfwcvt_rtz_x_f_v);
VFCVT_SAFE(vfncvt_rtz_x_f_w);
#undef VFCVT_SAFE

View File

@ -187,23 +187,23 @@
void minmax_fp_v(VectorRegister dst,
VectorRegister src1, VectorRegister src2,
bool is_double, bool is_min, int length_in_bytes);
bool is_double, bool is_min, int vector_length);
void reduce_minmax_fp_v(FloatRegister dst,
FloatRegister src1, VectorRegister src2,
VectorRegister tmp1, VectorRegister tmp2,
bool is_double, bool is_min, int length_in_bytes);
bool is_double, bool is_min, int vector_length);
void rvv_reduce_integral(Register dst, VectorRegister tmp,
Register src1, VectorRegister src2,
BasicType bt, int opc, int length_in_bytes);
void reduce_integral_v(Register dst, VectorRegister tmp,
Register src1, VectorRegister src2,
BasicType bt, int opc, int vector_length);
void rvv_vsetvli(BasicType bt, int length_in_bytes, Register tmp = t0);
void vsetvli_helper(BasicType bt, int vector_length, LMUL vlmul = Assembler::m1, Register tmp = t0);
void compare_integral_v(VectorRegister dst, BasicType bt, int length_in_bytes,
void compare_integral_v(VectorRegister dst, BasicType bt, int vector_length,
VectorRegister src1, VectorRegister src2, int cond, VectorMask vm = Assembler::unmasked);
void compare_floating_point_v(VectorRegister dst, BasicType bt, int length_in_bytes,
void compare_floating_point_v(VectorRegister dst, BasicType bt, int vector_length,
VectorRegister src1, VectorRegister src2, VectorRegister tmp1, VectorRegister tmp2,
VectorRegister vmask, int cond, VectorMask vm = Assembler::unmasked);
@ -211,13 +211,13 @@
// we assume each predicate register is one-eighth of the size of
// scalable vector register, one mask bit per vector byte.
void spill_vmask(VectorRegister v, int offset){
rvv_vsetvli(T_BYTE, MaxVectorSize >> 3);
vsetvli_helper(T_BYTE, MaxVectorSize >> 3);
add(t0, sp, offset);
vse8_v(v, t0);
}
void unspill_vmask(VectorRegister v, int offset){
rvv_vsetvli(T_BYTE, MaxVectorSize >> 3);
vsetvli_helper(T_BYTE, MaxVectorSize >> 3);
add(t0, sp, offset);
vle8_v(v, t0);
}
@ -230,4 +230,14 @@
}
}
// Sign-extends the integral elements of src (src_bt) to the wider element
// type dst_bt in dst. dst and src must be different registers.
void integer_extend_v(VectorRegister dst, BasicType dst_bt, int vector_length,
VectorRegister src, BasicType src_bt);
// Narrows the integral elements of src (src_bt) to the smaller element type
// dst_bt in dst, halving the element width one step at a time. Uses t0.
void integer_narrow_v(VectorRegister dst, BasicType dst_bt, int vector_length,
VectorRegister src, BasicType src_bt);
// Float-to-integer converts (same-width, widening, narrowing) that skip NaN
// elements, leaving zero in those elements of dst. v0 is overwritten with the
// mask; dst must differ from src.
void vfcvt_rtz_x_f_v_safe(VectorRegister dst, VectorRegister src);
void vfwcvt_rtz_x_f_v_safe(VectorRegister dst, VectorRegister src);
void vfncvt_rtz_x_f_w_safe(VectorRegister dst, VectorRegister src);
#endif // CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP

View File

@ -1944,6 +1944,7 @@ const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, Bas
case Op_URShiftVI:
case Op_URShiftVL:
case Op_VectorBlend:
case Op_VectorReinterpret:
break;
case Op_LoadVector:
opcode = Op_LoadVectorMasked;
@ -2030,7 +2031,10 @@ const int Matcher::min_vector_size(const BasicType bt) {
int max_size = max_vector_size(bt);
// Limit the min vector size to 8 bytes.
int size = 8 / type2aelembytes(bt);
if (bt == T_BOOLEAN) {
if (bt == T_BYTE) {
// To support vector api shuffle/rearrange.
size = 4;
} else if (bt == T_BOOLEAN) {
// To support vector api load/store mask.
size = 2;
}

File diff suppressed because it is too large Load Diff