8306966: RISC-V: Support vector cast node for Vector API
Co-authored-by: Dingli Zhang <dingli@iscas.ac.cn> Reviewed-by: fyang, fjiang
This commit is contained in:
parent
0dca573ca5
commit
495f2688d6
@ -1230,13 +1230,19 @@ enum VectorMask {
|
||||
INSN(viota_m, 0b1010111, 0b010, 0b10000, 0b010100);
|
||||
|
||||
// Vector Single-Width Floating-Point/Integer Type-Convert Instructions
|
||||
INSN(vfcvt_xu_f_v, 0b1010111, 0b001, 0b00000, 0b010010);
|
||||
INSN(vfcvt_x_f_v, 0b1010111, 0b001, 0b00001, 0b010010);
|
||||
INSN(vfcvt_f_xu_v, 0b1010111, 0b001, 0b00010, 0b010010);
|
||||
INSN(vfcvt_f_x_v, 0b1010111, 0b001, 0b00011, 0b010010);
|
||||
INSN(vfcvt_rtz_xu_f_v, 0b1010111, 0b001, 0b00110, 0b010010);
|
||||
INSN(vfcvt_f_x_v, 0b1010111, 0b001, 0b00011, 0b010010);
|
||||
INSN(vfcvt_rtz_x_f_v, 0b1010111, 0b001, 0b00111, 0b010010);
|
||||
|
||||
// Vector Widening Floating-Point/Integer Type-Convert Instructions
|
||||
INSN(vfwcvt_f_x_v, 0b1010111, 0b001, 0b01011, 0b010010);
|
||||
INSN(vfwcvt_f_f_v, 0b1010111, 0b001, 0b01100, 0b010010);
|
||||
INSN(vfwcvt_rtz_x_f_v, 0b1010111, 0b001, 0b01111, 0b010010);
|
||||
|
||||
// Vector Narrowing Floating-Point/Integer Type-Convert Instructions
|
||||
INSN(vfncvt_f_x_w, 0b1010111, 0b001, 0b10011, 0b010010);
|
||||
INSN(vfncvt_f_f_w, 0b1010111, 0b001, 0b10100, 0b010010);
|
||||
INSN(vfncvt_rtz_x_f_w, 0b1010111, 0b001, 0b10111, 0b010010);
|
||||
|
||||
// Vector Floating-Point Instruction
|
||||
INSN(vfsqrt_v, 0b1010111, 0b001, 0b00000, 0b010011);
|
||||
INSN(vfclass_v, 0b1010111, 0b001, 0b10000, 0b010011);
|
||||
@ -1431,6 +1437,9 @@ enum VectorMask {
|
||||
INSN(vsub_vv, 0b1010111, 0b000, 0b000010);
|
||||
INSN(vadd_vv, 0b1010111, 0b000, 0b000000);
|
||||
|
||||
// Vector Register Gather Instructions
|
||||
INSN(vrgather_vv, 0b1010111, 0b000, 0b001100);
|
||||
|
||||
#undef INSN
|
||||
|
||||
|
||||
|
@ -1617,10 +1617,10 @@ void C2_MacroAssembler::string_indexof_char_v(Register str1, Register cnt1,
|
||||
|
||||
// Set dst to NaN if any NaN input.
|
||||
void C2_MacroAssembler::minmax_fp_v(VectorRegister dst, VectorRegister src1, VectorRegister src2,
|
||||
bool is_double, bool is_min, int length_in_bytes) {
|
||||
bool is_double, bool is_min, int vector_length) {
|
||||
assert_different_registers(dst, src1, src2);
|
||||
|
||||
rvv_vsetvli(is_double ? T_DOUBLE : T_FLOAT, length_in_bytes);
|
||||
vsetvli_helper(is_double ? T_DOUBLE : T_FLOAT, vector_length);
|
||||
|
||||
is_min ? vfmin_vv(dst, src1, src2)
|
||||
: vfmax_vv(dst, src1, src2);
|
||||
@ -1635,11 +1635,11 @@ void C2_MacroAssembler::minmax_fp_v(VectorRegister dst, VectorRegister src1, Vec
|
||||
void C2_MacroAssembler::reduce_minmax_fp_v(FloatRegister dst,
|
||||
FloatRegister src1, VectorRegister src2,
|
||||
VectorRegister tmp1, VectorRegister tmp2,
|
||||
bool is_double, bool is_min, int length_in_bytes) {
|
||||
bool is_double, bool is_min, int vector_length) {
|
||||
assert_different_registers(src2, tmp1, tmp2);
|
||||
|
||||
Label L_done, L_NaN;
|
||||
rvv_vsetvli(is_double ? T_DOUBLE : T_FLOAT, length_in_bytes);
|
||||
vsetvli_helper(is_double ? T_DOUBLE : T_FLOAT, vector_length);
|
||||
vfmv_s_f(tmp2, src1);
|
||||
|
||||
is_min ? vfredmin_vs(tmp1, src2, tmp2)
|
||||
@ -1670,12 +1670,12 @@ bool C2_MacroAssembler::in_scratch_emit_size() {
|
||||
return MacroAssembler::in_scratch_emit_size();
|
||||
}
|
||||
|
||||
void C2_MacroAssembler::rvv_reduce_integral(Register dst, VectorRegister tmp,
|
||||
Register src1, VectorRegister src2,
|
||||
BasicType bt, int opc, int length_in_bytes) {
|
||||
void C2_MacroAssembler::reduce_integral_v(Register dst, VectorRegister tmp,
|
||||
Register src1, VectorRegister src2,
|
||||
BasicType bt, int opc, int vector_length) {
|
||||
assert(bt == T_BYTE || bt == T_SHORT || bt == T_INT || bt == T_LONG, "unsupported element type");
|
||||
|
||||
rvv_vsetvli(bt, length_in_bytes);
|
||||
vsetvli_helper(bt, vector_length);
|
||||
|
||||
vmv_s_x(tmp, src1);
|
||||
|
||||
@ -1707,27 +1707,24 @@ void C2_MacroAssembler::rvv_reduce_integral(Register dst, VectorRegister tmp,
|
||||
}
|
||||
|
||||
// Set vl and vtype for full and partial vector operations.
|
||||
// (vlmul = m1, vma = mu, vta = tu, vill = false)
|
||||
void C2_MacroAssembler::rvv_vsetvli(BasicType bt, int length_in_bytes, Register tmp) {
|
||||
// (vma = mu, vta = tu, vill = false)
|
||||
void C2_MacroAssembler::vsetvli_helper(BasicType bt, int vector_length, LMUL vlmul, Register tmp) {
|
||||
Assembler::SEW sew = Assembler::elemtype_to_sew(bt);
|
||||
if (length_in_bytes == MaxVectorSize) {
|
||||
vsetvli(tmp, x0, sew);
|
||||
if (vector_length <= 31) {
|
||||
vsetivli(tmp, vector_length, sew, vlmul);
|
||||
} else if (vector_length == (MaxVectorSize / type2aelembytes(bt))) {
|
||||
vsetvli(tmp, x0, sew, vlmul);
|
||||
} else {
|
||||
int num_elements = length_in_bytes / type2aelembytes(bt);
|
||||
if (num_elements <= 31) {
|
||||
vsetivli(tmp, num_elements, sew);
|
||||
} else {
|
||||
mv(tmp, num_elements);
|
||||
vsetvli(tmp, tmp, sew);
|
||||
}
|
||||
mv(tmp, vector_length);
|
||||
vsetvli(tmp, tmp, sew, vlmul);
|
||||
}
|
||||
}
|
||||
|
||||
void C2_MacroAssembler::compare_integral_v(VectorRegister vd, BasicType bt, int length_in_bytes,
|
||||
void C2_MacroAssembler::compare_integral_v(VectorRegister vd, BasicType bt, int vector_length,
|
||||
VectorRegister src1, VectorRegister src2, int cond, VectorMask vm) {
|
||||
assert(is_integral_type(bt), "unsupported element type");
|
||||
assert(vm == Assembler::v0_t ? vd != v0 : true, "should be different registers");
|
||||
rvv_vsetvli(bt, length_in_bytes);
|
||||
vsetvli_helper(bt, vector_length);
|
||||
vmclr_m(vd);
|
||||
switch (cond) {
|
||||
case BoolTest::eq: vmseq_vv(vd, src1, src2, vm); break;
|
||||
@ -1742,15 +1739,15 @@ void C2_MacroAssembler::compare_integral_v(VectorRegister vd, BasicType bt, int
|
||||
}
|
||||
}
|
||||
|
||||
void C2_MacroAssembler::compare_floating_point_v(VectorRegister vd, BasicType bt, int length_in_bytes,
|
||||
void C2_MacroAssembler::compare_floating_point_v(VectorRegister vd, BasicType bt, int vector_length,
|
||||
VectorRegister src1, VectorRegister src2,
|
||||
VectorRegister tmp1, VectorRegister tmp2,
|
||||
VectorRegister vmask, int cond, VectorMask vm) {
|
||||
assert(is_floating_point_type(bt), "unsupported element type");
|
||||
assert(vd != v0, "should be different registers");
|
||||
assert(vm == Assembler::v0_t ? vmask != v0 : true, "vmask should not be v0");
|
||||
rvv_vsetvli(bt, length_in_bytes);
|
||||
// Check vector elements of src1 and src2 for quiet or signaling NaN.
|
||||
vsetvli_helper(bt, vector_length);
|
||||
// Check vector elements of src1 and src2 for quiet and signaling NaN.
|
||||
vfclass_v(tmp1, src1);
|
||||
vfclass_v(tmp2, src2);
|
||||
vsrl_vi(tmp1, tmp1, 8);
|
||||
@ -1782,4 +1779,91 @@ void C2_MacroAssembler::compare_floating_point_v(VectorRegister vd, BasicType bt
|
||||
assert(false, "unsupported compare condition");
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sign-extend the elements of src (element type src_bt) into dst (element type
// dst_bt) for vector_length elements, using the vsext.vf2/vf4/vf8 forms.
// Both element types must be integral and dst_bt must be strictly wider than
// src_bt (at most 8 bytes wide, with src_bt at most 4 bytes wide).
void C2_MacroAssembler::integer_extend_v(VectorRegister dst, BasicType dst_bt, int vector_length,
                                         VectorRegister src, BasicType src_bt) {
  assert(type2aelembytes(dst_bt) > type2aelembytes(src_bt) && type2aelembytes(dst_bt) <= 8 && type2aelembytes(src_bt) <= 4, "invalid element size");
  assert(dst_bt != T_FLOAT && dst_bt != T_DOUBLE && src_bt != T_FLOAT && src_bt != T_DOUBLE, "unsupported element type");
  // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#52-vector-operands
  // A destination register group may overlap a source register group only when the
  // destination EEW is greater than the source EEW, the source EMUL is at least 1,
  // and the overlap is in the highest-numbered part of the destination register group.
  // vtype is configured below with LMUL=1, so vd and vs cannot be the same.
  assert_different_registers(dst, src);

  // SEW is taken from the (wider) destination element type.
  vsetvli_helper(dst_bt, vector_length);

  switch (src_bt) {
    case T_BYTE:
      if (dst_bt == T_SHORT) {
        vsext_vf2(dst, src);
      } else if (dst_bt == T_INT) {
        vsext_vf4(dst, src);
      } else if (dst_bt == T_LONG) {
        vsext_vf8(dst, src);
      } else {
        ShouldNotReachHere();
      }
      break;
    case T_SHORT:
      // Every destination other than T_INT takes the vf4 form (expected: T_LONG).
      dst_bt == T_INT ? vsext_vf2(dst, src)
                      : vsext_vf4(dst, src);
      break;
    case T_INT:
      vsext_vf2(dst, src);
      break;
    default:
      // No extension instruction is emitted for any other source type.
      break;
  }
}
|
||||
|
||||
// Vector narrow from src to dst with specified element sizes.
|
||||
// High part of dst vector will be filled with zero.
|
||||
void C2_MacroAssembler::integer_narrow_v(VectorRegister dst, BasicType dst_bt, int vector_length,
|
||||
VectorRegister src, BasicType src_bt) {
|
||||
assert(type2aelembytes(dst_bt) < type2aelembytes(src_bt) && type2aelembytes(dst_bt) <= 4 && type2aelembytes(src_bt) <= 8, "invalid element size");
|
||||
assert(dst_bt != T_FLOAT && dst_bt != T_DOUBLE && src_bt != T_FLOAT && src_bt != T_DOUBLE, "unsupported element type");
|
||||
mv(t0, vector_length);
|
||||
if (src_bt == T_LONG) {
|
||||
// https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#117-vector-narrowing-integer-right-shift-instructions
|
||||
// Future extensions might add support for versions that narrow to a destination that is 1/4 the width of the source.
|
||||
// So we can currently only scale down by 1/2 the width at a time.
|
||||
vsetvli(t0, t0, Assembler::e32, Assembler::mf2);
|
||||
vncvt_x_x_w(dst, src);
|
||||
if (dst_bt == T_SHORT || dst_bt == T_BYTE) {
|
||||
vsetvli(t0, t0, Assembler::e16, Assembler::mf2);
|
||||
vncvt_x_x_w(dst, dst);
|
||||
if (dst_bt == T_BYTE) {
|
||||
vsetvli(t0, t0, Assembler::e8, Assembler::mf2);
|
||||
vncvt_x_x_w(dst, dst);
|
||||
}
|
||||
}
|
||||
} else if (src_bt == T_INT) {
|
||||
// T_SHORT
|
||||
vsetvli(t0, t0, Assembler::e16, Assembler::mf2);
|
||||
vncvt_x_x_w(dst, src);
|
||||
if (dst_bt == T_BYTE) {
|
||||
vsetvli(t0, t0, Assembler::e8, Assembler::mf2);
|
||||
vncvt_x_x_w(dst, dst);
|
||||
}
|
||||
} else if (src_bt == T_SHORT) {
|
||||
vsetvli(t0, t0, Assembler::e8, Assembler::mf2);
|
||||
vncvt_x_x_w(dst, src);
|
||||
}
|
||||
}
|
||||
|
||||
// VFCVT_SAFE(insn) defines C2_MacroAssembler::<insn>_safe(dst, src), a wrapper
// around the float-to-integer convert instruction <insn> that emits, in order:
//   vfclass_v  v0  <- class of each src element
//   vxor_vv    dst <- 0
//   vsrl_vi    v0  <- v0 >> 8
//   vmseq_vx   v0  <- (v0 == 0)
//   <insn>     dst <- convert(src), masked by v0
// Elements whose class value has a bit set at position 8 or above are thus
// excluded from the conversion. In the RISC-V vfclass encoding bits 8 and 9
// denote signaling and quiet NaN, so NaN inputs are the ones masked off.
// Presumably this relies on a mask-undisturbed policy (vma = mu) so that those
// elements keep the zero written by vxor_vv -- confirm against the callers.
// v0 is overwritten. The wrapper emits no vsetvli, so vl/vtype are whatever is
// current at the call site.
// NOTE(review): only dst != src is asserted; neither register is checked
// against v0 here.
#define VFCVT_SAFE(VFLOATCVT) \
|
||||
void C2_MacroAssembler::VFLOATCVT##_safe(VectorRegister dst, VectorRegister src) { \
|
||||
assert_different_registers(dst, src); \
|
||||
vfclass_v(v0, src); \
|
||||
vxor_vv(dst, dst, dst); \
|
||||
vsrl_vi(v0, v0, 8); \
|
||||
vmseq_vx(v0, v0, zr); \
|
||||
VFLOATCVT(dst, src, Assembler::v0_t); \
|
||||
}
|
||||
|
||||
// Safe variants for the single-width, widening and narrowing
// round-towards-zero float-to-integer conversions.
VFCVT_SAFE(vfcvt_rtz_x_f_v);
|
||||
VFCVT_SAFE(vfwcvt_rtz_x_f_v);
|
||||
VFCVT_SAFE(vfncvt_rtz_x_f_w);
|
||||
|
||||
#undef VFCVT_SAFE
|
@ -187,23 +187,23 @@
|
||||
|
||||
void minmax_fp_v(VectorRegister dst,
|
||||
VectorRegister src1, VectorRegister src2,
|
||||
bool is_double, bool is_min, int length_in_bytes);
|
||||
bool is_double, bool is_min, int vector_length);
|
||||
|
||||
void reduce_minmax_fp_v(FloatRegister dst,
|
||||
FloatRegister src1, VectorRegister src2,
|
||||
VectorRegister tmp1, VectorRegister tmp2,
|
||||
bool is_double, bool is_min, int length_in_bytes);
|
||||
bool is_double, bool is_min, int vector_length);
|
||||
|
||||
void rvv_reduce_integral(Register dst, VectorRegister tmp,
|
||||
Register src1, VectorRegister src2,
|
||||
BasicType bt, int opc, int length_in_bytes);
|
||||
void reduce_integral_v(Register dst, VectorRegister tmp,
|
||||
Register src1, VectorRegister src2,
|
||||
BasicType bt, int opc, int vector_length);
|
||||
|
||||
void rvv_vsetvli(BasicType bt, int length_in_bytes, Register tmp = t0);
|
||||
void vsetvli_helper(BasicType bt, int vector_length, LMUL vlmul = Assembler::m1, Register tmp = t0);
|
||||
|
||||
void compare_integral_v(VectorRegister dst, BasicType bt, int length_in_bytes,
|
||||
void compare_integral_v(VectorRegister dst, BasicType bt, int vector_length,
|
||||
VectorRegister src1, VectorRegister src2, int cond, VectorMask vm = Assembler::unmasked);
|
||||
|
||||
void compare_floating_point_v(VectorRegister dst, BasicType bt, int length_in_bytes,
|
||||
void compare_floating_point_v(VectorRegister dst, BasicType bt, int vector_length,
|
||||
VectorRegister src1, VectorRegister src2, VectorRegister tmp1, VectorRegister tmp2,
|
||||
VectorRegister vmask, int cond, VectorMask vm = Assembler::unmasked);
|
||||
|
||||
@ -211,13 +211,13 @@
|
||||
// we assume each predicate register is one-eighth of the size of
|
||||
// a scalable vector register, one mask bit per vector byte.
|
||||
void spill_vmask(VectorRegister v, int offset){
|
||||
rvv_vsetvli(T_BYTE, MaxVectorSize >> 3);
|
||||
vsetvli_helper(T_BYTE, MaxVectorSize >> 3);
|
||||
add(t0, sp, offset);
|
||||
vse8_v(v, t0);
|
||||
}
|
||||
|
||||
void unspill_vmask(VectorRegister v, int offset){
|
||||
rvv_vsetvli(T_BYTE, MaxVectorSize >> 3);
|
||||
vsetvli_helper(T_BYTE, MaxVectorSize >> 3);
|
||||
add(t0, sp, offset);
|
||||
vle8_v(v, t0);
|
||||
}
|
||||
@ -230,4 +230,14 @@
|
||||
}
|
||||
}
|
||||
|
||||
void integer_extend_v(VectorRegister dst, BasicType dst_bt, int vector_length,
|
||||
VectorRegister src, BasicType src_bt);
|
||||
|
||||
void integer_narrow_v(VectorRegister dst, BasicType dst_bt, int vector_length,
|
||||
VectorRegister src, BasicType src_bt);
|
||||
|
||||
void vfcvt_rtz_x_f_v_safe(VectorRegister dst, VectorRegister src);
|
||||
void vfwcvt_rtz_x_f_v_safe(VectorRegister dst, VectorRegister src);
|
||||
void vfncvt_rtz_x_f_w_safe(VectorRegister dst, VectorRegister src);
|
||||
|
||||
#endif // CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP
|
||||
|
@ -1944,6 +1944,7 @@ const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, Bas
|
||||
case Op_URShiftVI:
|
||||
case Op_URShiftVL:
|
||||
case Op_VectorBlend:
|
||||
case Op_VectorReinterpret:
|
||||
break;
|
||||
case Op_LoadVector:
|
||||
opcode = Op_LoadVectorMasked;
|
||||
@ -2030,7 +2031,10 @@ const int Matcher::min_vector_size(const BasicType bt) {
|
||||
int max_size = max_vector_size(bt);
|
||||
// Limit the min vector size to 8 bytes.
|
||||
int size = 8 / type2aelembytes(bt);
|
||||
if (bt == T_BOOLEAN) {
|
||||
if (bt == T_BYTE) {
|
||||
// To support vector api shuffle/rearrange.
|
||||
size = 4;
|
||||
} else if (bt == T_BOOLEAN) {
|
||||
// To support vector api load/store mask.
|
||||
size = 2;
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user