diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp index 35f36a1f1c0..d1669cd3737 100644 --- a/src/hotspot/cpu/riscv/assembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp @@ -1488,6 +1488,16 @@ enum VectorMask { #undef INSN +#define INSN(NAME, op, funct3, vm, funct6) \ + void NAME(VectorRegister Vd, VectorRegister Vs2, Register Rs1) { \ + patch_VArith(op, Vd, funct3, Rs1->raw_encoding(), Vs2, vm, funct6); \ + } + + // Vector Integer Merge Instructions + INSN(vmerge_vxm, 0b1010111, 0b100, 0b0, 0b010111); + +#undef INSN + #define INSN(NAME, op, funct3, funct6) \ void NAME(VectorRegister Vd, VectorRegister Vs2, FloatRegister Rs1, VectorMask vm = unmasked) { \ patch_VArith(op, Vd, funct3, Rs1->raw_encoding(), Vs2, vm, funct6); \ @@ -1542,6 +1552,17 @@ enum VectorMask { #undef INSN +#define INSN(NAME, op, funct3, vm, funct6) \ + void NAME(VectorRegister Vd, VectorRegister Vs2, int32_t imm) { \ + guarantee(is_simm5(imm), "imm is invalid"); \ + patch_VArith(op, Vd, funct3, (uint32_t)(imm & 0x1f), Vs2, vm, funct6); \ + } + + // Vector Integer Merge Instructions + INSN(vmerge_vim, 0b1010111, 0b011, 0b0, 0b010111); + +#undef INSN + #define INSN(NAME, op, funct3, vm, funct6) \ void NAME(VectorRegister Vd, VectorRegister Vs2, VectorRegister Vs1) { \ patch_VArith(op, Vd, funct3, Vs1->raw_encoding(), Vs2, vm, funct6); \ @@ -1560,6 +1581,9 @@ enum VectorMask { INSN(vmnand_mm, 0b1010111, 0b010, 0b1, 0b011101); INSN(vmand_mm, 0b1010111, 0b010, 0b1, 0b011001); + // Vector Integer Merge Instructions + INSN(vmerge_vvm, 0b1010111, 0b000, 0b0, 0b010111); + #undef INSN #define INSN(NAME, op, funct3, Vs2, vm, funct6) \ diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp index ba8f221e291..01d99db782c 100644 --- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp @@ -1304,7 +1304,7 @@ void 
C2_MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Regis } // Set dst to NaN if any NaN input. -void C2_MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, +void C2_MacroAssembler::minmax_fp(FloatRegister dst, FloatRegister src1, FloatRegister src2, bool is_double, bool is_min) { assert_different_registers(dst, src1, src2); @@ -1616,7 +1616,7 @@ void C2_MacroAssembler::string_indexof_char_v(Register str1, Register cnt1, } // Set dst to NaN if any NaN input. -void C2_MacroAssembler::minmax_FD_v(VectorRegister dst, VectorRegister src1, VectorRegister src2, +void C2_MacroAssembler::minmax_fp_v(VectorRegister dst, VectorRegister src1, VectorRegister src2, bool is_double, bool is_min, int length_in_bytes) { assert_different_registers(dst, src1, src2); @@ -1632,7 +1632,7 @@ void C2_MacroAssembler::minmax_FD_v(VectorRegister dst, VectorRegister src1, Vec } // Set dst to NaN if any NaN input. -void C2_MacroAssembler::reduce_minmax_FD_v(FloatRegister dst, +void C2_MacroAssembler::reduce_minmax_fp_v(FloatRegister dst, FloatRegister src1, VectorRegister src2, VectorRegister tmp1, VectorRegister tmp2, bool is_double, bool is_min, int length_in_bytes) { @@ -1722,3 +1722,64 @@ void C2_MacroAssembler::rvv_vsetvli(BasicType bt, int length_in_bytes, Register } } } + +void C2_MacroAssembler::compare_integral_v(VectorRegister vd, BasicType bt, int length_in_bytes, + VectorRegister src1, VectorRegister src2, int cond, VectorMask vm) { + assert(is_integral_type(bt), "unsupported element type"); + assert(vm == Assembler::v0_t ? 
vd != v0 : true, "should be different registers"); + rvv_vsetvli(bt, length_in_bytes); + vmclr_m(vd); + switch (cond) { + case BoolTest::eq: vmseq_vv(vd, src1, src2, vm); break; + case BoolTest::ne: vmsne_vv(vd, src1, src2, vm); break; + case BoolTest::le: vmsle_vv(vd, src1, src2, vm); break; + case BoolTest::ge: vmsge_vv(vd, src1, src2, vm); break; + case BoolTest::lt: vmslt_vv(vd, src1, src2, vm); break; + case BoolTest::gt: vmsgt_vv(vd, src1, src2, vm); break; + default: + assert(false, "unsupported compare condition"); + ShouldNotReachHere(); + } +} + +void C2_MacroAssembler::compare_floating_point_v(VectorRegister vd, BasicType bt, int length_in_bytes, + VectorRegister src1, VectorRegister src2, + VectorRegister tmp1, VectorRegister tmp2, + VectorRegister vmask, int cond, VectorMask vm) { + assert(is_floating_point_type(bt), "unsupported element type"); + assert(vd != v0, "should be different registers"); + assert(vm == Assembler::v0_t ? vmask != v0 : true, "vmask should not be v0"); + rvv_vsetvli(bt, length_in_bytes); + // Check vector elements of src1 and src2 for quiet or signaling NaN. 
+ vfclass_v(tmp1, src1); + vfclass_v(tmp2, src2); + vsrl_vi(tmp1, tmp1, 8); + vsrl_vi(tmp2, tmp2, 8); + vmseq_vx(tmp1, tmp1, zr); + vmseq_vx(tmp2, tmp2, zr); + if (vm == Assembler::v0_t) { + vmand_mm(tmp2, tmp1, tmp2); + if (cond == BoolTest::ne) { + vmandn_mm(tmp1, vmask, tmp2); + } + vmand_mm(v0, vmask, tmp2); + } else { + vmand_mm(v0, tmp1, tmp2); + if (cond == BoolTest::ne) { + vmnot_m(tmp1, v0); + } + } + vmclr_m(vd); + switch (cond) { + case BoolTest::eq: vmfeq_vv(vd, src1, src2, Assembler::v0_t); break; + case BoolTest::ne: vmfne_vv(vd, src1, src2, Assembler::v0_t); + vmor_mm(vd, vd, tmp1); break; + case BoolTest::le: vmfle_vv(vd, src1, src2, Assembler::v0_t); break; + case BoolTest::ge: vmfge_vv(vd, src1, src2, Assembler::v0_t); break; + case BoolTest::lt: vmflt_vv(vd, src1, src2, Assembler::v0_t); break; + case BoolTest::gt: vmfgt_vv(vd, src1, src2, Assembler::v0_t); break; + default: + assert(false, "unsupported compare condition"); + ShouldNotReachHere(); + } +} \ No newline at end of file diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp index 94a5068fd9a..30aac05f40b 100644 --- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp @@ -137,13 +137,15 @@ vl1re8_v(v, t0); } - void spill_copy_vector_stack_to_stack(int src_offset, int dst_offset, int vec_reg_size_in_bytes) { - assert(vec_reg_size_in_bytes % 16 == 0, "unexpected vector reg size"); - unspill(v0, src_offset); - spill(v0, dst_offset); + void spill_copy_vector_stack_to_stack(int src_offset, int dst_offset, int vector_length_in_bytes) { + assert(vector_length_in_bytes % 16 == 0, "unexpected vector reg size"); + for (int i = 0; i < vector_length_in_bytes / 8; i++) { + unspill(t0, true, src_offset + (i * 8)); + spill(t0, true, dst_offset + (i * 8)); + } } - void minmax_FD(FloatRegister dst, + void minmax_fp(FloatRegister dst, FloatRegister src1, FloatRegister src2, bool is_double, 
bool is_min); @@ -183,11 +185,11 @@ Register tmp1, Register tmp2, bool isL); - void minmax_FD_v(VectorRegister dst, + void minmax_fp_v(VectorRegister dst, VectorRegister src1, VectorRegister src2, bool is_double, bool is_min, int length_in_bytes); - void reduce_minmax_FD_v(FloatRegister dst, + void reduce_minmax_fp_v(FloatRegister dst, FloatRegister src1, VectorRegister src2, VectorRegister tmp1, VectorRegister tmp2, bool is_double, bool is_min, int length_in_bytes); @@ -198,4 +200,34 @@ void rvv_vsetvli(BasicType bt, int length_in_bytes, Register tmp = t0); + void compare_integral_v(VectorRegister dst, BasicType bt, int length_in_bytes, + VectorRegister src1, VectorRegister src2, int cond, VectorMask vm = Assembler::unmasked); + + void compare_floating_point_v(VectorRegister dst, BasicType bt, int length_in_bytes, + VectorRegister src1, VectorRegister src2, VectorRegister tmp1, VectorRegister tmp2, + VectorRegister vmask, int cond, VectorMask vm = Assembler::unmasked); + + // In Matcher::scalable_predicate_reg_slots, + // we assume each predicate register is one-eighth of the size of + // scalable vector register, one mask bit per vector byte. 
+ void spill_vmask(VectorRegister v, int offset){ + rvv_vsetvli(T_BYTE, MaxVectorSize >> 3); + add(t0, sp, offset); + vse8_v(v, t0); + } + + void unspill_vmask(VectorRegister v, int offset){ + rvv_vsetvli(T_BYTE, MaxVectorSize >> 3); + add(t0, sp, offset); + vle8_v(v, t0); + } + + void spill_copy_vmask_stack_to_stack(int src_offset, int dst_offset, int vector_length_in_bytes) { + assert(vector_length_in_bytes % 4 == 0, "unexpected vector mask reg size"); + for (int i = 0; i < vector_length_in_bytes / 4; i++) { + unspill(t0, false, src_offset + (i * 4)); + spill(t0, false, dst_offset + (i * 4)); + } + } + #endif // CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp index 18a03eff7ee..c60d1a5ad66 100644 --- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp @@ -1264,7 +1264,7 @@ public: vmnand_mm(vd, vs, vs); } - inline void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm) { + inline void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) { vnsrl_wx(vd, vs, x0, vm); } @@ -1276,6 +1276,45 @@ public: vfsgnjn_vv(vd, vs, vs); } + inline void vmsgt_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) { + vmslt_vv(vd, vs1, vs2, vm); + } + + inline void vmsgtu_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) { + vmsltu_vv(vd, vs1, vs2, vm); + } + + inline void vmsge_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) { + vmsle_vv(vd, vs1, vs2, vm); + } + + inline void vmsgeu_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) { + vmsleu_vv(vd, vs1, vs2, vm); + } + + inline void vmfgt_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) { + vmflt_vv(vd, vs1, vs2, vm); + } + + inline void vmfge_vv(VectorRegister 
vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) { + vmfle_vv(vd, vs1, vs2, vm); + } + + // Copy mask register + inline void vmmv_m(VectorRegister vd, VectorRegister vs) { + vmand_mm(vd, vs, vs); + } + + // Clear mask register + inline void vmclr_m(VectorRegister vd) { + vmxor_mm(vd, vd, vd); + } + + // Set mask register + inline void vmset_m(VectorRegister vd) { + vmxnor_mm(vd, vd, vd); + } + static const int zero_words_block_size; void cast_primitive_type(BasicType type, Register Rt) { diff --git a/src/hotspot/cpu/riscv/matcher_riscv.hpp b/src/hotspot/cpu/riscv/matcher_riscv.hpp index eeee72f3910..a2b38ee4a48 100644 --- a/src/hotspot/cpu/riscv/matcher_riscv.hpp +++ b/src/hotspot/cpu/riscv/matcher_riscv.hpp @@ -149,7 +149,7 @@ // Some microarchitectures have mask registers used on vectors static const bool has_predicated_vectors(void) { - return false; + return UseRVV; } // true means we have fast l2f conversion diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad index 708defd68e7..88dd95a1b8a 100644 --- a/src/hotspot/cpu/riscv/riscv.ad +++ b/src/hotspot/cpu/riscv/riscv.ad @@ -830,7 +830,8 @@ reg_class double_reg( F31, F31_H ); -// Class for all RVV vector registers +// Class for RVV vector registers +// Note: v0, v30 and v31 are used as mask registers. reg_class vectora_reg( V1, V1_H, V1_J, V1_K, V2, V2_H, V2_J, V2_K, @@ -860,9 +861,7 @@ reg_class vectora_reg( V26, V26_H, V26_J, V26_K, V27, V27_H, V27_J, V27_K, V28, V28_H, V28_J, V28_K, - V29, V29_H, V29_J, V29_K, - V30, V30_H, V30_J, V30_K, - V31, V31_H, V31_J, V31_K + V29, V29_H, V29_J, V29_K ); // Class for 64 bit register f0 @@ -912,6 +911,23 @@ reg_class v5_reg( // class for condition codes reg_class reg_flags(RFLAGS); + +// Class for RVV v0 mask register +// https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#53-vector-masking +// The mask value used to control execution of a masked vector +// instruction is always supplied by vector register v0. 
+reg_class vmask_reg_v0 ( + V0 +); + +// Class for RVV mask registers +// We need two more vmask registers to do the vector mask logical ops, +// so define v30, v31 as mask register too. +reg_class vmask_reg ( + V0, + V30, + V31 +); %} //----------DEFINITION BLOCK--------------------------------------------------- @@ -1522,7 +1538,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register"); - if (src_hi != OptoReg::Bad) { + if (src_hi != OptoReg::Bad && !bottom_type()->isa_vectmask()) { assert((src_lo & 1) == 0 && src_lo + 1 == src_hi && (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi, "expected aligned-adjacent pairs"); @@ -1558,6 +1574,25 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo } else { ShouldNotReachHere(); } + } else if (bottom_type()->isa_vectmask() && cbuf) { + C2_MacroAssembler _masm(cbuf); + int vmask_size_in_bytes = Matcher::scalable_predicate_reg_slots() * 32 / 8; + if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { + // stack to stack + __ spill_copy_vmask_stack_to_stack(src_offset, dst_offset, + vmask_size_in_bytes); + } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_stack) { + // vmask to stack + __ spill_vmask(as_VectorRegister(Matcher::_regEncode[src_lo]), ra_->reg2offset(dst_lo)); + } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_vector) { + // stack to vmask + __ unspill_vmask(as_VectorRegister(Matcher::_regEncode[dst_lo]), ra_->reg2offset(src_lo)); + } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_vector) { + // vmask to vmask + __ vmv1r_v(as_VectorRegister(Matcher::_regEncode[dst_lo]), as_VectorRegister(Matcher::_regEncode[src_lo])); + } else { + ShouldNotReachHere(); + } } } else if (cbuf != NULL) { C2_MacroAssembler _masm(cbuf); @@ -1642,7 +1677,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo } else { st->print("%s", 
Matcher::regName[dst_lo]); } - if (bottom_type()->isa_vect() != NULL) { + if (bottom_type()->isa_vect() && !bottom_type()->isa_vectmask()) { int vsize = 0; if (ideal_reg() == Op_VecA) { vsize = Matcher::scalable_vector_reg_size(T_BYTE) * 8; @@ -1650,6 +1685,10 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo ShouldNotReachHere(); } st->print("\t# vector spill size = %d", vsize); + } else if (ideal_reg() == Op_RegVectMask) { + assert(Matcher::supports_scalable_vector(), "bad register type for spill"); + int vsize = Matcher::scalable_predicate_reg_slots() * 32; + st->print("\t# vmask spill size = %d", vsize); } else { st->print("\t# spill size = %d", is64 ? 64 : 32); } @@ -1863,7 +1902,59 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType } const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { - return false; + if (!UseRVV) { + return false; + } + switch (opcode) { + case Op_AddVB: + case Op_AddVS: + case Op_AddVI: + case Op_AddVL: + case Op_AddVF: + case Op_AddVD: + case Op_SubVB: + case Op_SubVS: + case Op_SubVI: + case Op_SubVL: + case Op_SubVF: + case Op_SubVD: + case Op_MulVB: + case Op_MulVS: + case Op_MulVI: + case Op_MulVL: + case Op_MulVF: + case Op_MulVD: + case Op_DivVF: + case Op_DivVD: + case Op_VectorLoadMask: + case Op_VectorMaskCmp: + case Op_AndVMask: + case Op_XorVMask: + case Op_OrVMask: + case Op_RShiftVB: + case Op_RShiftVS: + case Op_RShiftVI: + case Op_RShiftVL: + case Op_LShiftVB: + case Op_LShiftVS: + case Op_LShiftVI: + case Op_LShiftVL: + case Op_URShiftVB: + case Op_URShiftVS: + case Op_URShiftVI: + case Op_URShiftVL: + case Op_VectorBlend: + break; + case Op_LoadVector: + opcode = Op_LoadVectorMasked; + break; + case Op_StoreVector: + opcode = Op_StoreVectorMasked; + break; + default: + return false; + } + return match_rule_supported_vector(opcode, vlen, bt); } const bool Matcher::vector_needs_partial_operations(Node* node, const 
TypeVect* vt) { @@ -1875,11 +1966,11 @@ const bool Matcher::vector_needs_load_shuffle(BasicType elem_bt, int vlen) { } const RegMask* Matcher::predicate_reg_mask(void) { - return NULL; + return &_VMASK_REG_mask; } const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) { - return NULL; + return new TypeVectMask(elemTy, length); } // Vector calling convention not yet implemented. @@ -3556,6 +3647,28 @@ operand vReg_V5() interface(REG_INTER); %} +operand vRegMask() +%{ + constraint(ALLOC_IN_RC(vmask_reg)); + match(RegVectMask); + match(vRegMask_V0); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// The mask value used to control execution of a masked +// vector instruction is always supplied by vector register v0. +operand vRegMask_V0() +%{ + constraint(ALLOC_IN_RC(vmask_reg_v0)); + match(RegVectMask); + match(vRegMask); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + // Java Thread Register operand javaThread_RegP(iRegP reg) %{ @@ -7271,7 +7384,7 @@ instruct maxF_reg_reg(fRegF dst, fRegF src1, fRegF src2, rFlagsReg cr) %{ format %{ "maxF $dst, $src1, $src2" %} ins_encode %{ - __ minmax_FD(as_FloatRegister($dst$$reg), + __ minmax_fp(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), false /* is_double */, false /* is_min */); %} @@ -7287,7 +7400,7 @@ instruct minF_reg_reg(fRegF dst, fRegF src1, fRegF src2, rFlagsReg cr) %{ format %{ "minF $dst, $src1, $src2" %} ins_encode %{ - __ minmax_FD(as_FloatRegister($dst$$reg), + __ minmax_fp(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), false /* is_double */, true /* is_min */); %} @@ -7303,7 +7416,7 @@ instruct maxD_reg_reg(fRegD dst, fRegD src1, fRegD src2, rFlagsReg cr) %{ format %{ "maxD $dst, $src1, $src2" %} ins_encode %{ - __ minmax_FD(as_FloatRegister($dst$$reg), + __ minmax_fp(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), true /* is_double */, 
false /* is_min */); %} @@ -7319,7 +7432,7 @@ instruct minD_reg_reg(fRegD dst, fRegD src1, fRegD src2, rFlagsReg cr) %{ format %{ "minD $dst, $src1, $src2" %} ins_encode %{ - __ minmax_FD(as_FloatRegister($dst$$reg), + __ minmax_fp(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), true /* is_double */, true /* is_min */); %} diff --git a/src/hotspot/cpu/riscv/riscv_v.ad b/src/hotspot/cpu/riscv/riscv_v.ad index 700cb18eafb..240baa1b577 100644 --- a/src/hotspot/cpu/riscv/riscv_v.ad +++ b/src/hotspot/cpu/riscv/riscv_v.ad @@ -35,14 +35,18 @@ source_hpp %{ source %{ static void loadStore(C2_MacroAssembler masm, bool is_store, - VectorRegister reg, BasicType bt, Register base, int length_in_bytes) { + VectorRegister reg, BasicType bt, Register base, + int length_in_bytes, Assembler::VectorMask vm = Assembler::unmasked) { Assembler::SEW sew = Assembler::elemtype_to_sew(bt); masm.rvv_vsetvli(bt, length_in_bytes); if (is_store) { - masm.vsex_v(reg, base, sew); + masm.vsex_v(reg, base, sew, vm); } else { - masm.vlex_v(reg, base, sew); + if (vm == Assembler::v0_t) { + masm.vxor_vv(reg, reg, reg); + } + masm.vlex_v(reg, base, sew, vm); } } @@ -66,7 +70,6 @@ source %{ // Vector API specific case Op_LoadVectorGather: case Op_StoreVectorScatter: - case Op_VectorBlend: case Op_VectorCast: case Op_VectorCastB2X: case Op_VectorCastD2X: @@ -75,12 +78,9 @@ source %{ case Op_VectorCastL2X: case Op_VectorCastS2X: case Op_VectorInsert: - case Op_VectorLoadMask: case Op_VectorLoadShuffle: - case Op_VectorMaskCmp: case Op_VectorRearrange: case Op_VectorReinterpret: - case Op_VectorStoreMask: case Op_VectorTest: case Op_PopCountVI: case Op_PopCountVL: @@ -123,6 +123,112 @@ instruct storeV(vReg src, vmemA mem) %{ ins_pipe(pipe_slow); %} +// vector load mask + +instruct vloadmask(vRegMask dst, vReg src) %{ + match(Set dst (VectorLoadMask src)); + format %{ "vloadmask $dst, $src" %} + ins_encode %{ + __ rvv_vsetvli(T_BOOLEAN, 
Matcher::vector_length(this)); + __ vmsne_vx(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), zr); + %} + ins_pipe(pipe_slow); +%} + +instruct vloadmask_masked(vRegMask dst, vReg src, vRegMask_V0 v0) %{ + match(Set dst (VectorLoadMask src v0)); + format %{ "vloadmask_masked $dst, $src, $v0" %} + ins_encode %{ + __ rvv_vsetvli(T_BOOLEAN, Matcher::vector_length(this)); + __ vmsne_vx(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), zr, Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +// vector store mask + +instruct vstoremask(vReg dst, vRegMask_V0 v0, immI size) %{ + match(Set dst (VectorStoreMask v0 size)); + format %{ "vstoremask $dst, V0" %} + ins_encode %{ + __ rvv_vsetvli(T_BOOLEAN, Matcher::vector_length(this)); + __ vmv_v_x(as_VectorRegister($dst$$reg), zr); + __ vmerge_vim(as_VectorRegister($dst$$reg), as_VectorRegister($dst$$reg), 1); + %} + ins_pipe(pipe_slow); +%} + +// vector mask compare + +instruct vmaskcmp(vRegMask dst, vReg src1, vReg src2, immI cond) %{ + predicate(Matcher::vector_element_basic_type(n) == T_BYTE || + Matcher::vector_element_basic_type(n) == T_SHORT || + Matcher::vector_element_basic_type(n) == T_INT || + Matcher::vector_element_basic_type(n) == T_LONG); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "vmaskcmp $dst, $src1, $src2, $cond" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + uint length_in_bytes = Matcher::vector_length_in_bytes(this); + __ compare_integral_v(as_VectorRegister($dst$$reg), bt, length_in_bytes, as_VectorRegister($src1$$reg), + as_VectorRegister($src2$$reg), (int)($cond$$constant)); + %} + ins_pipe(pipe_slow); +%} + +instruct vmaskcmp_masked(vRegMask dst, vReg src1, vReg src2, immI cond, vRegMask_V0 v0) %{ + predicate(Matcher::vector_element_basic_type(n) == T_BYTE || + Matcher::vector_element_basic_type(n) == T_SHORT || + Matcher::vector_element_basic_type(n) == T_INT || + Matcher::vector_element_basic_type(n) == T_LONG); + 
match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond v0))); + effect(TEMP_DEF dst); + format %{ "vmaskcmp_masked $dst, $src1, $src2, $cond, $v0" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + uint length_in_bytes = Matcher::vector_length_in_bytes(this); + __ compare_integral_v(as_VectorRegister($dst$$reg), bt, length_in_bytes, as_VectorRegister($src1$$reg), + as_VectorRegister($src2$$reg), (int)($cond$$constant), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +// vector mask float compare + +instruct vmaskcmp_fp(vRegMask dst, vReg src1, vReg src2, immI cond, vRegMask_V0 v0, vReg tmp1, vReg tmp2) %{ + predicate(Matcher::vector_element_basic_type(n) == T_FLOAT || + Matcher::vector_element_basic_type(n) == T_DOUBLE); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP v0); + format %{ "vmaskcmp_fp $dst, $src1, $src2, $cond\t# KILL $tmp1, $tmp2" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + uint length_in_bytes = Matcher::vector_length_in_bytes(this); + __ compare_floating_point_v(as_VectorRegister($dst$$reg), bt, length_in_bytes, + as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), + as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), + as_VectorRegister($v0$$reg), (int)($cond$$constant)); + %} + ins_pipe(pipe_slow); +%} + +instruct vmaskcmp_fp_masked(vRegMask dst, vReg src1, vReg src2, immI cond, vRegMask vmask, vReg tmp1, vReg tmp2, vRegMask_V0 v0) %{ + predicate(Matcher::vector_element_basic_type(n) == T_FLOAT || + Matcher::vector_element_basic_type(n) == T_DOUBLE); + match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond vmask))); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP v0); + format %{ "vmaskcmp_fp_masked $dst, $src1, $src2, $cond, $vmask\t# KILL $tmp1, $tmp2, $v0" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + uint length_in_bytes = 
Matcher::vector_length_in_bytes(this); + __ compare_floating_point_v(as_VectorRegister($dst$$reg), bt, length_in_bytes, + as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), + as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), + as_VectorRegister($vmask$$reg), (int)($cond$$constant), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + // vector abs instruct vabsB(vReg dst, vReg src, vReg tmp) %{ @@ -283,6 +389,40 @@ instruct vaddD(vReg dst, vReg src1, vReg src2) %{ ins_pipe(pipe_slow); %} +// vector add - predicated + +instruct vadd_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{ + match(Set dst_src1 (AddVB (Binary dst_src1 src2) v0)); + match(Set dst_src1 (AddVS (Binary dst_src1 src2) v0)); + match(Set dst_src1 (AddVI (Binary dst_src1 src2) v0)); + match(Set dst_src1 (AddVL (Binary dst_src1 src2) v0)); + ins_cost(VEC_COST); + format %{ "vadd.vv $dst_src1, $src2, $v0\t#@vadd_masked" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ rvv_vsetvli(bt, Matcher::vector_length_in_bytes(this)); + __ vadd_vv(as_VectorRegister($dst_src1$$reg), + as_VectorRegister($dst_src1$$reg), + as_VectorRegister($src2$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +instruct vadd_fp_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{ + match(Set dst_src1 (AddVF (Binary dst_src1 src2) v0)); + match(Set dst_src1 (AddVD (Binary dst_src1 src2) v0)); + ins_cost(VEC_COST); + format %{ "vfadd.vv $dst_src1, $src2, $v0\t#@vadd_fp_masked" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ rvv_vsetvli(bt, Matcher::vector_length_in_bytes(this)); + __ vfadd_vv(as_VectorRegister($dst_src1$$reg), + as_VectorRegister($dst_src1$$reg), + as_VectorRegister($src2$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + // vector and instruct vand(vReg dst, vReg src1, vReg src2) %{ @@ -290,7 +430,8 @@ instruct vand(vReg dst, vReg src1, vReg src2) %{ ins_cost(VEC_COST); format %{ "vand.vv $dst, $src1, 
$src2\t#@vand" %} ins_encode %{ - __ rvv_vsetvli(T_LONG, Matcher::vector_length_in_bytes(this)); + BasicType bt = Matcher::vector_element_basic_type(this); + __ rvv_vsetvli(bt, Matcher::vector_length_in_bytes(this)); __ vand_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg)); @@ -305,7 +446,8 @@ instruct vor(vReg dst, vReg src1, vReg src2) %{ ins_cost(VEC_COST); format %{ "vor.vv $dst, $src1, $src2\t#@vor" %} ins_encode %{ - __ rvv_vsetvli(T_LONG, Matcher::vector_length_in_bytes(this)); + BasicType bt = Matcher::vector_element_basic_type(this); + __ rvv_vsetvli(bt, Matcher::vector_length_in_bytes(this)); __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg)); @@ -320,7 +462,8 @@ instruct vxor(vReg dst, vReg src1, vReg src2) %{ ins_cost(VEC_COST); format %{ "vxor.vv $dst, $src1, $src2\t#@vxor" %} ins_encode %{ - __ rvv_vsetvli(T_LONG, Matcher::vector_length_in_bytes(this)); + BasicType bt = Matcher::vector_element_basic_type(this); + __ rvv_vsetvli(bt, Matcher::vector_length_in_bytes(this)); __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg)); @@ -356,6 +499,23 @@ instruct vdivD(vReg dst, vReg src1, vReg src2) %{ ins_pipe(pipe_slow); %} +// vector float div - predicated + +instruct vdiv_fp_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{ + match(Set dst_src1 (DivVF (Binary dst_src1 src2) v0)); + match(Set dst_src1 (DivVD (Binary dst_src1 src2) v0)); + ins_cost(VEC_COST); + format %{ "vfdiv.vv $dst_src1, $src2, $v0\t#@vdiv_fp_masked" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ rvv_vsetvli(bt, Matcher::vector_length_in_bytes(this)); + __ vfdiv_vv(as_VectorRegister($dst_src1$$reg), + as_VectorRegister($dst_src1$$reg), + as_VectorRegister($src2$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + // vector integer max/min instruct vmax(vReg dst, vReg src1, vReg src2) %{ @@ -397,7 
+557,7 @@ instruct vmaxF(vReg dst, vReg src1, vReg src2) %{ ins_cost(VEC_COST); format %{ "vmaxF $dst, $src1, $src2\t#@vmaxF" %} ins_encode %{ - __ minmax_FD_v(as_VectorRegister($dst$$reg), + __ minmax_fp_v(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), false /* is_double */, false /* is_min */, Matcher::vector_length_in_bytes(this)); %} @@ -411,7 +571,7 @@ instruct vmaxD(vReg dst, vReg src1, vReg src2) %{ ins_cost(VEC_COST); format %{ "vmaxD $dst, $src1, $src2\t#@vmaxD" %} ins_encode %{ - __ minmax_FD_v(as_VectorRegister($dst$$reg), + __ minmax_fp_v(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), true /* is_double */, false /* is_min */, Matcher::vector_length_in_bytes(this)); %} @@ -425,7 +585,7 @@ instruct vminF(vReg dst, vReg src1, vReg src2) %{ ins_cost(VEC_COST); format %{ "vminF $dst, $src1, $src2\t#@vminF" %} ins_encode %{ - __ minmax_FD_v(as_VectorRegister($dst$$reg), + __ minmax_fp_v(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), false /* is_double */, true /* is_min */, Matcher::vector_length_in_bytes(this)); %} @@ -439,7 +599,7 @@ instruct vminD(vReg dst, vReg src1, vReg src2) %{ ins_cost(VEC_COST); format %{ "vminD $dst, $src1, $src2\t#@vminD" %} ins_encode %{ - __ minmax_FD_v(as_VectorRegister($dst$$reg), + __ minmax_fp_v(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), true /* is_double */, true /* is_min */, Matcher::vector_length_in_bytes(this)); %} @@ -756,6 +916,38 @@ instruct vmulD(vReg dst, vReg src1, vReg src2) %{ ins_pipe(pipe_slow); %} +// vector mul - predicated + +instruct vmul_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{ + match(Set dst_src1 (MulVB (Binary dst_src1 src2) v0)); + match(Set dst_src1 (MulVS (Binary dst_src1 src2) v0)); + match(Set dst_src1 (MulVI (Binary dst_src1 src2) v0)); + match(Set dst_src1 (MulVL (Binary dst_src1 src2) v0)); + 
ins_cost(VEC_COST); + format %{ "vmul.vv $dst_src1, $src2, $v0\t#@vmul_masked" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ rvv_vsetvli(bt, Matcher::vector_length_in_bytes(this)); + __ vmul_vv(as_VectorRegister($dst_src1$$reg), as_VectorRegister($dst_src1$$reg), + as_VectorRegister($src2$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +instruct vmul_fp_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{ + match(Set dst_src1 (MulVF (Binary dst_src1 src2) v0)); + match(Set dst_src1 (MulVD (Binary dst_src1 src2) v0)); + ins_cost(VEC_COST); + format %{ "vfmul.vv $dst_src1, $src2, $v0\t#@vmul_fp_masked" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ rvv_vsetvli(bt, Matcher::vector_length_in_bytes(this)); + __ vfmul_vv(as_VectorRegister($dst_src1$$reg), as_VectorRegister($dst_src1$$reg), + as_VectorRegister($src2$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + // vector neg instruct vnegI(vReg dst, vReg src) %{ @@ -1000,7 +1192,7 @@ instruct vreduce_maxI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ match(Set dst (MaxReductionV src1 src2)); ins_cost(VEC_COST); effect(TEMP tmp); - format %{ "vreduce_maxI $dst, $src1, $src2, $tmp" %} + format %{ "vreduce_maxI $dst, $src1, $src2\t# KILL $tmp" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); __ rvv_reduce_integral($dst$$Register, as_VectorRegister($tmp$$reg), @@ -1015,7 +1207,7 @@ instruct vreduce_maxL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ match(Set dst (MaxReductionV src1 src2)); ins_cost(VEC_COST); effect(TEMP tmp); - format %{ "vreduce_maxL $dst, $src1, $src2, $tmp" %} + format %{ "vreduce_maxL $dst, $src1, $src2\t# KILL $tmp" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); __ rvv_reduce_integral($dst$$Register, as_VectorRegister($tmp$$reg), @@ -1034,7 +1226,7 @@ instruct vreduce_minI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ 
match(Set dst (MinReductionV src1 src2)); ins_cost(VEC_COST); effect(TEMP tmp); - format %{ "vreduce_minI $dst, $src1, $src2, $tmp" %} + format %{ "vreduce_minI $dst, $src1, $src2\t# KILL $tmp" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); __ rvv_reduce_integral($dst$$Register, as_VectorRegister($tmp$$reg), @@ -1049,7 +1241,7 @@ instruct vreduce_minL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ match(Set dst (MinReductionV src1 src2)); ins_cost(VEC_COST); effect(TEMP tmp); - format %{ "vreduce_minL $dst, $src1, $src2, $tmp" %} + format %{ "vreduce_minL $dst, $src1, $src2\t# KILL $tmp" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); __ rvv_reduce_integral($dst$$Register, as_VectorRegister($tmp$$reg), @@ -1068,7 +1260,7 @@ instruct vreduce_maxF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); format %{ "reduce_maxF $dst, $src1, $src2, $tmp1, $tmp2" %} ins_encode %{ - __ reduce_minmax_FD_v($dst$$FloatRegister, + __ reduce_minmax_fp_v($dst$$FloatRegister, $src1$$FloatRegister, as_VectorRegister($src2$$reg), as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), false /* is_double */, false /* is_min */, Matcher::vector_length_in_bytes(this, $src2)); @@ -1083,7 +1275,7 @@ instruct vreduce_maxD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); format %{ "reduce_maxD $dst, $src1, $src2, $tmp1, $tmp2" %} ins_encode %{ - __ reduce_minmax_FD_v($dst$$FloatRegister, + __ reduce_minmax_fp_v($dst$$FloatRegister, $src1$$FloatRegister, as_VectorRegister($src2$$reg), as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), true /* is_double */, false /* is_min */, Matcher::vector_length_in_bytes(this, $src2)); @@ -1100,7 +1292,7 @@ instruct vreduce_minF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); format %{ "reduce_minF $dst, $src1, 
$src2, $tmp1, $tmp2" %} ins_encode %{ - __ reduce_minmax_FD_v($dst$$FloatRegister, + __ reduce_minmax_fp_v($dst$$FloatRegister, $src1$$FloatRegister, as_VectorRegister($src2$$reg), as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), false /* is_double */, true /* is_min */, Matcher::vector_length_in_bytes(this, $src2)); @@ -1115,7 +1307,7 @@ instruct vreduce_minD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); format %{ "reduce_minD $dst, $src1, $src2, $tmp1, $tmp2" %} ins_encode %{ - __ reduce_minmax_FD_v($dst$$FloatRegister, + __ reduce_minmax_fp_v($dst$$FloatRegister, $src1$$FloatRegister, as_VectorRegister($src2$$reg), as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), true /* is_double */, true /* is_min */, Matcher::vector_length_in_bytes(this, $src2)); @@ -1265,44 +1457,38 @@ instruct replicateD(vReg dst, fRegD src) %{ // vector shift -instruct vasrB(vReg dst, vReg src, vReg shift) %{ +instruct vasrB(vReg dst, vReg src, vReg shift, vRegMask_V0 v0) %{ match(Set dst (RShiftVB src shift)); ins_cost(VEC_COST); - effect(TEMP_DEF dst); - format %{ "vmsgtu.vi v0, $shift 7\t#@vasrB\n\t" - "vsra.vi $dst, $src, 7, Assembler::v0_t\n\t" - "vmnot.m v0, v0\n\t" - "vsra.vv $dst, $src, $shift, Assembler::v0_t" %} + effect(TEMP_DEF dst, TEMP v0); + format %{ "vasrB $dst, $src, $shift" %} ins_encode %{ __ rvv_vsetvli(T_BYTE, Matcher::vector_length_in_bytes(this)); // if shift > BitsPerByte - 1, clear the low BitsPerByte - 1 bits - __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); + __ vmsgtu_vi(as_VectorRegister($v0$$reg), as_VectorRegister($shift$$reg), BitsPerByte - 1); __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), BitsPerByte - 1, Assembler::v0_t); // otherwise, shift - __ vmnot_m(v0, v0); + __ vmnot_m(as_VectorRegister($v0$$reg), as_VectorRegister($v0$$reg)); __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), 
as_VectorRegister($shift$$reg), Assembler::v0_t); %} ins_pipe(pipe_slow); %} -instruct vasrS(vReg dst, vReg src, vReg shift) %{ +instruct vasrS(vReg dst, vReg src, vReg shift, vRegMask_V0 v0) %{ match(Set dst (RShiftVS src shift)); ins_cost(VEC_COST); - effect(TEMP_DEF dst); - format %{ "vmsgtu.vi v0, $shift, 15\t#@vasrS\n\t" - "vsra.vi $dst, $src, 15, Assembler::v0_t\n\t" - "vmnot.m v0, v0\n\t" - "vsra.vv $dst, $src, $shift, Assembler::v0_t" %} + effect(TEMP_DEF dst, TEMP v0); + format %{ "vasrS $dst, $src, $shift" %} ins_encode %{ __ rvv_vsetvli(T_SHORT, Matcher::vector_length_in_bytes(this)); // if shift > BitsPerShort - 1, clear the low BitsPerShort - 1 bits - __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); + __ vmsgtu_vi(as_VectorRegister($v0$$reg), as_VectorRegister($shift$$reg), BitsPerShort - 1); __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), BitsPerShort - 1, Assembler::v0_t); // otherwise, shift - __ vmnot_m(v0, v0); + __ vmnot_m(as_VectorRegister($v0$$reg), as_VectorRegister($v0$$reg)); __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($shift$$reg), Assembler::v0_t); %} @@ -1312,7 +1498,7 @@ instruct vasrS(vReg dst, vReg src, vReg shift) %{ instruct vasrI(vReg dst, vReg src, vReg shift) %{ match(Set dst (RShiftVI src shift)); ins_cost(VEC_COST); - format %{ "vsra.vv $dst, $src, $shift\t#@vasrI" %} + format %{ "vasrI $dst, $src, $shift" %} ins_encode %{ __ rvv_vsetvli(T_INT, Matcher::vector_length_in_bytes(this)); __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), @@ -1324,53 +1510,109 @@ instruct vasrI(vReg dst, vReg src, vReg shift) %{ instruct vasrL(vReg dst, vReg src, vReg shift) %{ match(Set dst (RShiftVL src shift)); ins_cost(VEC_COST); - format %{ "vsra.vv $dst, $src, $shift\t#@vasrL" %} + format %{ "vasrL $dst, $src, $shift" %} ins_encode %{ __ rvv_vsetvli(T_LONG, Matcher::vector_length_in_bytes(this)); __ vsra_vv(as_VectorRegister($dst$$reg), 
as_VectorRegister($src$$reg), - as_VectorRegister($shift$$reg)); + as_VectorRegister($shift$$reg)); %} ins_pipe(pipe_slow); %} -instruct vlslB(vReg dst, vReg src, vReg shift) %{ +instruct vasrB_masked(vReg dst_src, vReg shift, vRegMask vmask, vRegMask_V0 v0) %{ + match(Set dst_src (RShiftVB (Binary dst_src shift) vmask)); + ins_cost(VEC_COST); + effect(TEMP_DEF dst_src, TEMP v0); + format %{ "vasrB_masked $dst_src, $dst_src, $shift, $vmask\t# KILL $v0" %} + ins_encode %{ + __ rvv_vsetvli(T_BYTE, Matcher::vector_length_in_bytes(this)); + __ vmsgtu_vi(as_VectorRegister($v0$$reg), as_VectorRegister($shift$$reg), BitsPerByte - 1); + // clamp shift counts above BitsPerByte - 1 to BitsPerByte - 1; NOTE(review): the result is written back into the shift operand, which effect() does not declare as modified - confirm this is safe + __ vmerge_vim(as_VectorRegister($shift$$reg), as_VectorRegister($shift$$reg), BitsPerByte - 1); + // copy vmask into v0, then shift only the lanes it selects + __ vmv1r_v(as_VectorRegister($v0$$reg), as_VectorRegister($vmask$$reg)); + __ vsra_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), + as_VectorRegister($shift$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +instruct vasrS_masked(vReg dst_src, vReg shift, vRegMask vmask, vRegMask_V0 v0) %{ + match(Set dst_src (RShiftVS (Binary dst_src shift) vmask)); + ins_cost(VEC_COST); + effect(TEMP_DEF dst_src, TEMP v0); + format %{ "vasrS_masked $dst_src, $dst_src, $shift, $vmask\t# KILL $v0" %} + ins_encode %{ + __ rvv_vsetvli(T_SHORT, Matcher::vector_length_in_bytes(this)); + __ vmsgtu_vi(as_VectorRegister($v0$$reg), as_VectorRegister($shift$$reg), BitsPerShort - 1); + // clamp shift counts above BitsPerShort - 1 to BitsPerShort - 1; NOTE(review): the result is written back into the shift operand, which effect() does not declare as modified - confirm this is safe + __ vmerge_vim(as_VectorRegister($shift$$reg), as_VectorRegister($shift$$reg), BitsPerShort - 1); + // copy vmask into v0, then shift only the lanes it selects + __ vmv1r_v(as_VectorRegister($v0$$reg), as_VectorRegister($vmask$$reg)); + __ vsra_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), + as_VectorRegister($shift$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +instruct vasrI_masked(vReg dst_src, vReg 
shift, vRegMask_V0 v0) %{ + match(Set dst_src (RShiftVI (Binary dst_src shift) v0)); + ins_cost(VEC_COST); + effect(TEMP_DEF dst_src); + format %{ "vasrI_masked $dst_src, $dst_src, $shift, $v0" %} + ins_encode %{ + __ rvv_vsetvli(T_INT, Matcher::vector_length_in_bytes(this)); + __ vsra_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), + as_VectorRegister($shift$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +instruct vasrL_masked(vReg dst_src, vReg shift, vRegMask_V0 v0) %{ + match(Set dst_src (RShiftVL (Binary dst_src shift) v0)); + ins_cost(VEC_COST); + effect(TEMP_DEF dst_src); + format %{ "vasrL_masked $dst_src, $dst_src, $shift, $v0" %} + ins_encode %{ + __ rvv_vsetvli(T_LONG, Matcher::vector_length_in_bytes(this)); + __ vsra_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), + as_VectorRegister($shift$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +instruct vlslB(vReg dst, vReg src, vReg shift, vRegMask_V0 v0) %{ match(Set dst (LShiftVB src shift)); ins_cost(VEC_COST); - effect( TEMP_DEF dst); - format %{ "vmsgtu.vi v0, $shift, 7\t#@vlslB\n\t" - "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" - "vmnot.m v0, v0\n\t" - "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} + effect(TEMP_DEF dst, TEMP v0); + format %{ "vlslB $dst, $src, $shift" %} ins_encode %{ __ rvv_vsetvli(T_BYTE, Matcher::vector_length_in_bytes(this)); // if shift > BitsPerByte - 1, clear the element - __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); + __ vmsgtu_vi(as_VectorRegister($v0$$reg), as_VectorRegister($shift$$reg), BitsPerByte - 1); __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg), Assembler::v0_t); // otherwise, shift - __ vmnot_m(v0, v0); + __ vmnot_m(as_VectorRegister($v0$$reg), as_VectorRegister($v0$$reg)); __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($shift$$reg), Assembler::v0_t); %} ins_pipe(pipe_slow); %} 
-instruct vlslS(vReg dst, vReg src, vReg shift) %{ +instruct vlslS(vReg dst, vReg src, vReg shift, vRegMask_V0 v0) %{ match(Set dst (LShiftVS src shift)); ins_cost(VEC_COST); - effect(TEMP_DEF dst); - format %{ "vmsgtu.vi v0, $shift, 15\t#@vlslS\n\t" - "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" - "vmnot.m v0, v0\n\t" - "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} + effect(TEMP_DEF dst, TEMP v0); + format %{ "vlslS $dst, $src, $shift" %} ins_encode %{ __ rvv_vsetvli(T_SHORT, Matcher::vector_length_in_bytes(this)); // if shift > BitsPerShort - 1, clear the element - __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); + __ vmsgtu_vi(as_VectorRegister($v0$$reg), as_VectorRegister($shift$$reg), BitsPerShort - 1); __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg), Assembler::v0_t); // otherwise, shift - __ vmnot_m(v0, v0); + __ vmnot_m(as_VectorRegister($v0$$reg), as_VectorRegister($v0$$reg)); __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($shift$$reg), Assembler::v0_t); %} @@ -1380,7 +1622,7 @@ instruct vlslS(vReg dst, vReg src, vReg shift) %{ instruct vlslI(vReg dst, vReg src, vReg shift) %{ match(Set dst (LShiftVI src shift)); ins_cost(VEC_COST); - format %{ "vsll.vv $dst, $src, $shift\t#@vlslI" %} + format %{ "vlslI $dst, $src, $shift" %} ins_encode %{ __ rvv_vsetvli(T_INT, Matcher::vector_length_in_bytes(this)); __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), @@ -1392,7 +1634,7 @@ instruct vlslI(vReg dst, vReg src, vReg shift) %{ instruct vlslL(vReg dst, vReg src, vReg shift) %{ match(Set dst (LShiftVL src shift)); ins_cost(VEC_COST); - format %{ "vsll.vv $dst, $src, $shift\t# vector (D)" %} + format %{ "vlslL $dst, $src, $shift" %} ins_encode %{ __ rvv_vsetvli(T_LONG, Matcher::vector_length_in_bytes(this)); __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), @@ -1401,55 +1643,116 @@ instruct vlslL(vReg dst, 
vReg src, vReg shift) %{ ins_pipe(pipe_slow); %} -instruct vlsrB(vReg dst, vReg src, vReg shift) %{ - match(Set dst (URShiftVB src shift)); +instruct vlslB_masked(vReg dst_src, vReg shift, vRegMask vmask, vRegMask_V0 v0) %{ + match(Set dst_src (LShiftVB (Binary dst_src shift) vmask)); ins_cost(VEC_COST); - effect(TEMP_DEF dst); - format %{ "vmsgtu.vi v0, $shift, 7\t#@vlsrB\n\t" - "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" - "vmnot.m v0, v0, v0\n\t" - "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} + effect(TEMP_DEF dst_src, TEMP v0); + format %{ "vlslB_masked $dst_src, $dst_src, $shift, $vmask\t# KILL $v0" %} ins_encode %{ __ rvv_vsetvli(T_BYTE, Matcher::vector_length_in_bytes(this)); // if shift > BitsPerByte - 1, clear the element - __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); - __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), - as_VectorRegister($src$$reg), Assembler::v0_t); + __ vmsgtu_vi(as_VectorRegister($v0$$reg), as_VectorRegister($shift$$reg), BitsPerByte - 1); + __ vmand_mm(as_VectorRegister($v0$$reg), as_VectorRegister($v0$$reg), + as_VectorRegister($vmask$$reg)); + __ vxor_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), + as_VectorRegister($dst_src$$reg), Assembler::v0_t); // otherwise, shift - __ vmnot_m(v0, v0); - __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), + __ vmv1r_v(as_VectorRegister($v0$$reg), as_VectorRegister($vmask$$reg)); + __ vsll_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), as_VectorRegister($shift$$reg), Assembler::v0_t); %} ins_pipe(pipe_slow); %} -instruct vlsrS(vReg dst, vReg src, vReg shift) %{ - match(Set dst (URShiftVS src shift)); +instruct vlslS_masked(vReg dst_src, vReg shift, vRegMask vmask, vRegMask_V0 v0) %{ + match(Set dst_src (LShiftVS (Binary dst_src shift) vmask)); ins_cost(VEC_COST); - effect(TEMP_DEF dst); - format %{ "vmsgtu.vi v0, $shift, 15\t#@vlsrS\n\t" - "vxor.vv $dst, $src, $src, 
Assembler::v0_t\n\t" - "vmnot.m v0, v0\n\t" - "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} + effect(TEMP_DEF dst_src, TEMP v0); + format %{ "vlslS_masked $dst_src, $dst_src, $shift, $vmask\t# KILL $v0" %} ins_encode %{ __ rvv_vsetvli(T_SHORT, Matcher::vector_length_in_bytes(this)); // if shift > BitsPerShort - 1, clear the element - __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); + __ vmsgtu_vi(as_VectorRegister($v0$$reg), as_VectorRegister($shift$$reg), BitsPerShort - 1); + __ vmand_mm(as_VectorRegister($v0$$reg), as_VectorRegister($v0$$reg), + as_VectorRegister($vmask$$reg)); + __ vxor_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), + as_VectorRegister($dst_src$$reg), Assembler::v0_t); + // otherwise, shift + __ vmv1r_v(as_VectorRegister($v0$$reg), as_VectorRegister($vmask$$reg)); + __ vsll_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), + as_VectorRegister($shift$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +instruct vlslI_masked(vReg dst_src, vReg shift, vRegMask_V0 v0) %{ + match(Set dst_src (LShiftVI (Binary dst_src shift) v0)); + ins_cost(VEC_COST); + effect(TEMP_DEF dst_src); + format %{ "vlslI_masked $dst_src, $dst_src, $shift, $v0" %} + ins_encode %{ + __ rvv_vsetvli(T_INT, Matcher::vector_length_in_bytes(this)); + __ vsll_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), + as_VectorRegister($shift$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +instruct vlslL_masked(vReg dst_src, vReg shift, vRegMask_V0 v0) %{ + match(Set dst_src (LShiftVL (Binary dst_src shift) v0)); + ins_cost(VEC_COST); + effect(TEMP_DEF dst_src); + format %{ "vlslL_masked $dst_src, $dst_src, $shift, $v0" %} + ins_encode %{ + __ rvv_vsetvli(T_LONG, Matcher::vector_length_in_bytes(this)); + __ vsll_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), + as_VectorRegister($shift$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +instruct 
vlsrB(vReg dst, vReg src, vReg shift, vRegMask_V0 v0) %{ + match(Set dst (URShiftVB src shift)); + ins_cost(VEC_COST); + effect(TEMP_DEF dst, TEMP v0); + format %{ "vlsrB $dst, $src, $shift" %} + ins_encode %{ + __ rvv_vsetvli(T_BYTE, Matcher::vector_length_in_bytes(this)); + // if shift > BitsPerByte - 1, clear the element + __ vmsgtu_vi(as_VectorRegister($v0$$reg), as_VectorRegister($shift$$reg), BitsPerByte - 1); __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg), Assembler::v0_t); // otherwise, shift - __ vmnot_m(v0, v0); + __ vmnot_m(as_VectorRegister($v0$$reg), as_VectorRegister($v0$$reg)); __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($shift$$reg), Assembler::v0_t); %} ins_pipe(pipe_slow); %} +instruct vlsrS(vReg dst, vReg src, vReg shift, vRegMask_V0 v0) %{ + match(Set dst (URShiftVS src shift)); + ins_cost(VEC_COST); + effect(TEMP_DEF dst, TEMP v0); + format %{ "vlsrS $dst, $src, $shift" %} + ins_encode %{ + __ rvv_vsetvli(T_SHORT, Matcher::vector_length_in_bytes(this)); + // if shift > BitsPerShort - 1, clear the element + __ vmsgtu_vi(as_VectorRegister($v0$$reg), as_VectorRegister($shift$$reg), BitsPerShort - 1); + __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), + as_VectorRegister($src$$reg), Assembler::v0_t); + // otherwise, shift + __ vmnot_m(as_VectorRegister($v0$$reg), as_VectorRegister($v0$$reg)); + __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), + as_VectorRegister($shift$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} instruct vlsrI(vReg dst, vReg src, vReg shift) %{ match(Set dst (URShiftVI src shift)); ins_cost(VEC_COST); - format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrI" %} + format %{ "vlsrI $dst, $src, $shift" %} ins_encode %{ __ rvv_vsetvli(T_INT, Matcher::vector_length_in_bytes(this)); __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), @@ -1458,11 +1761,10 @@ instruct 
vlsrI(vReg dst, vReg src, vReg shift) %{ ins_pipe(pipe_slow); %} - instruct vlsrL(vReg dst, vReg src, vReg shift) %{ match(Set dst (URShiftVL src shift)); ins_cost(VEC_COST); - format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrL" %} + format %{ "vlsrL $dst, $src, $shift" %} ins_encode %{ __ rvv_vsetvli(T_LONG, Matcher::vector_length_in_bytes(this)); __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), @@ -1471,6 +1773,74 @@ instruct vlsrL(vReg dst, vReg src, vReg shift) %{ ins_pipe(pipe_slow); %} +instruct vlsrB_masked(vReg dst_src, vReg shift, vRegMask vmask, vRegMask_V0 v0) %{ + match(Set dst_src (URShiftVB (Binary dst_src shift) vmask)); + ins_cost(VEC_COST); + effect(TEMP_DEF dst_src, TEMP v0); + format %{ "vlsrB_masked $dst_src, $dst_src, $shift, $vmask\t# KILL $v0" %} + ins_encode %{ + __ rvv_vsetvli(T_BYTE, Matcher::vector_length_in_bytes(this)); + // if shift > BitsPerByte - 1, clear the element + __ vmsgtu_vi(as_VectorRegister($v0$$reg), as_VectorRegister($shift$$reg), BitsPerByte - 1); + __ vmand_mm(as_VectorRegister($v0$$reg), as_VectorRegister($v0$$reg), + as_VectorRegister($vmask$$reg)); + __ vxor_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), + as_VectorRegister($dst_src$$reg), Assembler::v0_t); + // otherwise, shift + __ vmv1r_v(as_VectorRegister($v0$$reg), as_VectorRegister($vmask$$reg)); + __ vsrl_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), + as_VectorRegister($shift$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +instruct vlsrS_masked(vReg dst_src, vReg shift, vRegMask vmask, vRegMask_V0 v0) %{ + match(Set dst_src (URShiftVS (Binary dst_src shift) vmask)); + ins_cost(VEC_COST); + effect(TEMP_DEF dst_src, TEMP v0); + format %{ "vlsrS_masked $dst_src, $dst_src, $shift, $vmask\t# KILL $v0" %} + ins_encode %{ + __ rvv_vsetvli(T_SHORT, Matcher::vector_length_in_bytes(this)); + // if shift > BitsPerShort - 1, clear the element + __ vmsgtu_vi(as_VectorRegister($v0$$reg), 
as_VectorRegister($shift$$reg), BitsPerShort - 1); + __ vmand_mm(as_VectorRegister($v0$$reg), as_VectorRegister($v0$$reg), + as_VectorRegister($vmask$$reg)); + __ vxor_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), + as_VectorRegister($dst_src$$reg), Assembler::v0_t); + // otherwise, shift + __ vmv1r_v(as_VectorRegister($v0$$reg), as_VectorRegister($vmask$$reg)); + __ vsrl_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), + as_VectorRegister($shift$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +instruct vlsrI_masked(vReg dst_src, vReg shift, vRegMask_V0 v0) %{ + match(Set dst_src (URShiftVI (Binary dst_src shift) v0)); + ins_cost(VEC_COST); + effect(TEMP_DEF dst_src); + format %{ "vlsrI_masked $dst_src, $dst_src, $shift, $v0" %} + ins_encode %{ + __ rvv_vsetvli(T_INT, Matcher::vector_length_in_bytes(this)); + __ vsrl_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), + as_VectorRegister($shift$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +instruct vlsrL_masked(vReg dst_src, vReg shift, vRegMask_V0 v0) %{ + match(Set dst_src (URShiftVL (Binary dst_src shift) v0)); + ins_cost(VEC_COST); + effect(TEMP_DEF dst_src); + format %{ "vlsrL_masked $dst_src, $dst_src, $shift, $v0" %} + ins_encode %{ + __ rvv_vsetvli(T_LONG, Matcher::vector_length_in_bytes(this)); + __ vsrl_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), + as_VectorRegister($shift$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + instruct vasrB_imm(vReg dst, vReg src, immI shift) %{ match(Set dst (RShiftVB src (RShiftCntV shift))); ins_cost(VEC_COST); @@ -1827,13 +2197,45 @@ instruct vsubD(vReg dst, vReg src1, vReg src2) %{ ins_pipe(pipe_slow); %} +// vector sub - predicated + +instruct vsub_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{ + match(Set dst_src1 (SubVB (Binary dst_src1 src2) v0)); + match(Set dst_src1 (SubVS (Binary dst_src1 src2) v0)); + match(Set dst_src1 (SubVI 
(Binary dst_src1 src2) v0)); + match(Set dst_src1 (SubVL (Binary dst_src1 src2) v0)); + ins_cost(VEC_COST); + format %{ "vsub.vv $dst_src1, $src2, $v0\t#@vsub_masked" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ rvv_vsetvli(bt, Matcher::vector_length_in_bytes(this)); + __ vsub_vv(as_VectorRegister($dst_src1$$reg), as_VectorRegister($dst_src1$$reg), + as_VectorRegister($src2$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +instruct vsub_fp_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{ + match(Set dst_src1 (SubVF (Binary dst_src1 src2) v0)); + match(Set dst_src1 (SubVD (Binary dst_src1 src2) v0)); + ins_cost(VEC_COST); + format %{ "vfsub.vv $dst_src1, $src2, $v0\t#@vsub_fp_masked" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ rvv_vsetvli(bt, Matcher::vector_length_in_bytes(this)); + __ vfsub_vv(as_VectorRegister($dst_src1$$reg), as_VectorRegister($dst_src1$$reg), + as_VectorRegister($src2$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + instruct vstring_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, iRegI_R10 result, vReg_V1 v1, - vReg_V2 v2, vReg_V3 v3, rFlagsReg cr) + vReg_V2 v2, vReg_V3 v3, vRegMask_V0 v0, rFlagsReg cr) %{ predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); match(Set result (StrEquals (Binary str1 str2) cnt)); - effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP v1, TEMP v2, TEMP v3, KILL cr); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP v1, TEMP v2, TEMP v3, TEMP v0, KILL cr); format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsL" %} ins_encode %{ @@ -1846,11 +2248,11 @@ instruct vstring_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, instruct vstring_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, iRegI_R10 result, vReg_V1 v1, - vReg_V2 v2, vReg_V3 v3, rFlagsReg cr) + vReg_V2 v2, vReg_V3 v3, vRegMask_V0 v0, rFlagsReg cr) %{ predicate(UseRVV && 
((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); match(Set result (StrEquals (Binary str1 str2) cnt)); - effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP v1, TEMP v2, TEMP v3, KILL cr); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP v1, TEMP v2, TEMP v3, TEMP v0, KILL cr); format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsU" %} ins_encode %{ @@ -1862,11 +2264,11 @@ instruct vstring_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, %} instruct varray_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, - vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg cr) + vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vRegMask_V0 v0, iRegP_R28 tmp, rFlagsReg cr) %{ predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); match(Set result (AryEq ary1 ary2)); - effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL cr); + effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, TEMP v0, KILL cr); format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsB // KILL $tmp" %} ins_encode %{ @@ -1877,11 +2279,11 @@ instruct varray_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, %} instruct varray_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, - vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg cr) + vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vRegMask_V0 v0, iRegP_R28 tmp, rFlagsReg cr) %{ predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); match(Set result (AryEq ary1 ary2)); - effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL cr); + effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, TEMP v0, KILL cr); format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsC // KILL $tmp" %} ins_encode %{ @@ -1893,12 +2295,12 @@ instruct varray_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, instruct vstring_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, 
iRegI_R14 cnt2, iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, - iRegP_R28 tmp1, iRegL_R29 tmp2) + vRegMask_V0 v0, iRegP_R28 tmp1, iRegL_R29 tmp2) %{ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, - TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); + TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5, TEMP v0); format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %} ins_encode %{ @@ -1912,12 +2314,12 @@ instruct vstring_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_ %} instruct vstring_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, - iRegP_R28 tmp1, iRegL_R29 tmp2) + vRegMask_V0 v0, iRegP_R28 tmp1, iRegL_R29 tmp2) %{ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, - TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); + TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5, TEMP v0); format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %} ins_encode %{ @@ -1931,12 +2333,12 @@ instruct vstring_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_ instruct vstring_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, - iRegP_R28 tmp1, iRegL_R29 tmp2) + vRegMask_V0 v0, iRegP_R28 tmp1, iRegL_R29 tmp2) %{ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, 
USE_KILL cnt2, - TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); + TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5, TEMP v0); format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %} ins_encode %{ @@ -1949,12 +2351,12 @@ instruct vstring_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI %} instruct vstring_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, - iRegP_R28 tmp1, iRegL_R29 tmp2) + vRegMask_V0 v0, iRegP_R28 tmp1, iRegL_R29 tmp2) %{ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, - TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); + TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5, TEMP v0); format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %} ins_encode %{ @@ -1968,11 +2370,11 @@ instruct vstring_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI // fast byte[] to char[] inflation instruct vstring_inflate(Universe dummy, iRegP_R10 src, iRegP_R11 dst, iRegI_R12 len, - vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegLNoSp tmp) + vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vRegMask_V0 v0, iRegLNoSp tmp) %{ predicate(UseRVV); match(Set dummy (StrInflatedCopy src (Binary dst len))); - effect(TEMP v1, TEMP v2, TEMP v3, TEMP tmp, USE_KILL src, USE_KILL dst, USE_KILL len); + effect(TEMP v1, TEMP v2, TEMP v3, TEMP v0, TEMP tmp, USE_KILL src, USE_KILL dst, USE_KILL len); format %{ "String Inflate $src,$dst" %} ins_encode %{ @@ -1983,12 +2385,12 @@ instruct vstring_inflate(Universe dummy, iRegP_R10 src, iRegP_R11 dst, iRegI_R12 // encode char[] to byte[] in ISO_8859_1 instruct vencode_iso_array(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result, - vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegLNoSp tmp) + vReg_V1 v1, 
vReg_V2 v2, vReg_V3 v3, vRegMask_V0 v0, iRegLNoSp tmp) %{ predicate(UseRVV); match(Set result (EncodeISOArray src (Binary dst len))); effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len, - TEMP v1, TEMP v2, TEMP v3, TEMP tmp); + TEMP v1, TEMP v2, TEMP v3, TEMP tmp, TEMP v0); format %{ "Encode array $src,$dst,$len -> $result" %} ins_encode %{ @@ -2000,12 +2402,12 @@ instruct vencode_iso_array(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R1 // fast char[] to byte[] compression instruct vstring_compress(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result, - vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegLNoSp tmp) + vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vRegMask_V0 v0, iRegLNoSp tmp) %{ predicate(UseRVV); match(Set result (StrCompressedCopy src (Binary dst len))); effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len, - TEMP v1, TEMP v2, TEMP v3, TEMP tmp); + TEMP v1, TEMP v2, TEMP v3, TEMP tmp, TEMP v0); format %{ "String Compress $src,$dst -> $result // KILL R11, R12, R13" %} ins_encode %{ @@ -2016,11 +2418,11 @@ instruct vstring_compress(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 %} instruct vcount_positives(iRegP_R11 ary, iRegI_R12 len, iRegI_R10 result, - vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegLNoSp tmp) + vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vRegMask_V0 v0, iRegLNoSp tmp) %{ predicate(UseRVV); match(Set result (CountPositives ary len)); - effect(TEMP_DEF result, USE_KILL ary, USE_KILL len, TEMP v1, TEMP v2, TEMP v3, TEMP tmp); + effect(TEMP_DEF result, USE_KILL ary, USE_KILL len, TEMP v1, TEMP v2, TEMP v3, TEMP tmp, TEMP v0); format %{ "count positives byte[] $ary, $len -> $result" %} ins_encode %{ @@ -2032,12 +2434,12 @@ instruct vcount_positives(iRegP_R11 ary, iRegI_R12 len, iRegI_R10 result, instruct vstringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, - vReg_V1 v1, vReg_V2 v2, vReg_V3 v3) + vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vRegMask_V0 v0) %{ 
predicate(UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); effect(TEMP_DEF result, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, - TEMP tmp1, TEMP tmp2, TEMP v1, TEMP v2, TEMP v3); + TEMP tmp1, TEMP tmp2, TEMP v1, TEMP v2, TEMP v3, TEMP v0); format %{ "StringUTF16 IndexOf char[] $str1, $cnt1, $ch -> $result" %} @@ -2052,12 +2454,12 @@ instruct vstringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, instruct vstringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, - vReg_V1 v1, vReg_V2 v2, vReg_V3 v3) + vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vRegMask_V0 v0) %{ predicate(UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); effect(TEMP_DEF result, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, - TEMP tmp1, TEMP tmp2, TEMP v1, TEMP v2, TEMP v3); + TEMP tmp1, TEMP tmp2, TEMP v1, TEMP v2, TEMP v3, TEMP v0); format %{ "StringLatin1 IndexOf char[] $str1, $cnt1, $ch -> $result" %} @@ -2072,11 +2474,11 @@ instruct vstringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, // clearing of an array instruct vclearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy, - vReg_V1 vReg1, vReg_V2 vReg2, vReg_V3 vReg3) + vReg_V1 vReg1, vReg_V2 vReg2, vReg_V3 vReg3, vRegMask_V0 v0) %{ predicate(!UseBlockZeroing && UseRVV); match(Set dummy (ClearArray cnt base)); - effect(USE_KILL cnt, USE_KILL base, TEMP vReg1, TEMP vReg2, TEMP vReg3); + effect(USE_KILL cnt, USE_KILL base, TEMP vReg1, TEMP vReg2, TEMP vReg3, TEMP v0); format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %} @@ -2101,4 +2503,188 @@ instruct vloadcon(vReg dst, immI0 src) %{ } %} ins_pipe(pipe_slow); +%} + +instruct vmask_gen_I(vRegMask dst, iRegI src) %{ + match(Set dst (VectorMaskGen (ConvI2L src))); + format %{ "vmask_gen_I $dst, $src" %} + ins_encode %{ + BasicType bt = 
Matcher::vector_element_basic_type(this); + Assembler::SEW sew = Assembler::elemtype_to_sew(bt); + __ vsetvli(t0, $src$$Register, sew); + __ vmset_m(as_VectorRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vmask_gen_L(vRegMask dst, iRegL src) %{ + match(Set dst (VectorMaskGen src)); + format %{ "vmask_gen_L $dst, $src" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + Assembler::SEW sew = Assembler::elemtype_to_sew(bt); + __ vsetvli(t0, $src$$Register, sew); + __ vmset_m(as_VectorRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vmask_gen_imm(vRegMask dst, immL con) %{ + match(Set dst (VectorMaskGen con)); + format %{ "vmask_gen_imm $dst, $con" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ rvv_vsetvli(bt, (uint)($con$$constant) * type2aelembytes(bt)); + __ vmset_m(as_VectorRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vmaskAll_immI(vRegMask dst, immI src) %{ + match(Set dst (MaskAll src)); + format %{ "vmaskAll_immI $dst, $src" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ rvv_vsetvli(bt, Matcher::vector_length_in_bytes(this)); + int con = (int)$src$$constant; + if (con == 0) { + __ vmclr_m(as_VectorRegister($dst$$reg)); + } else { + assert(con == -1, "invalid constant value for mask"); + __ vmset_m(as_VectorRegister($dst$$reg)); + } + %} + ins_pipe(pipe_slow); +%} + +instruct vmaskAllI(vRegMask dst, iRegI src) %{ + match(Set dst (MaskAll src)); + format %{ "vmaskAllI $dst, $src" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ rvv_vsetvli(bt, Matcher::vector_length_in_bytes(this)); + __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); + __ vmsne_vx(as_VectorRegister($dst$$reg), as_VectorRegister($dst$$reg), zr); + %} + ins_pipe(pipe_slow); +%} + +instruct vmaskAll_immL(vRegMask dst, immL src) %{ + match(Set dst (MaskAll src)); + format %{ "vmaskAll_immL $dst, $src" %} + ins_encode %{ 
+ BasicType bt = Matcher::vector_element_basic_type(this); + __ rvv_vsetvli(bt, Matcher::vector_length_in_bytes(this)); + long con = (long)$src$$constant; + if (con == 0) { + __ vmclr_m(as_VectorRegister($dst$$reg)); + } else { + assert(con == -1, "invalid constant value for mask"); + __ vmset_m(as_VectorRegister($dst$$reg)); + } + %} + ins_pipe(pipe_slow); +%} + +instruct vmaskAllL(vRegMask dst, iRegL src) %{ + match(Set dst (MaskAll src)); + format %{ "vmaskAllL $dst, $src" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ rvv_vsetvli(bt, Matcher::vector_length_in_bytes(this)); + __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); + __ vmsne_vx(as_VectorRegister($dst$$reg), as_VectorRegister($dst$$reg), zr); + %} + ins_pipe(pipe_slow); +%} + +// ------------------------------ Vector mask basic OPs ------------------------ + +// vector mask logical ops: and/or/xor + +instruct vmask_and(vRegMask dst, vRegMask src1, vRegMask src2) %{ + match(Set dst (AndVMask src1 src2)); + format %{ "vmask_and $dst, $src1, $src2" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ rvv_vsetvli(bt, Matcher::vector_length_in_bytes(this)); + __ vmand_mm(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), + as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vmask_or(vRegMask dst, vRegMask src1, vRegMask src2) %{ + match(Set dst (OrVMask src1 src2)); + format %{ "vmask_or $dst, $src1, $src2" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ rvv_vsetvli(bt, Matcher::vector_length_in_bytes(this)); + __ vmor_mm(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), + as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vmask_xor(vRegMask dst, vRegMask src1, vRegMask src2) %{ + match(Set dst (XorVMask src1 src2)); + format %{ "vmask_xor $dst, $src1, $src2" %} + ins_encode %{ + BasicType bt = 
Matcher::vector_element_basic_type(this); + __ rvv_vsetvli(bt, Matcher::vector_length_in_bytes(this)); + __ vmxor_mm(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), + as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vmaskcast(vRegMask dst_src) %{ + match(Set dst_src (VectorMaskCast dst_src)); + ins_cost(0); + format %{ "vmaskcast $dst_src\t# do nothing" %} + ins_encode(/* empty encoding */); + ins_pipe(pipe_class_empty); +%} + +// vector load/store - predicated + +instruct loadV_masked(vReg dst, vmemA mem, vRegMask_V0 v0) %{ + match(Set dst (LoadVectorMasked mem v0)); + format %{ "loadV_masked $dst, $mem, $v0" %} + ins_encode %{ + VectorRegister dst_reg = as_VectorRegister($dst$$reg); + loadStore(C2_MacroAssembler(&cbuf), false, dst_reg, + Matcher::vector_element_basic_type(this), as_Register($mem$$base), + Matcher::vector_length_in_bytes(this), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +instruct storeV_masked(vReg src, vmemA mem, vRegMask_V0 v0) %{ + match(Set mem (StoreVectorMasked mem (Binary src v0))); + format %{ "storeV_masked $mem, $src, $v0" %} + ins_encode %{ + VectorRegister src_reg = as_VectorRegister($src$$reg); + loadStore(C2_MacroAssembler(&cbuf), true, src_reg, + Matcher::vector_element_basic_type(this, $src), as_Register($mem$$base), + Matcher::vector_length_in_bytes(this, $src), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +// ------------------------------ Vector blend --------------------------------- + +instruct vblend(vReg dst, vReg src1, vReg src2, vRegMask_V0 v0) %{ + match(Set dst (VectorBlend (Binary src1 src2) v0)); + format %{ "vmerge_vvm $dst, $src1, $src2, v0\t#@vector blend" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ rvv_vsetvli(bt, Matcher::vector_length_in_bytes(this)); + __ vmerge_vvm(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), + as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); %} \ No newline at end of file