diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp
index 35f36a1f1c0..d1669cd3737 100644
--- a/src/hotspot/cpu/riscv/assembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp
@@ -1488,6 +1488,16 @@ enum VectorMask {
 
 #undef INSN
 
+#define INSN(NAME, op, funct3, vm, funct6)                                                         \
+  void NAME(VectorRegister Vd, VectorRegister Vs2, Register Rs1) {                                 \
+    patch_VArith(op, Vd, funct3, Rs1->raw_encoding(), Vs2, vm, funct6);                            \
+  }
+
+  // Vector Integer Merge Instructions
+  INSN(vmerge_vxm,  0b1010111, 0b100, 0b0, 0b010111);
+
+#undef INSN
+
 #define INSN(NAME, op, funct3, funct6)                                                             \
   void NAME(VectorRegister Vd, VectorRegister Vs2, FloatRegister Rs1, VectorMask vm = unmasked) {  \
     patch_VArith(op, Vd, funct3, Rs1->raw_encoding(), Vs2, vm, funct6);                            \
@@ -1542,6 +1552,17 @@ enum VectorMask {
 
 #undef INSN
 
+#define INSN(NAME, op, funct3, vm, funct6)                                    \
+  void NAME(VectorRegister Vd, VectorRegister Vs2, int32_t imm) {             \
+    guarantee(is_simm5(imm), "imm is invalid");                               \
+    patch_VArith(op, Vd, funct3, (uint32_t)(imm & 0x1f), Vs2, vm, funct6);    \
+  }
+
+  // Vector Integer Merge Instructions
+  INSN(vmerge_vim,  0b1010111, 0b011, 0b0, 0b010111);
+
+#undef INSN
+
 #define INSN(NAME, op, funct3, vm, funct6)                                   \
   void NAME(VectorRegister Vd, VectorRegister Vs2, VectorRegister Vs1) {     \
     patch_VArith(op, Vd, funct3, Vs1->raw_encoding(), Vs2, vm, funct6);      \
@@ -1560,6 +1581,9 @@ enum VectorMask {
   INSN(vmnand_mm,   0b1010111, 0b010, 0b1, 0b011101);
   INSN(vmand_mm,    0b1010111, 0b010, 0b1, 0b011001);
 
+  // Vector Integer Merge Instructions
+  INSN(vmerge_vvm,  0b1010111, 0b000, 0b0, 0b010111);
+
 #undef INSN
 
 #define INSN(NAME, op, funct3, Vs2, vm, funct6)                            \
diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
index ba8f221e291..01d99db782c 100644
--- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
@@ -1304,7 +1304,7 @@ void C2_MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Regis
 }
 
 // Set dst to NaN if any NaN input.
-void C2_MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2,
+void C2_MacroAssembler::minmax_fp(FloatRegister dst, FloatRegister src1, FloatRegister src2,
                                   bool is_double, bool is_min) {
   assert_different_registers(dst, src1, src2);
 
@@ -1616,7 +1616,7 @@ void C2_MacroAssembler::string_indexof_char_v(Register str1, Register cnt1,
 }
 
 // Set dst to NaN if any NaN input.
-void C2_MacroAssembler::minmax_FD_v(VectorRegister dst, VectorRegister src1, VectorRegister src2,
+void C2_MacroAssembler::minmax_fp_v(VectorRegister dst, VectorRegister src1, VectorRegister src2,
                                     bool is_double, bool is_min, int length_in_bytes) {
   assert_different_registers(dst, src1, src2);
 
@@ -1632,7 +1632,7 @@ void C2_MacroAssembler::minmax_FD_v(VectorRegister dst, VectorRegister src1, Vec
 }
 
 // Set dst to NaN if any NaN input.
-void C2_MacroAssembler::reduce_minmax_FD_v(FloatRegister dst,
+void C2_MacroAssembler::reduce_minmax_fp_v(FloatRegister dst,
                                            FloatRegister src1, VectorRegister src2,
                                            VectorRegister tmp1, VectorRegister tmp2,
                                            bool is_double, bool is_min, int length_in_bytes) {
@@ -1722,3 +1722,64 @@ void C2_MacroAssembler::rvv_vsetvli(BasicType bt, int length_in_bytes, Register
     }
   }
 }
+
+void C2_MacroAssembler::compare_integral_v(VectorRegister vd, BasicType bt, int length_in_bytes,
+                                           VectorRegister src1, VectorRegister src2, int cond, VectorMask vm) {  // vd[i] = (src1[i] cond src2[i]) for integral element type bt; vm selects the masked (v0.t) form
+  assert(is_integral_type(bt), "unsupported element type");
+  assert(vm == Assembler::v0_t ? vd != v0 : true, "should be different registers");  // the masked form reads its mask from v0, so vd must not clobber it
+  rvv_vsetvli(bt, length_in_bytes);
+  vmclr_m(vd);  // pre-clear dst so lanes skipped by a masked compare read as 0
+  switch (cond) {
+    case BoolTest::eq: vmseq_vv(vd, src1, src2, vm); break;
+    case BoolTest::ne: vmsne_vv(vd, src1, src2, vm); break;
+    case BoolTest::le: vmsle_vv(vd, src1, src2, vm); break;
+    case BoolTest::ge: vmsge_vv(vd, src1, src2, vm); break;  // pseudo-instruction: vmsle.vv with swapped operands
+    case BoolTest::lt: vmslt_vv(vd, src1, src2, vm); break;
+    case BoolTest::gt: vmsgt_vv(vd, src1, src2, vm); break;  // pseudo-instruction: vmslt.vv with swapped operands
+    default:
+      assert(false, "unsupported compare condition");
+      ShouldNotReachHere();
+  }
+}
+
+void C2_MacroAssembler::compare_floating_point_v(VectorRegister vd, BasicType bt, int length_in_bytes,
+                                                 VectorRegister src1, VectorRegister src2,
+                                                 VectorRegister tmp1, VectorRegister tmp2,
+                                                 VectorRegister vmask, int cond, VectorMask vm) {  // vd[i] = (src1[i] cond src2[i]) with Java NaN semantics; vmask supplies the input mask when vm == v0_t
+  assert(is_floating_point_type(bt), "unsupported element type");
+  assert(vd != v0, "should be different registers");  // v0 is used below as the implicit mask of the compares
+  assert(vm == Assembler::v0_t ? vmask != v0 : true, "vmask should not be v0");
+  rvv_vsetvli(bt, length_in_bytes);
+  // Check vector elements of src1 and src2 for quiet or signaling NaN.
+  vfclass_v(tmp1, src1);
+  vfclass_v(tmp2, src2);
+  vsrl_vi(tmp1, tmp1, 8);  // vfclass NaN bits are 8 (sNaN) and 9 (qNaN); shift them to the bottom
+  vsrl_vi(tmp2, tmp2, 8);
+  vmseq_vx(tmp1, tmp1, zr);  // tmp1[i] = 1  <=>  src1[i] is not NaN
+  vmseq_vx(tmp2, tmp2, zr);  // tmp2[i] = 1  <=>  src2[i] is not NaN
+  if (vm == Assembler::v0_t) {
+    vmand_mm(tmp2, tmp1, tmp2);  // tmp2 = neither input is NaN
+    if (cond == BoolTest::ne) {
+      vmandn_mm(tmp1, vmask, tmp2);  // tmp1 = active lanes with at least one NaN (NaN != x is true)
+    }
+    vmand_mm(v0, vmask, tmp2);  // compare only active, NaN-free lanes
+  } else {
+    vmand_mm(v0, tmp1, tmp2);  // compare only NaN-free lanes
+    if (cond == BoolTest::ne) {
+      vmnot_m(tmp1, v0);  // tmp1 = lanes with at least one NaN input
+    }
+  }
+  vmclr_m(vd);  // pre-clear dst so lanes skipped by the masked compare read as 0
+  switch (cond) {
+    case BoolTest::eq: vmfeq_vv(vd, src1, src2, Assembler::v0_t); break;
+    case BoolTest::ne: vmfne_vv(vd, src1, src2, Assembler::v0_t);
+                       vmor_mm(vd, vd, tmp1); break;  // fold NaN lanes back in: they compare not-equal
+    case BoolTest::le: vmfle_vv(vd, src1, src2, Assembler::v0_t); break;
+    case BoolTest::ge: vmfge_vv(vd, src1, src2, Assembler::v0_t); break;  // pseudo-instruction: vmfle.vv with swapped operands
+    case BoolTest::lt: vmflt_vv(vd, src1, src2, Assembler::v0_t); break;
+    case BoolTest::gt: vmfgt_vv(vd, src1, src2, Assembler::v0_t); break;  // pseudo-instruction: vmflt.vv with swapped operands
+    default:
+      assert(false, "unsupported compare condition");
+      ShouldNotReachHere();
+  }
+}
diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp
index 94a5068fd9a..30aac05f40b 100644
--- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp
@@ -137,13 +137,15 @@
     vl1re8_v(v, t0);
   }
 
-  void spill_copy_vector_stack_to_stack(int src_offset, int dst_offset, int vec_reg_size_in_bytes) {
-    assert(vec_reg_size_in_bytes % 16 == 0, "unexpected vector reg size");
-    unspill(v0, src_offset);
-    spill(v0, dst_offset);
+  void spill_copy_vector_stack_to_stack(int src_offset, int dst_offset, int vector_length_in_bytes) {
+    assert(vector_length_in_bytes % 16 == 0, "unexpected vector reg size");
+    for (int i = 0; i < vector_length_in_bytes / 8; i++) {
+      unspill(t0, true, src_offset + (i * 8));
+      spill(t0, true, dst_offset + (i * 8));
+    }
   }
 
-  void minmax_FD(FloatRegister dst,
+  void minmax_fp(FloatRegister dst,
                  FloatRegister src1, FloatRegister src2,
                  bool is_double, bool is_min);
 
@@ -183,11 +185,11 @@
                             Register tmp1, Register tmp2,
                             bool isL);
 
- void minmax_FD_v(VectorRegister dst,
+ void minmax_fp_v(VectorRegister dst,
                   VectorRegister src1, VectorRegister src2,
                   bool is_double, bool is_min, int length_in_bytes);
 
- void reduce_minmax_FD_v(FloatRegister dst,
+ void reduce_minmax_fp_v(FloatRegister dst,
                          FloatRegister src1, VectorRegister src2,
                          VectorRegister tmp1, VectorRegister tmp2,
                          bool is_double, bool is_min, int length_in_bytes);
@@ -198,4 +200,34 @@
 
  void rvv_vsetvli(BasicType bt, int length_in_bytes, Register tmp = t0);
 
+ void compare_integral_v(VectorRegister dst, BasicType bt, int length_in_bytes,
+                         VectorRegister src1, VectorRegister src2, int cond, VectorMask vm = Assembler::unmasked);
+
+ void compare_floating_point_v(VectorRegister dst, BasicType bt, int length_in_bytes,
+                               VectorRegister src1, VectorRegister src2, VectorRegister tmp1, VectorRegister tmp2,
+                               VectorRegister vmask, int cond, VectorMask vm = Assembler::unmasked);
+
+ // In Matcher::scalable_predicate_reg_slots we assume each predicate
+ // register is one-eighth the size of a scalable vector register,
+ // i.e. one mask bit per vector byte.
+ void spill_vmask(VectorRegister v, int offset){
+   rvv_vsetvli(T_BYTE, MaxVectorSize >> 3);
+   add(t0, sp, offset);
+   vse8_v(v, t0);
+ }
+
+ void unspill_vmask(VectorRegister v, int offset){
+   rvv_vsetvli(T_BYTE, MaxVectorSize >> 3);
+   add(t0, sp, offset);
+   vle8_v(v, t0);
+ }
+
+  void spill_copy_vmask_stack_to_stack(int src_offset, int dst_offset, int vector_length_in_bytes) {
+    assert(vector_length_in_bytes % 4 == 0, "unexpected vector mask reg size");
+    for (int i = 0; i < vector_length_in_bytes / 4; i++) {
+      unspill(t0, false, src_offset + (i * 4));
+      spill(t0, false, dst_offset + (i * 4));
+    }
+  }
+
 #endif // CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
index 18a03eff7ee..c60d1a5ad66 100644
--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
@@ -1264,7 +1264,7 @@ public:
     vmnand_mm(vd, vs, vs);
   }
 
-  inline void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm) {
+  inline void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
     vnsrl_wx(vd, vs, x0, vm);
   }
 
@@ -1276,6 +1276,45 @@ public:
     vfsgnjn_vv(vd, vs, vs);
   }
 
+  inline void vmsgt_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
+    vmslt_vv(vd, vs1, vs2, vm);  // a > b  <=>  b < a; synthesized by swapping operands
+  }
+
+  inline void vmsgtu_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
+    vmsltu_vv(vd, vs1, vs2, vm);  // unsigned a > b  <=>  b < a; synthesized by swapping operands
+  }
+
+  inline void vmsge_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
+    vmsle_vv(vd, vs1, vs2, vm);  // a >= b  <=>  b <= a; synthesized by swapping operands
+  }
+
+  inline void vmsgeu_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
+    vmsleu_vv(vd, vs1, vs2, vm);  // unsigned a >= b  <=>  b <= a; synthesized by swapping operands
+  }
+
+  inline void vmfgt_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
+    vmflt_vv(vd, vs1, vs2, vm);  // a > b  <=>  b < a; synthesized by swapping operands
+  }
+
+  inline void vmfge_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
+    vmfle_vv(vd, vs1, vs2, vm);  // a >= b  <=>  b <= a; synthesized by swapping operands
+  }
+
+  // Copy mask register
+  inline void vmmv_m(VectorRegister vd, VectorRegister vs) {
+    vmand_mm(vd, vs, vs);  // x & x == x
+  }
+
+  // Clear mask register
+  inline void vmclr_m(VectorRegister vd) {
+    vmxor_mm(vd, vd, vd);  // x ^ x == 0
+  }
+
+  // Set mask register
+  inline void vmset_m(VectorRegister vd) {
+    vmxnor_mm(vd, vd, vd);  // ~(x ^ x) == all ones
+  }
+
   static const int zero_words_block_size;
 
   void cast_primitive_type(BasicType type, Register Rt) {
diff --git a/src/hotspot/cpu/riscv/matcher_riscv.hpp b/src/hotspot/cpu/riscv/matcher_riscv.hpp
index eeee72f3910..a2b38ee4a48 100644
--- a/src/hotspot/cpu/riscv/matcher_riscv.hpp
+++ b/src/hotspot/cpu/riscv/matcher_riscv.hpp
@@ -149,7 +149,7 @@
 
   // Some microarchitectures have mask registers used on vectors
   static const bool has_predicated_vectors(void) {
-    return false;
+    return UseRVV;
   }
 
   // true means we have fast l2f conversion
diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad
index 708defd68e7..88dd95a1b8a 100644
--- a/src/hotspot/cpu/riscv/riscv.ad
+++ b/src/hotspot/cpu/riscv/riscv.ad
@@ -830,7 +830,8 @@ reg_class double_reg(
     F31, F31_H
 );
 
-// Class for all RVV vector registers
+// Class for RVV vector registers
+// Note: v0, v30 and v31 are used as mask registers.
 reg_class vectora_reg(
     V1, V1_H, V1_J, V1_K,
     V2, V2_H, V2_J, V2_K,
@@ -860,9 +861,7 @@ reg_class vectora_reg(
     V26, V26_H, V26_J, V26_K,
     V27, V27_H, V27_J, V27_K,
     V28, V28_H, V28_J, V28_K,
-    V29, V29_H, V29_J, V29_K,
-    V30, V30_H, V30_J, V30_K,
-    V31, V31_H, V31_J, V31_K
+    V29, V29_H, V29_J, V29_K
 );
 
 // Class for 64 bit register f0
@@ -912,6 +911,23 @@ reg_class v5_reg(
 
 // class for condition codes
 reg_class reg_flags(RFLAGS);
+
+// Class for RVV v0 mask register
+// https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#53-vector-masking
+// The mask value used to control execution of a masked vector
+// instruction is always supplied by vector register v0.
+reg_class vmask_reg_v0 (
+    V0
+);
+
+// Class for RVV mask registers
+// We need two more vmask registers to do the vector mask logical ops,
+// so v30 and v31 are also defined as mask registers.
+reg_class vmask_reg (
+    V0,
+    V30,
+    V31
+);
 %}
 
 //----------DEFINITION BLOCK---------------------------------------------------
@@ -1522,7 +1538,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
 
   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
 
-  if (src_hi != OptoReg::Bad) {
+  if (src_hi != OptoReg::Bad && !bottom_type()->isa_vectmask()) {
     assert((src_lo & 1) == 0 && src_lo + 1 == src_hi &&
            (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi,
            "expected aligned-adjacent pairs");
@@ -1558,6 +1574,25 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
       } else {
         ShouldNotReachHere();
       }
+    } else if (bottom_type()->isa_vectmask() && cbuf) {
+      C2_MacroAssembler _masm(cbuf);
+      int vmask_size_in_bytes = Matcher::scalable_predicate_reg_slots() * 32 / 8;
+      if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
+        // stack to stack
+        __ spill_copy_vmask_stack_to_stack(src_offset, dst_offset,
+                                           vmask_size_in_bytes);
+      } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_stack) {
+        // vmask to stack
+        __ spill_vmask(as_VectorRegister(Matcher::_regEncode[src_lo]), ra_->reg2offset(dst_lo));
+      } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_vector) {
+        // stack to vmask
+        __ unspill_vmask(as_VectorRegister(Matcher::_regEncode[dst_lo]), ra_->reg2offset(src_lo));
+      } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_vector) {
+        // vmask to vmask
+        __ vmv1r_v(as_VectorRegister(Matcher::_regEncode[dst_lo]), as_VectorRegister(Matcher::_regEncode[src_lo]));
+      } else {
+        ShouldNotReachHere();
+      }
     }
   } else if (cbuf != NULL) {
     C2_MacroAssembler _masm(cbuf);
@@ -1642,7 +1677,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
     } else {
       st->print("%s", Matcher::regName[dst_lo]);
     }
-    if (bottom_type()->isa_vect() != NULL) {
+    if (bottom_type()->isa_vect() && !bottom_type()->isa_vectmask()) {
       int vsize = 0;
       if (ideal_reg() == Op_VecA) {
         vsize = Matcher::scalable_vector_reg_size(T_BYTE) * 8;
@@ -1650,6 +1685,10 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
         ShouldNotReachHere();
       }
       st->print("\t# vector spill size = %d", vsize);
+    } else if (ideal_reg() == Op_RegVectMask) {
+      assert(Matcher::supports_scalable_vector(), "bad register type for spill");
+      int vsize = Matcher::scalable_predicate_reg_slots() * 32;
+      st->print("\t# vmask spill size = %d", vsize);
     } else {
       st->print("\t# spill size = %d", is64 ? 64 : 32);
     }
@@ -1863,7 +1902,59 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
 }
 
 const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
-  return false;
+  if (!UseRVV) {
+    return false;
+  }
+  switch (opcode) {
+    case Op_AddVB:
+    case Op_AddVS:
+    case Op_AddVI:
+    case Op_AddVL:
+    case Op_AddVF:
+    case Op_AddVD:
+    case Op_SubVB:
+    case Op_SubVS:
+    case Op_SubVI:
+    case Op_SubVL:
+    case Op_SubVF:
+    case Op_SubVD:
+    case Op_MulVB:
+    case Op_MulVS:
+    case Op_MulVI:
+    case Op_MulVL:
+    case Op_MulVF:
+    case Op_MulVD:
+    case Op_DivVF:
+    case Op_DivVD:
+    case Op_VectorLoadMask:
+    case Op_VectorMaskCmp:
+    case Op_AndVMask:
+    case Op_XorVMask:
+    case Op_OrVMask:
+    case Op_RShiftVB:
+    case Op_RShiftVS:
+    case Op_RShiftVI:
+    case Op_RShiftVL:
+    case Op_LShiftVB:
+    case Op_LShiftVS:
+    case Op_LShiftVI:
+    case Op_LShiftVL:
+    case Op_URShiftVB:
+    case Op_URShiftVS:
+    case Op_URShiftVI:
+    case Op_URShiftVL:
+    case Op_VectorBlend:
+      break;
+    case Op_LoadVector:
+      opcode = Op_LoadVectorMasked;
+      break;
+    case Op_StoreVector:
+      opcode = Op_StoreVectorMasked;
+      break;
+    default:
+      return false;
+  }
+  return match_rule_supported_vector(opcode, vlen, bt);
 }
 
 const bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
@@ -1875,11 +1966,11 @@ const bool Matcher::vector_needs_load_shuffle(BasicType elem_bt, int vlen) {
 }
 
 const RegMask* Matcher::predicate_reg_mask(void) {
-  return NULL;
+  return &_VMASK_REG_mask;
 }
 
 const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) {
-  return NULL;
+  return new TypeVectMask(elemTy, length);
 }
 
 // Vector calling convention not yet implemented.
@@ -3556,6 +3647,28 @@ operand vReg_V5()
   interface(REG_INTER);
 %}
 
+operand vRegMask()
+%{
+  constraint(ALLOC_IN_RC(vmask_reg));
+  match(RegVectMask);
+  match(vRegMask_V0);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// The mask value used to control execution of a masked
+// vector instruction is always supplied by vector register v0.
+operand vRegMask_V0()
+%{
+  constraint(ALLOC_IN_RC(vmask_reg_v0));
+  match(RegVectMask);
+  match(vRegMask);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
 // Java Thread Register
 operand javaThread_RegP(iRegP reg)
 %{
@@ -7271,7 +7384,7 @@ instruct maxF_reg_reg(fRegF dst, fRegF src1, fRegF src2, rFlagsReg cr) %{
   format %{ "maxF $dst, $src1, $src2" %}
 
   ins_encode %{
-    __ minmax_FD(as_FloatRegister($dst$$reg),
+    __ minmax_fp(as_FloatRegister($dst$$reg),
                  as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg),
                  false /* is_double */, false /* is_min */);
   %}
@@ -7287,7 +7400,7 @@ instruct minF_reg_reg(fRegF dst, fRegF src1, fRegF src2, rFlagsReg cr) %{
   format %{ "minF $dst, $src1, $src2" %}
 
   ins_encode %{
-    __ minmax_FD(as_FloatRegister($dst$$reg),
+    __ minmax_fp(as_FloatRegister($dst$$reg),
                  as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg),
                  false /* is_double */, true /* is_min */);
   %}
@@ -7303,7 +7416,7 @@ instruct maxD_reg_reg(fRegD dst, fRegD src1, fRegD src2, rFlagsReg cr) %{
   format %{ "maxD $dst, $src1, $src2" %}
 
   ins_encode %{
-    __ minmax_FD(as_FloatRegister($dst$$reg),
+    __ minmax_fp(as_FloatRegister($dst$$reg),
                  as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg),
                  true /* is_double */, false /* is_min */);
   %}
@@ -7319,7 +7432,7 @@ instruct minD_reg_reg(fRegD dst, fRegD src1, fRegD src2, rFlagsReg cr) %{
   format %{ "minD $dst, $src1, $src2" %}
 
   ins_encode %{
-    __ minmax_FD(as_FloatRegister($dst$$reg),
+    __ minmax_fp(as_FloatRegister($dst$$reg),
                  as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg),
                  true /* is_double */, true /* is_min */);
   %}
diff --git a/src/hotspot/cpu/riscv/riscv_v.ad b/src/hotspot/cpu/riscv/riscv_v.ad
index 700cb18eafb..240baa1b577 100644
--- a/src/hotspot/cpu/riscv/riscv_v.ad
+++ b/src/hotspot/cpu/riscv/riscv_v.ad
@@ -35,14 +35,18 @@ source_hpp %{
 source %{
 
   static void loadStore(C2_MacroAssembler masm, bool is_store,
-                        VectorRegister reg, BasicType bt, Register base, int length_in_bytes) {
+                        VectorRegister reg, BasicType bt, Register base,
+                        int length_in_bytes, Assembler::VectorMask vm = Assembler::unmasked) {
     Assembler::SEW sew = Assembler::elemtype_to_sew(bt);
     masm.rvv_vsetvli(bt, length_in_bytes);
 
     if (is_store) {
-      masm.vsex_v(reg, base, sew);
+      masm.vsex_v(reg, base, sew, vm);
     } else {
-      masm.vlex_v(reg, base, sew);
+      if (vm == Assembler::v0_t) {
+        masm.vxor_vv(reg, reg, reg);
+      }
+      masm.vlex_v(reg, base, sew, vm);
     }
   }
 
@@ -66,7 +70,6 @@ source %{
       // Vector API specific
       case Op_LoadVectorGather:
       case Op_StoreVectorScatter:
-      case Op_VectorBlend:
       case Op_VectorCast:
       case Op_VectorCastB2X:
       case Op_VectorCastD2X:
@@ -75,12 +78,9 @@ source %{
       case Op_VectorCastL2X:
       case Op_VectorCastS2X:
       case Op_VectorInsert:
-      case Op_VectorLoadMask:
       case Op_VectorLoadShuffle:
-      case Op_VectorMaskCmp:
       case Op_VectorRearrange:
       case Op_VectorReinterpret:
-      case Op_VectorStoreMask:
       case Op_VectorTest:
       case Op_PopCountVI:
       case Op_PopCountVL:
@@ -123,6 +123,112 @@ instruct storeV(vReg src, vmemA mem) %{
   ins_pipe(pipe_slow);
 %}
 
+// vector load mask
+
+instruct vloadmask(vRegMask dst, vReg src) %{
+  match(Set dst (VectorLoadMask src));
+  format %{ "vloadmask $dst, $src" %}
+  ins_encode %{
+    __ rvv_vsetvli(T_BOOLEAN, Matcher::vector_length(this));
+    __ vmsne_vx(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), zr);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vloadmask_masked(vRegMask dst, vReg src, vRegMask_V0 v0) %{
+  match(Set dst (VectorLoadMask src v0));
+  format %{ "vloadmask_masked $dst, $src, $v0" %}
+  ins_encode %{
+    __ rvv_vsetvli(T_BOOLEAN, Matcher::vector_length(this));
+    __ vmsne_vx(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), zr, Assembler::v0_t);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// vector store mask
+
+instruct vstoremask(vReg dst, vRegMask_V0 v0, immI size) %{
+  match(Set dst (VectorStoreMask v0 size));
+  format %{ "vstoremask $dst, V0" %}
+  ins_encode %{
+    __ rvv_vsetvli(T_BOOLEAN, Matcher::vector_length(this));
+    __ vmv_v_x(as_VectorRegister($dst$$reg), zr);
+    __ vmerge_vim(as_VectorRegister($dst$$reg), as_VectorRegister($dst$$reg), 1);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// vector mask compare
+
+instruct vmaskcmp(vRegMask dst, vReg src1, vReg src2, immI cond) %{
+  predicate(Matcher::vector_element_basic_type(n) == T_BYTE ||
+            Matcher::vector_element_basic_type(n) == T_SHORT ||
+            Matcher::vector_element_basic_type(n) == T_INT ||
+            Matcher::vector_element_basic_type(n) == T_LONG);
+  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+  format %{ "vmaskcmp $dst, $src1, $src2, $cond" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    uint length_in_bytes = Matcher::vector_length_in_bytes(this);
+    __ compare_integral_v(as_VectorRegister($dst$$reg), bt, length_in_bytes, as_VectorRegister($src1$$reg),
+                          as_VectorRegister($src2$$reg), (int)($cond$$constant));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vmaskcmp_masked(vRegMask dst, vReg src1, vReg src2, immI cond, vRegMask_V0 v0) %{
+  predicate(Matcher::vector_element_basic_type(n) == T_BYTE ||
+            Matcher::vector_element_basic_type(n) == T_SHORT ||
+            Matcher::vector_element_basic_type(n) == T_INT ||
+            Matcher::vector_element_basic_type(n) == T_LONG);
+  match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond v0)));
+  effect(TEMP_DEF dst);
+  format %{ "vmaskcmp_masked $dst, $src1, $src2, $cond, $v0" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    uint length_in_bytes = Matcher::vector_length_in_bytes(this);
+    __ compare_integral_v(as_VectorRegister($dst$$reg), bt, length_in_bytes, as_VectorRegister($src1$$reg),
+                          as_VectorRegister($src2$$reg), (int)($cond$$constant), Assembler::v0_t);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// vector mask float compare
+
+instruct vmaskcmp_fp(vRegMask dst, vReg src1, vReg src2, immI cond, vRegMask_V0 v0, vReg tmp1, vReg tmp2) %{
+  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT ||
+            Matcher::vector_element_basic_type(n) == T_DOUBLE);
+  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP v0);
+  format %{ "vmaskcmp_fp $dst, $src1, $src2, $cond\t# KILL $tmp1, $tmp2" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    uint length_in_bytes = Matcher::vector_length_in_bytes(this);
+    __ compare_floating_point_v(as_VectorRegister($dst$$reg), bt, length_in_bytes,
+                                as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg),
+                                as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg),
+                                as_VectorRegister($v0$$reg), (int)($cond$$constant));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vmaskcmp_fp_masked(vRegMask dst, vReg src1, vReg src2, immI cond, vRegMask vmask, vReg tmp1, vReg tmp2, vRegMask_V0 v0) %{
+  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT ||
+            Matcher::vector_element_basic_type(n) == T_DOUBLE);
+  match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond vmask)));
+  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP v0);
+  format %{ "vmaskcmp_fp_masked $dst, $src1, $src2, $cond, $vmask\t# KILL $tmp1, $tmp2, $v0" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    uint length_in_bytes = Matcher::vector_length_in_bytes(this);
+    __ compare_floating_point_v(as_VectorRegister($dst$$reg), bt, length_in_bytes,
+                                as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg),
+                                as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg),
+                                as_VectorRegister($vmask$$reg), (int)($cond$$constant), Assembler::v0_t);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
 // vector abs
 
 instruct vabsB(vReg dst, vReg src, vReg tmp) %{
@@ -283,6 +389,40 @@ instruct vaddD(vReg dst, vReg src1, vReg src2) %{
   ins_pipe(pipe_slow);
 %}
 
+// vector add - predicated
+
+instruct vadd_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{
+  match(Set dst_src1 (AddVB (Binary dst_src1 src2) v0));
+  match(Set dst_src1 (AddVS (Binary dst_src1 src2) v0));
+  match(Set dst_src1 (AddVI (Binary dst_src1 src2) v0));
+  match(Set dst_src1 (AddVL (Binary dst_src1 src2) v0));
+  ins_cost(VEC_COST);
+  format %{ "vadd.vv $dst_src1, $src2, $v0\t#@vadd_masked" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    __ rvv_vsetvli(bt, Matcher::vector_length_in_bytes(this));
+    __ vadd_vv(as_VectorRegister($dst_src1$$reg),
+               as_VectorRegister($dst_src1$$reg),
+               as_VectorRegister($src2$$reg), Assembler::v0_t);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vadd_fp_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{
+  match(Set dst_src1 (AddVF (Binary dst_src1 src2) v0));
+  match(Set dst_src1 (AddVD (Binary dst_src1 src2) v0));
+  ins_cost(VEC_COST);
+  format %{ "vfadd.vv $dst_src1, $src2, $v0\t#@vadd_fp_masked" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    __ rvv_vsetvli(bt, Matcher::vector_length_in_bytes(this));
+    __ vfadd_vv(as_VectorRegister($dst_src1$$reg),
+                as_VectorRegister($dst_src1$$reg),
+                as_VectorRegister($src2$$reg), Assembler::v0_t);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
 // vector and
 
 instruct vand(vReg dst, vReg src1, vReg src2) %{
@@ -290,7 +430,8 @@ instruct vand(vReg dst, vReg src1, vReg src2) %{
   ins_cost(VEC_COST);
   format %{ "vand.vv  $dst, $src1, $src2\t#@vand" %}
   ins_encode %{
-    __ rvv_vsetvli(T_LONG, Matcher::vector_length_in_bytes(this));
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    __ rvv_vsetvli(bt, Matcher::vector_length_in_bytes(this));
     __ vand_vv(as_VectorRegister($dst$$reg),
                as_VectorRegister($src1$$reg),
                as_VectorRegister($src2$$reg));
@@ -305,7 +446,8 @@ instruct vor(vReg dst, vReg src1, vReg src2) %{
   ins_cost(VEC_COST);
   format %{ "vor.vv  $dst, $src1, $src2\t#@vor" %}
   ins_encode %{
-    __ rvv_vsetvli(T_LONG, Matcher::vector_length_in_bytes(this));
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    __ rvv_vsetvli(bt, Matcher::vector_length_in_bytes(this));
     __ vor_vv(as_VectorRegister($dst$$reg),
               as_VectorRegister($src1$$reg),
               as_VectorRegister($src2$$reg));
@@ -320,7 +462,8 @@ instruct vxor(vReg dst, vReg src1, vReg src2) %{
   ins_cost(VEC_COST);
   format %{ "vxor.vv  $dst, $src1, $src2\t#@vxor" %}
   ins_encode %{
-    __ rvv_vsetvli(T_LONG, Matcher::vector_length_in_bytes(this));
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    __ rvv_vsetvli(bt, Matcher::vector_length_in_bytes(this));
     __ vxor_vv(as_VectorRegister($dst$$reg),
                as_VectorRegister($src1$$reg),
                as_VectorRegister($src2$$reg));
@@ -356,6 +499,23 @@ instruct vdivD(vReg dst, vReg src1, vReg src2) %{
   ins_pipe(pipe_slow);
 %}
 
+// vector float div - predicated
+
+instruct vdiv_fp_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{
+  match(Set dst_src1 (DivVF (Binary dst_src1 src2) v0));
+  match(Set dst_src1 (DivVD (Binary dst_src1 src2) v0));
+  ins_cost(VEC_COST);
+  format %{ "vfdiv.vv  $dst_src1, $src2, $v0\t#@vdiv_fp_masked" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    __ rvv_vsetvli(bt, Matcher::vector_length_in_bytes(this));
+    __ vfdiv_vv(as_VectorRegister($dst_src1$$reg),
+                as_VectorRegister($dst_src1$$reg),
+                as_VectorRegister($src2$$reg), Assembler::v0_t);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
 // vector integer max/min
 
 instruct vmax(vReg dst, vReg src1, vReg src2) %{
@@ -397,7 +557,7 @@ instruct vmaxF(vReg dst, vReg src1, vReg src2) %{
   ins_cost(VEC_COST);
   format %{ "vmaxF $dst, $src1, $src2\t#@vmaxF" %}
   ins_encode %{
-    __ minmax_FD_v(as_VectorRegister($dst$$reg),
+    __ minmax_fp_v(as_VectorRegister($dst$$reg),
                    as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg),
                    false /* is_double */, false /* is_min */, Matcher::vector_length_in_bytes(this));
   %}
@@ -411,7 +571,7 @@ instruct vmaxD(vReg dst, vReg src1, vReg src2) %{
   ins_cost(VEC_COST);
   format %{ "vmaxD $dst, $src1, $src2\t#@vmaxD" %}
   ins_encode %{
-    __ minmax_FD_v(as_VectorRegister($dst$$reg),
+    __ minmax_fp_v(as_VectorRegister($dst$$reg),
                    as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg),
                    true /* is_double */, false /* is_min */, Matcher::vector_length_in_bytes(this));
   %}
@@ -425,7 +585,7 @@ instruct vminF(vReg dst, vReg src1, vReg src2) %{
   ins_cost(VEC_COST);
   format %{ "vminF $dst, $src1, $src2\t#@vminF" %}
   ins_encode %{
-    __ minmax_FD_v(as_VectorRegister($dst$$reg),
+    __ minmax_fp_v(as_VectorRegister($dst$$reg),
                    as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg),
                    false /* is_double */, true /* is_min */, Matcher::vector_length_in_bytes(this));
   %}
@@ -439,7 +599,7 @@ instruct vminD(vReg dst, vReg src1, vReg src2) %{
   ins_cost(VEC_COST);
   format %{ "vminD $dst, $src1, $src2\t#@vminD" %}
   ins_encode %{
-    __ minmax_FD_v(as_VectorRegister($dst$$reg),
+    __ minmax_fp_v(as_VectorRegister($dst$$reg),
                    as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg),
                    true /* is_double */, true /* is_min */, Matcher::vector_length_in_bytes(this));
   %}
@@ -756,6 +916,38 @@ instruct vmulD(vReg dst, vReg src1, vReg src2) %{
   ins_pipe(pipe_slow);
 %}
 
+// vector mul - predicated
+
+instruct vmul_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{
+  match(Set dst_src1 (MulVB (Binary dst_src1 src2) v0));
+  match(Set dst_src1 (MulVS (Binary dst_src1 src2) v0));
+  match(Set dst_src1 (MulVI (Binary dst_src1 src2) v0));
+  match(Set dst_src1 (MulVL (Binary dst_src1 src2) v0));
+  ins_cost(VEC_COST);
+  format %{ "vmul.vv $dst_src1, $src2, $v0\t#@vmul_masked" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    __ rvv_vsetvli(bt, Matcher::vector_length_in_bytes(this));
+    __ vmul_vv(as_VectorRegister($dst_src1$$reg), as_VectorRegister($dst_src1$$reg),
+               as_VectorRegister($src2$$reg), Assembler::v0_t);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vmul_fp_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{
+  match(Set dst_src1 (MulVF (Binary dst_src1 src2) v0));
+  match(Set dst_src1 (MulVD (Binary dst_src1 src2) v0));
+  ins_cost(VEC_COST);
+  format %{ "vfmul.vv $dst_src1, $src2, $v0\t#@vmul_fp_masked" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    __ rvv_vsetvli(bt, Matcher::vector_length_in_bytes(this));
+    __ vfmul_vv(as_VectorRegister($dst_src1$$reg), as_VectorRegister($dst_src1$$reg),
+                as_VectorRegister($src2$$reg), Assembler::v0_t);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
 // vector neg
 
 instruct vnegI(vReg dst, vReg src) %{
@@ -1000,7 +1192,7 @@ instruct vreduce_maxI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{
   match(Set dst (MaxReductionV src1 src2));
   ins_cost(VEC_COST);
   effect(TEMP tmp);
-  format %{ "vreduce_maxI $dst, $src1, $src2, $tmp" %}
+  format %{ "vreduce_maxI $dst, $src1, $src2\t# KILL $tmp" %}
   ins_encode %{
     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
     __ rvv_reduce_integral($dst$$Register, as_VectorRegister($tmp$$reg),
@@ -1015,7 +1207,7 @@ instruct vreduce_maxL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{
   match(Set dst (MaxReductionV src1 src2));
   ins_cost(VEC_COST);
   effect(TEMP tmp);
-  format %{ "vreduce_maxL $dst, $src1, $src2, $tmp" %}
+  format %{ "vreduce_maxL $dst, $src1, $src2\t# KILL $tmp" %}
   ins_encode %{
     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
     __ rvv_reduce_integral($dst$$Register, as_VectorRegister($tmp$$reg),
@@ -1034,7 +1226,7 @@ instruct vreduce_minI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{
   match(Set dst (MinReductionV src1 src2));
   ins_cost(VEC_COST);
   effect(TEMP tmp);
-  format %{ "vreduce_minI $dst, $src1, $src2, $tmp" %}
+  format %{ "vreduce_minI $dst, $src1, $src2\t# KILL $tmp" %}
   ins_encode %{
     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
     __ rvv_reduce_integral($dst$$Register, as_VectorRegister($tmp$$reg),
@@ -1049,7 +1241,7 @@ instruct vreduce_minL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{
   match(Set dst (MinReductionV src1 src2));
   ins_cost(VEC_COST);
   effect(TEMP tmp);
-  format %{ "vreduce_minL $dst, $src1, $src2, $tmp" %}
+  format %{ "vreduce_minL $dst, $src1, $src2\t# KILL $tmp" %}
   ins_encode %{
     BasicType bt = Matcher::vector_element_basic_type(this, $src2);
     __ rvv_reduce_integral($dst$$Register, as_VectorRegister($tmp$$reg),
@@ -1068,7 +1260,7 @@ instruct vreduce_maxF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{
   effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
   format %{ "reduce_maxF $dst, $src1, $src2, $tmp1, $tmp2" %}
   ins_encode %{
-    __ reduce_minmax_FD_v($dst$$FloatRegister,
+    __ reduce_minmax_fp_v($dst$$FloatRegister,
                           $src1$$FloatRegister, as_VectorRegister($src2$$reg),
                           as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg),
                           false /* is_double */, false /* is_min */, Matcher::vector_length_in_bytes(this, $src2));
@@ -1083,7 +1275,7 @@ instruct vreduce_maxD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{
   effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
   format %{ "reduce_maxD $dst, $src1, $src2, $tmp1, $tmp2" %}
   ins_encode %{
-    __ reduce_minmax_FD_v($dst$$FloatRegister,
+    __ reduce_minmax_fp_v($dst$$FloatRegister,
                           $src1$$FloatRegister, as_VectorRegister($src2$$reg),
                           as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg),
                           true /* is_double */, false /* is_min */, Matcher::vector_length_in_bytes(this, $src2));
@@ -1100,7 +1292,7 @@ instruct vreduce_minF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{
   effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
   format %{ "reduce_minF $dst, $src1, $src2, $tmp1, $tmp2" %}
   ins_encode %{
-    __ reduce_minmax_FD_v($dst$$FloatRegister,
+    __ reduce_minmax_fp_v($dst$$FloatRegister,
                           $src1$$FloatRegister, as_VectorRegister($src2$$reg),
                           as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg),
                           false /* is_double */, true /* is_min */, Matcher::vector_length_in_bytes(this, $src2));
@@ -1115,7 +1307,7 @@ instruct vreduce_minD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{
   effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
   format %{ "reduce_minD $dst, $src1, $src2, $tmp1, $tmp2" %}
   ins_encode %{
-    __ reduce_minmax_FD_v($dst$$FloatRegister,
+    __ reduce_minmax_fp_v($dst$$FloatRegister,
                           $src1$$FloatRegister, as_VectorRegister($src2$$reg),
                           as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg),
                           true /* is_double */, true /* is_min */, Matcher::vector_length_in_bytes(this, $src2));
@@ -1265,44 +1457,38 @@ instruct replicateD(vReg dst, fRegD src) %{
 
 // vector shift
 
-instruct vasrB(vReg dst, vReg src, vReg shift) %{
+instruct vasrB(vReg dst, vReg src, vReg shift, vRegMask_V0 v0) %{
   match(Set dst (RShiftVB src shift));
   ins_cost(VEC_COST);
-  effect(TEMP_DEF dst);
-  format %{ "vmsgtu.vi v0, $shift 7\t#@vasrB\n\t"
-            "vsra.vi $dst, $src, 7, Assembler::v0_t\n\t"
-            "vmnot.m v0, v0\n\t"
-            "vsra.vv $dst, $src, $shift, Assembler::v0_t" %}
+  effect(TEMP_DEF dst, TEMP v0);
+  format %{ "vasrB $dst, $src, $shift" %}
   ins_encode %{
     __ rvv_vsetvli(T_BYTE, Matcher::vector_length_in_bytes(this));
     // if shift > BitsPerByte - 1, clear the low BitsPerByte - 1 bits
-    __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1);
+    __ vmsgtu_vi(as_VectorRegister($v0$$reg), as_VectorRegister($shift$$reg), BitsPerByte - 1);
     __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
                BitsPerByte - 1, Assembler::v0_t);
     // otherwise, shift
-    __ vmnot_m(v0, v0);
+    __ vmnot_m(as_VectorRegister($v0$$reg), as_VectorRegister($v0$$reg));
     __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
                as_VectorRegister($shift$$reg), Assembler::v0_t);
   %}
   ins_pipe(pipe_slow);
 %}
 
-instruct vasrS(vReg dst, vReg src, vReg shift) %{
+instruct vasrS(vReg dst, vReg src, vReg shift, vRegMask_V0 v0) %{
   match(Set dst (RShiftVS src shift));
   ins_cost(VEC_COST);
-  effect(TEMP_DEF dst);
-  format %{ "vmsgtu.vi v0, $shift, 15\t#@vasrS\n\t"
-            "vsra.vi $dst, $src, 15, Assembler::v0_t\n\t"
-            "vmnot.m v0, v0\n\t"
-            "vsra.vv $dst, $src, $shift, Assembler::v0_t" %}
+  effect(TEMP_DEF dst, TEMP v0);
+  format %{ "vasrS $dst, $src, $shift" %}
   ins_encode %{
     __ rvv_vsetvli(T_SHORT, Matcher::vector_length_in_bytes(this));
     // if shift > BitsPerShort - 1, clear the low BitsPerShort - 1 bits
-    __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1);
+    __ vmsgtu_vi(as_VectorRegister($v0$$reg), as_VectorRegister($shift$$reg), BitsPerShort - 1);
     __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
                BitsPerShort - 1, Assembler::v0_t);
     // otherwise, shift
-    __ vmnot_m(v0, v0);
+    __ vmnot_m(as_VectorRegister($v0$$reg), as_VectorRegister($v0$$reg));
     __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
                as_VectorRegister($shift$$reg), Assembler::v0_t);
   %}
@@ -1312,7 +1498,7 @@ instruct vasrS(vReg dst, vReg src, vReg shift) %{
 instruct vasrI(vReg dst, vReg src, vReg shift) %{
   match(Set dst (RShiftVI src shift));
   ins_cost(VEC_COST);
-  format %{ "vsra.vv $dst, $src, $shift\t#@vasrI" %}
+  format %{ "vasrI $dst, $src, $shift" %}
   ins_encode %{
     __ rvv_vsetvli(T_INT, Matcher::vector_length_in_bytes(this));
     __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
@@ -1324,53 +1510,109 @@ instruct vasrI(vReg dst, vReg src, vReg shift) %{
 instruct vasrL(vReg dst, vReg src, vReg shift) %{
   match(Set dst (RShiftVL src shift));
   ins_cost(VEC_COST);
-  format %{ "vsra.vv $dst, $src, $shift\t#@vasrL" %}
+  format %{ "vasrL $dst, $src, $shift" %}
   ins_encode %{
     __ rvv_vsetvli(T_LONG, Matcher::vector_length_in_bytes(this));
     __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
-         as_VectorRegister($shift$$reg));
+               as_VectorRegister($shift$$reg));
   %}
   ins_pipe(pipe_slow);
 %}
 
-instruct vlslB(vReg dst, vReg src, vReg shift) %{
+instruct vasrB_masked(vReg dst_src, vReg shift, vRegMask vmask, vRegMask_V0 v0) %{
+  match(Set dst_src (RShiftVB (Binary dst_src shift) vmask));
+  ins_cost(VEC_COST);
+  effect(TEMP_DEF dst_src, TEMP v0);
+  format %{ "vasrB_masked $dst_src, $dst_src, $shift, $vmask\t# KILL $v0" %}
+  ins_encode %{
+    __ rvv_vsetvli(T_BYTE, Matcher::vector_length_in_bytes(this));
+    __ vmsgtu_vi(as_VectorRegister($v0$$reg), as_VectorRegister($shift$$reg), BitsPerByte - 1);
+    // if shift > BitsPerByte - 1, clear the low BitsPerByte - 1 bits
+    __ vmerge_vim(as_VectorRegister($shift$$reg), as_VectorRegister($shift$$reg), BitsPerByte - 1);
+    // otherwise, shift
+    __ vmv1r_v(as_VectorRegister($v0$$reg), as_VectorRegister($vmask$$reg));
+    __ vsra_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg),
+               as_VectorRegister($shift$$reg), Assembler::v0_t);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vasrS_masked(vReg dst_src, vReg shift, vRegMask vmask, vRegMask_V0 v0) %{
+  match(Set dst_src (RShiftVS (Binary dst_src shift) vmask));
+  ins_cost(VEC_COST);
+  effect(TEMP_DEF dst_src, TEMP v0);
+  format %{ "vasrS_masked $dst_src, $dst_src, $shift, $vmask\t# KILL $v0" %}
+  ins_encode %{
+    __ rvv_vsetvli(T_SHORT, Matcher::vector_length_in_bytes(this));
+    __ vmsgtu_vi(as_VectorRegister($v0$$reg), as_VectorRegister($shift$$reg), BitsPerShort - 1);
+    // if shift > BitsPerShort - 1, clear the low BitsPerShort - 1 bits
+    __ vmerge_vim(as_VectorRegister($shift$$reg), as_VectorRegister($shift$$reg), BitsPerShort - 1);
+    // otherwise, shift
+    __ vmv1r_v(as_VectorRegister($v0$$reg), as_VectorRegister($vmask$$reg));
+    __ vsra_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg),
+               as_VectorRegister($shift$$reg), Assembler::v0_t);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vasrI_masked(vReg dst_src, vReg shift, vRegMask_V0 v0) %{
+  match(Set dst_src (RShiftVI (Binary dst_src shift) v0));
+  ins_cost(VEC_COST);
+  effect(TEMP_DEF dst_src);
+  format %{ "vasrI_masked $dst_src, $dst_src, $shift, $v0" %}
+  ins_encode %{
+    __ rvv_vsetvli(T_INT, Matcher::vector_length_in_bytes(this));
+    __ vsra_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg),
+               as_VectorRegister($shift$$reg), Assembler::v0_t);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vasrL_masked(vReg dst_src, vReg shift, vRegMask_V0 v0) %{
+  match(Set dst_src (RShiftVL (Binary dst_src shift) v0));
+  ins_cost(VEC_COST);
+  effect(TEMP_DEF dst_src);
+  format %{ "vasrL_masked $dst_src, $dst_src, $shift, $v0" %}
+  ins_encode %{
+    __ rvv_vsetvli(T_LONG, Matcher::vector_length_in_bytes(this));
+    __ vsra_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg),
+               as_VectorRegister($shift$$reg), Assembler::v0_t);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vlslB(vReg dst, vReg src, vReg shift, vRegMask_V0 v0) %{
   match(Set dst (LShiftVB src shift));
   ins_cost(VEC_COST);
-  effect( TEMP_DEF dst);
-  format %{ "vmsgtu.vi v0, $shift, 7\t#@vlslB\n\t"
-            "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t"
-            "vmnot.m v0, v0\n\t"
-            "vsll.vv $dst, $src, $shift, Assembler::v0_t" %}
+  effect(TEMP_DEF dst, TEMP v0);
+  format %{ "vlslB $dst, $src, $shift" %}
   ins_encode %{
     __ rvv_vsetvli(T_BYTE, Matcher::vector_length_in_bytes(this));
     // if shift > BitsPerByte - 1, clear the element
-    __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1);
+    __ vmsgtu_vi(as_VectorRegister($v0$$reg), as_VectorRegister($shift$$reg), BitsPerByte - 1);
     __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
                as_VectorRegister($src$$reg), Assembler::v0_t);
     // otherwise, shift
-    __ vmnot_m(v0, v0);
+    __ vmnot_m(as_VectorRegister($v0$$reg), as_VectorRegister($v0$$reg));
     __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
                as_VectorRegister($shift$$reg), Assembler::v0_t);
   %}
   ins_pipe(pipe_slow);
 %}
 
-instruct vlslS(vReg dst, vReg src, vReg shift) %{
+instruct vlslS(vReg dst, vReg src, vReg shift, vRegMask_V0 v0) %{
   match(Set dst (LShiftVS src shift));
   ins_cost(VEC_COST);
-  effect(TEMP_DEF dst);
-  format %{ "vmsgtu.vi v0, $shift, 15\t#@vlslS\n\t"
-            "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t"
-            "vmnot.m v0, v0\n\t"
-            "vsll.vv $dst, $src, $shift, Assembler::v0_t" %}
+  effect(TEMP_DEF dst, TEMP v0);
+  format %{ "vlslS $dst, $src, $shift" %}
   ins_encode %{
     __ rvv_vsetvli(T_SHORT, Matcher::vector_length_in_bytes(this));
     // if shift > BitsPerShort - 1, clear the element
-    __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1);
+    __ vmsgtu_vi(as_VectorRegister($v0$$reg), as_VectorRegister($shift$$reg), BitsPerShort - 1);
     __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
                as_VectorRegister($src$$reg), Assembler::v0_t);
     // otherwise, shift
-    __ vmnot_m(v0, v0);
+    __ vmnot_m(as_VectorRegister($v0$$reg), as_VectorRegister($v0$$reg));
     __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
                as_VectorRegister($shift$$reg), Assembler::v0_t);
   %}
@@ -1380,7 +1622,7 @@ instruct vlslS(vReg dst, vReg src, vReg shift) %{
 instruct vlslI(vReg dst, vReg src, vReg shift) %{
   match(Set dst (LShiftVI src shift));
   ins_cost(VEC_COST);
-  format %{ "vsll.vv $dst, $src, $shift\t#@vlslI" %}
+  format %{ "vlslI $dst, $src, $shift" %}
   ins_encode %{
     __ rvv_vsetvli(T_INT, Matcher::vector_length_in_bytes(this));
     __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
@@ -1392,7 +1634,7 @@ instruct vlslI(vReg dst, vReg src, vReg shift) %{
 instruct vlslL(vReg dst, vReg src, vReg shift) %{
   match(Set dst (LShiftVL src shift));
   ins_cost(VEC_COST);
-  format %{ "vsll.vv $dst, $src, $shift\t# vector (D)" %}
+  format %{ "vlslL $dst, $src, $shift" %}
   ins_encode %{
     __ rvv_vsetvli(T_LONG, Matcher::vector_length_in_bytes(this));
     __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
@@ -1401,55 +1643,116 @@ instruct vlslL(vReg dst, vReg src, vReg shift) %{
   ins_pipe(pipe_slow);
 %}
 
-instruct vlsrB(vReg dst, vReg src, vReg shift) %{
-  match(Set dst (URShiftVB src shift));
+instruct vlslB_masked(vReg dst_src, vReg shift, vRegMask vmask, vRegMask_V0 v0) %{
+  match(Set dst_src (LShiftVB (Binary dst_src shift) vmask));
   ins_cost(VEC_COST);
-  effect(TEMP_DEF dst);
-  format %{ "vmsgtu.vi v0, $shift, 7\t#@vlsrB\n\t"
-            "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t"
-            "vmnot.m v0, v0, v0\n\t"
-            "vsll.vv $dst, $src, $shift, Assembler::v0_t" %}
+  effect(TEMP_DEF dst_src, TEMP v0);
+  format %{ "vlslB_masked $dst_src, $dst_src, $shift, $vmask\t# KILL $v0" %}
   ins_encode %{
     __ rvv_vsetvli(T_BYTE, Matcher::vector_length_in_bytes(this));
     // if shift > BitsPerByte - 1, clear the element
-    __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1);
-    __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
-               as_VectorRegister($src$$reg), Assembler::v0_t);
+    __ vmsgtu_vi(as_VectorRegister($v0$$reg), as_VectorRegister($shift$$reg), BitsPerByte - 1);
+    __ vmand_mm(as_VectorRegister($v0$$reg), as_VectorRegister($v0$$reg),
+                as_VectorRegister($vmask$$reg));
+    __ vxor_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg),
+               as_VectorRegister($dst_src$$reg), Assembler::v0_t);
     // otherwise, shift
-    __ vmnot_m(v0, v0);
-    __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
+    __ vmv1r_v(as_VectorRegister($v0$$reg), as_VectorRegister($vmask$$reg));
+    __ vsll_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg),
                as_VectorRegister($shift$$reg), Assembler::v0_t);
   %}
   ins_pipe(pipe_slow);
 %}
 
-instruct vlsrS(vReg dst, vReg src, vReg shift) %{
-  match(Set dst (URShiftVS src shift));
+instruct vlslS_masked(vReg dst_src, vReg shift, vRegMask vmask, vRegMask_V0 v0) %{
+  match(Set dst_src (LShiftVS (Binary dst_src shift) vmask));
   ins_cost(VEC_COST);
-  effect(TEMP_DEF dst);
-  format %{ "vmsgtu.vi v0, $shift, 15\t#@vlsrS\n\t"
-            "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t"
-            "vmnot.m v0, v0\n\t"
-            "vsll.vv $dst, $src, $shift, Assembler::v0_t" %}
+  effect(TEMP_DEF dst_src, TEMP v0);
+  format %{ "vlslS_masked $dst_src, $dst_src, $shift, $vmask\t# KILL $v0" %}
   ins_encode %{
     __ rvv_vsetvli(T_SHORT, Matcher::vector_length_in_bytes(this));
     // if shift > BitsPerShort - 1, clear the element
-    __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1);
+    __ vmsgtu_vi(as_VectorRegister($v0$$reg), as_VectorRegister($shift$$reg), BitsPerShort - 1);
+    __ vmand_mm(as_VectorRegister($v0$$reg), as_VectorRegister($v0$$reg),
+                as_VectorRegister($vmask$$reg));
+    __ vxor_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg),
+               as_VectorRegister($dst_src$$reg), Assembler::v0_t);
+    // otherwise, shift
+    __ vmv1r_v(as_VectorRegister($v0$$reg), as_VectorRegister($vmask$$reg));
+    __ vsll_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg),
+               as_VectorRegister($shift$$reg), Assembler::v0_t);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vlslI_masked(vReg dst_src, vReg shift, vRegMask_V0 v0) %{
+  match(Set dst_src (LShiftVI (Binary dst_src shift) v0));
+  ins_cost(VEC_COST);
+  effect(TEMP_DEF dst_src);
+  format %{ "vlslI_masked $dst_src, $dst_src, $shift, $v0" %}
+  ins_encode %{
+    __ rvv_vsetvli(T_INT, Matcher::vector_length_in_bytes(this));
+    __ vsll_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg),
+               as_VectorRegister($shift$$reg), Assembler::v0_t);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vlslL_masked(vReg dst_src, vReg shift, vRegMask_V0 v0) %{
+  match(Set dst_src (LShiftVL (Binary dst_src shift) v0));
+  ins_cost(VEC_COST);
+  effect(TEMP_DEF dst_src);
+  format %{ "vlslL_masked $dst_src, $dst_src, $shift, $v0" %}
+  ins_encode %{
+    __ rvv_vsetvli(T_LONG, Matcher::vector_length_in_bytes(this));
+    __ vsll_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg),
+               as_VectorRegister($shift$$reg), Assembler::v0_t);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vlsrB(vReg dst, vReg src, vReg shift, vRegMask_V0 v0) %{
+  match(Set dst (URShiftVB src shift));
+  ins_cost(VEC_COST);
+  effect(TEMP_DEF dst, TEMP v0);
+  format %{ "vlsrB $dst, $src, $shift" %}
+  ins_encode %{
+    __ rvv_vsetvli(T_BYTE, Matcher::vector_length_in_bytes(this));
+    // if shift > BitsPerByte - 1, clear the element
+    __ vmsgtu_vi(as_VectorRegister($v0$$reg), as_VectorRegister($shift$$reg), BitsPerByte - 1);
     __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
                as_VectorRegister($src$$reg), Assembler::v0_t);
     // otherwise, shift
-    __ vmnot_m(v0, v0);
+    __ vmnot_m(as_VectorRegister($v0$$reg), as_VectorRegister($v0$$reg));
     __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
                as_VectorRegister($shift$$reg), Assembler::v0_t);
   %}
   ins_pipe(pipe_slow);
 %}
 
+instruct vlsrS(vReg dst, vReg src, vReg shift, vRegMask_V0 v0) %{
+  match(Set dst (URShiftVS src shift));
+  ins_cost(VEC_COST);
+  effect(TEMP_DEF dst, TEMP v0);
+  format %{ "vlsrS $dst, $src, $shift" %}
+  ins_encode %{
+    __ rvv_vsetvli(T_SHORT, Matcher::vector_length_in_bytes(this));
+    // if shift > BitsPerShort - 1, clear the element
+    __ vmsgtu_vi(as_VectorRegister($v0$$reg), as_VectorRegister($shift$$reg), BitsPerShort - 1);
+    __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
+               as_VectorRegister($src$$reg), Assembler::v0_t);
+    // otherwise, shift
+    __ vmnot_m(as_VectorRegister($v0$$reg), as_VectorRegister($v0$$reg));
+    __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
+               as_VectorRegister($shift$$reg), Assembler::v0_t);
+  %}
+  ins_pipe(pipe_slow);
+%}
 
 instruct vlsrI(vReg dst, vReg src, vReg shift) %{
   match(Set dst (URShiftVI src shift));
   ins_cost(VEC_COST);
-  format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrI" %}
+  format %{ "vlsrI $dst, $src, $shift" %}
   ins_encode %{
     __ rvv_vsetvli(T_INT, Matcher::vector_length_in_bytes(this));
     __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
@@ -1458,11 +1761,10 @@ instruct vlsrI(vReg dst, vReg src, vReg shift) %{
   ins_pipe(pipe_slow);
 %}
 
-
 instruct vlsrL(vReg dst, vReg src, vReg shift) %{
   match(Set dst (URShiftVL src shift));
   ins_cost(VEC_COST);
-  format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrL" %}
+  format %{ "vlsrL $dst, $src, $shift" %}
   ins_encode %{
     __ rvv_vsetvli(T_LONG, Matcher::vector_length_in_bytes(this));
     __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
@@ -1471,6 +1773,74 @@ instruct vlsrL(vReg dst, vReg src, vReg shift) %{
   ins_pipe(pipe_slow);
 %}
 
+instruct vlsrB_masked(vReg dst_src, vReg shift, vRegMask vmask, vRegMask_V0 v0) %{
+  match(Set dst_src (URShiftVB (Binary dst_src shift) vmask));
+  ins_cost(VEC_COST);
+  effect(TEMP_DEF dst_src, TEMP v0);
+  format %{ "vlsrB_masked $dst_src, $dst_src, $shift, $vmask\t# KILL $v0" %}
+  ins_encode %{
+    __ rvv_vsetvli(T_BYTE, Matcher::vector_length_in_bytes(this));
+    // if shift > BitsPerByte - 1, clear the element
+    __ vmsgtu_vi(as_VectorRegister($v0$$reg), as_VectorRegister($shift$$reg), BitsPerByte - 1);
+    __ vmand_mm(as_VectorRegister($v0$$reg), as_VectorRegister($v0$$reg),
+                as_VectorRegister($vmask$$reg));
+    __ vxor_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg),
+               as_VectorRegister($dst_src$$reg), Assembler::v0_t);
+    // otherwise, shift
+    __ vmv1r_v(as_VectorRegister($v0$$reg), as_VectorRegister($vmask$$reg));
+    __ vsrl_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg),
+               as_VectorRegister($shift$$reg), Assembler::v0_t);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vlsrS_masked(vReg dst_src, vReg shift, vRegMask vmask, vRegMask_V0 v0) %{
+  match(Set dst_src (URShiftVS (Binary dst_src shift) vmask));
+  ins_cost(VEC_COST);
+  effect(TEMP_DEF dst_src, TEMP v0);
+  format %{ "vlsrS_masked $dst_src, $dst_src, $shift, $vmask\t# KILL $v0" %}
+  ins_encode %{
+    __ rvv_vsetvli(T_SHORT, Matcher::vector_length_in_bytes(this));
+    // if shift > BitsPerShort - 1, clear the element
+    __ vmsgtu_vi(as_VectorRegister($v0$$reg), as_VectorRegister($shift$$reg), BitsPerShort - 1);
+    __ vmand_mm(as_VectorRegister($v0$$reg), as_VectorRegister($v0$$reg),
+                as_VectorRegister($vmask$$reg));
+    __ vxor_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg),
+               as_VectorRegister($dst_src$$reg), Assembler::v0_t);
+    // otherwise, shift
+    __ vmv1r_v(as_VectorRegister($v0$$reg), as_VectorRegister($vmask$$reg));
+    __ vsrl_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg),
+               as_VectorRegister($shift$$reg), Assembler::v0_t);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vlsrI_masked(vReg dst_src, vReg shift, vRegMask_V0 v0) %{
+  match(Set dst_src (URShiftVI (Binary dst_src shift) v0));
+  ins_cost(VEC_COST);
+  effect(TEMP_DEF dst_src);
+  format %{ "vlsrI_masked $dst_src, $dst_src, $shift, $v0" %}
+  ins_encode %{
+    __ rvv_vsetvli(T_INT, Matcher::vector_length_in_bytes(this));
+    __ vsrl_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg),
+               as_VectorRegister($shift$$reg), Assembler::v0_t);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vlsrL_masked(vReg dst_src, vReg shift, vRegMask_V0 v0) %{
+  match(Set dst_src (URShiftVL (Binary dst_src shift) v0));
+  ins_cost(VEC_COST);
+  effect(TEMP_DEF dst_src);
+  format %{ "vlsrL_masked $dst_src, $dst_src, $shift, $v0" %}
+  ins_encode %{
+    __ rvv_vsetvli(T_LONG, Matcher::vector_length_in_bytes(this));
+    __ vsrl_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg),
+               as_VectorRegister($shift$$reg), Assembler::v0_t);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
 instruct vasrB_imm(vReg dst, vReg src, immI shift) %{
   match(Set dst (RShiftVB src (RShiftCntV shift)));
   ins_cost(VEC_COST);
@@ -1827,13 +2197,45 @@ instruct vsubD(vReg dst, vReg src1, vReg src2) %{
   ins_pipe(pipe_slow);
 %}
 
+// vector sub - predicated
+
+instruct vsub_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{
+  match(Set dst_src1 (SubVB (Binary dst_src1 src2) v0));
+  match(Set dst_src1 (SubVS (Binary dst_src1 src2) v0));
+  match(Set dst_src1 (SubVI (Binary dst_src1 src2) v0));
+  match(Set dst_src1 (SubVL (Binary dst_src1 src2) v0));
+  ins_cost(VEC_COST);
+  format %{ "vsub.vv $dst_src1, $src2, $v0\t#@vsub_masked" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    __ rvv_vsetvli(bt, Matcher::vector_length_in_bytes(this));
+    __ vsub_vv(as_VectorRegister($dst_src1$$reg), as_VectorRegister($dst_src1$$reg),
+               as_VectorRegister($src2$$reg), Assembler::v0_t);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vsub_fp_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{
+  match(Set dst_src1 (SubVF (Binary dst_src1 src2) v0));
+  match(Set dst_src1 (SubVD (Binary dst_src1 src2) v0));
+  ins_cost(VEC_COST);
+  format %{ "vfsub.vv $dst_src1, $src2, $v0\t#@vsub_fp_masked" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    __ rvv_vsetvli(bt, Matcher::vector_length_in_bytes(this));
+    __ vfsub_vv(as_VectorRegister($dst_src1$$reg), as_VectorRegister($dst_src1$$reg),
+                as_VectorRegister($src2$$reg), Assembler::v0_t);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
 instruct vstring_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt,
                          iRegI_R10 result, vReg_V1 v1,
-                         vReg_V2 v2, vReg_V3 v3, rFlagsReg cr)
+                         vReg_V2 v2, vReg_V3 v3, vRegMask_V0 v0, rFlagsReg cr)
 %{
   predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
   match(Set result (StrEquals (Binary str1 str2) cnt));
-  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP v1, TEMP v2, TEMP v3, KILL cr);
+  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP v1, TEMP v2, TEMP v3, TEMP v0, KILL cr);
 
   format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsL" %}
   ins_encode %{
@@ -1846,11 +2248,11 @@ instruct vstring_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt,
 
 instruct vstring_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt,
                          iRegI_R10 result, vReg_V1 v1,
-                         vReg_V2 v2, vReg_V3 v3, rFlagsReg cr)
+                         vReg_V2 v2, vReg_V3 v3, vRegMask_V0 v0, rFlagsReg cr)
 %{
   predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
   match(Set result (StrEquals (Binary str1 str2) cnt));
-  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP v1, TEMP v2, TEMP v3, KILL cr);
+  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP v1, TEMP v2, TEMP v3, TEMP v0, KILL cr);
 
   format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsU" %}
   ins_encode %{
@@ -1862,11 +2264,11 @@ instruct vstring_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt,
 %}
 
 instruct varray_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result,
-                        vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg cr)
+                        vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vRegMask_V0 v0, iRegP_R28 tmp, rFlagsReg cr)
 %{
   predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
   match(Set result (AryEq ary1 ary2));
-  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL cr);
+  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, TEMP v0, KILL cr);
 
   format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsB // KILL $tmp" %}
   ins_encode %{
@@ -1877,11 +2279,11 @@ instruct varray_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result,
 %}
 
 instruct varray_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result,
-                        vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg cr)
+                        vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vRegMask_V0 v0, iRegP_R28 tmp, rFlagsReg cr)
 %{
   predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
   match(Set result (AryEq ary1 ary2));
-  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL cr);
+  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, TEMP v0, KILL cr);
 
   format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsC // KILL $tmp" %}
   ins_encode %{
@@ -1893,12 +2295,12 @@ instruct varray_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result,
 
 instruct vstring_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
                           iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5,
-                          iRegP_R28 tmp1, iRegL_R29 tmp2)
+                          vRegMask_V0 v0, iRegP_R28 tmp1, iRegL_R29 tmp2)
 %{
   predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU);
   match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2)));
   effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
-         TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5);
+         TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5, TEMP v0);
 
   format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %}
   ins_encode %{
@@ -1912,12 +2314,12 @@ instruct vstring_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_
 %}
 instruct vstring_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
                           iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5,
-                          iRegP_R28 tmp1, iRegL_R29 tmp2)
+                          vRegMask_V0 v0, iRegP_R28 tmp1, iRegL_R29 tmp2)
 %{
   predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL);
   match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2)));
   effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
-         TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5);
+         TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5, TEMP v0);
 
   format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %}
   ins_encode %{
@@ -1931,12 +2333,12 @@ instruct vstring_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_
 
 instruct vstring_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
                            iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5,
-                           iRegP_R28 tmp1, iRegL_R29 tmp2)
+                           vRegMask_V0 v0, iRegP_R28 tmp1, iRegL_R29 tmp2)
 %{
   predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL);
   match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2)));
   effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
-         TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5);
+         TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5, TEMP v0);
 
   format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %}
   ins_encode %{
@@ -1949,12 +2351,12 @@ instruct vstring_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI
 %}
 instruct vstring_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
                            iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5,
-                           iRegP_R28 tmp1, iRegL_R29 tmp2)
+                           vRegMask_V0 v0, iRegP_R28 tmp1, iRegL_R29 tmp2)
 %{
   predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU);
   match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2)));
   effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
-         TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5);
+         TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5, TEMP v0);
 
   format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %}
   ins_encode %{
@@ -1968,11 +2370,11 @@ instruct vstring_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI
 
 // fast byte[] to char[] inflation
 instruct vstring_inflate(Universe dummy, iRegP_R10 src, iRegP_R11 dst, iRegI_R12 len,
-                         vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegLNoSp tmp)
+                         vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vRegMask_V0 v0, iRegLNoSp tmp)
 %{
   predicate(UseRVV);
   match(Set dummy (StrInflatedCopy src (Binary dst len)));
-  effect(TEMP v1, TEMP v2, TEMP v3, TEMP tmp, USE_KILL src, USE_KILL dst, USE_KILL len);
+  effect(TEMP v1, TEMP v2, TEMP v3, TEMP v0, TEMP tmp, USE_KILL src, USE_KILL dst, USE_KILL len);
 
   format %{ "String Inflate $src,$dst" %}
   ins_encode %{
@@ -1983,12 +2385,12 @@ instruct vstring_inflate(Universe dummy, iRegP_R10 src, iRegP_R11 dst, iRegI_R12
 
 // encode char[] to byte[] in ISO_8859_1
 instruct vencode_iso_array(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result,
-                           vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegLNoSp tmp)
+                           vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vRegMask_V0 v0, iRegLNoSp tmp)
 %{
   predicate(UseRVV);
   match(Set result (EncodeISOArray src (Binary dst len)));
   effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len,
-         TEMP v1, TEMP v2, TEMP v3, TEMP tmp);
+         TEMP v1, TEMP v2, TEMP v3, TEMP tmp, TEMP v0);
 
   format %{ "Encode array $src,$dst,$len -> $result" %}
   ins_encode %{
@@ -2000,12 +2402,12 @@ instruct vencode_iso_array(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R1
 
 // fast char[] to byte[] compression
 instruct vstring_compress(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result,
-                          vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegLNoSp tmp)
+                          vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vRegMask_V0 v0, iRegLNoSp tmp)
 %{
   predicate(UseRVV);
   match(Set result (StrCompressedCopy src (Binary dst len)));
   effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len,
-         TEMP v1, TEMP v2, TEMP v3, TEMP tmp);
+         TEMP v1, TEMP v2, TEMP v3, TEMP tmp, TEMP v0);
 
   format %{ "String Compress $src,$dst -> $result    // KILL R11, R12, R13" %}
   ins_encode %{
@@ -2016,11 +2418,11 @@ instruct vstring_compress(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10
 %}
 
 instruct vcount_positives(iRegP_R11 ary, iRegI_R12 len, iRegI_R10 result,
-                          vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegLNoSp tmp)
+                          vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vRegMask_V0 v0, iRegLNoSp tmp)
 %{
   predicate(UseRVV);
   match(Set result (CountPositives ary len));
-  effect(TEMP_DEF result, USE_KILL ary, USE_KILL len, TEMP v1, TEMP v2, TEMP v3, TEMP tmp);
+  effect(TEMP_DEF result, USE_KILL ary, USE_KILL len, TEMP v1, TEMP v2, TEMP v3, TEMP tmp, TEMP v0);
 
   format %{ "count positives byte[] $ary, $len -> $result" %}
   ins_encode %{
@@ -2032,12 +2434,12 @@ instruct vcount_positives(iRegP_R11 ary, iRegI_R12 len, iRegI_R10 result,
 
 instruct vstringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch,
                                iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2,
-                               vReg_V1 v1, vReg_V2 v2, vReg_V3 v3)
+                               vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vRegMask_V0 v0)
 %{
   predicate(UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
   effect(TEMP_DEF result, USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
-         TEMP tmp1, TEMP tmp2, TEMP v1, TEMP v2, TEMP v3);
+         TEMP tmp1, TEMP tmp2, TEMP v1, TEMP v2, TEMP v3, TEMP v0);
 
   format %{ "StringUTF16 IndexOf char[] $str1, $cnt1, $ch -> $result" %}
 
@@ -2052,12 +2454,12 @@ instruct vstringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch,
 
 instruct vstringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch,
                                iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2,
-                               vReg_V1 v1, vReg_V2 v2, vReg_V3 v3)
+                               vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vRegMask_V0 v0)
 %{
   predicate(UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
   effect(TEMP_DEF result, USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
-         TEMP tmp1, TEMP tmp2, TEMP v1, TEMP v2, TEMP v3);
+         TEMP tmp1, TEMP tmp2, TEMP v1, TEMP v2, TEMP v3, TEMP v0);
 
   format %{ "StringLatin1 IndexOf char[] $str1, $cnt1, $ch -> $result" %}
 
@@ -2072,11 +2474,11 @@ instruct vstringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch,
 
 // clearing of an array
 instruct vclearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy,
-                             vReg_V1 vReg1, vReg_V2 vReg2, vReg_V3 vReg3)
+                             vReg_V1 vReg1, vReg_V2 vReg2, vReg_V3 vReg3, vRegMask_V0 v0)
 %{
   predicate(!UseBlockZeroing && UseRVV);
   match(Set dummy (ClearArray cnt base));
-  effect(USE_KILL cnt, USE_KILL base, TEMP vReg1, TEMP vReg2, TEMP vReg3);
+  effect(USE_KILL cnt, USE_KILL base, TEMP vReg1, TEMP vReg2, TEMP vReg3, TEMP v0);
 
   format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %}
 
@@ -2101,4 +2503,188 @@ instruct vloadcon(vReg dst, immI0 src) %{
     }
   %}
   ins_pipe(pipe_slow);
+%}
+
+instruct vmask_gen_I(vRegMask dst, iRegI src) %{ // mask whose first `src` lanes are set; int length, widened via ConvI2L
+  match(Set dst (VectorMaskGen (ConvI2L src)));
+  format %{ "vmask_gen_I $dst, $src" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    Assembler::SEW sew = Assembler::elemtype_to_sew(bt); // SEW from the mask's element type so vl counts lanes, not bytes
+    __ vsetvli(t0, $src$$Register, sew); // vl = min(src, VLMAX); t0 only receives the resulting vl
+    __ vmset_m(as_VectorRegister($dst$$reg)); // sets the first vl mask bits; tail bits follow the vsetvli tail policy -- confirm consumers read only vl bits
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vmask_gen_L(vRegMask dst, iRegL src) %{ // long-length twin of vmask_gen_I: first `src` lanes set
+  match(Set dst (VectorMaskGen src));
+  format %{ "vmask_gen_L $dst, $src" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    Assembler::SEW sew = Assembler::elemtype_to_sew(bt); // SEW chosen per element type so vl is a lane count
+    __ vsetvli(t0, $src$$Register, sew); // vl = min(src, VLMAX); t0 only receives the resulting vl
+    __ vmset_m(as_VectorRegister($dst$$reg)); // set the first vl mask bits; tail-bit state depends on tail policy -- TODO confirm
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vmask_gen_imm(vRegMask dst, immL con) %{ // mask gen for a compile-time-constant length
+  match(Set dst (VectorMaskGen con));
+  format %{ "vmask_gen_imm $dst, $con" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    __ rvv_vsetvli(bt, (uint)($con$$constant)); // NOTE(review): truncates the 64-bit immL to uint -- presumably the matcher bounds it by vector length; confirm
+    __ vmset_m(as_VectorRegister($dst$$reg)); // set the first vl mask bits
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vmaskAll_immI(vRegMask dst, immI src) %{ // MaskAll with an int constant: only 0 (all-false) or -1 (all-true) are legal
+  match(Set dst (MaskAll src));
+  format %{ "vmaskAll_immI $dst, $src" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    __ rvv_vsetvli(bt, Matcher::vector_length_in_bytes(this)); // NOTE(review): AVL is length-in-bytes, not lane count -- confirm rvv_vsetvli expects bytes
+    int con = (int)$src$$constant;
+    if (con == 0) {
+      __ vmclr_m(as_VectorRegister($dst$$reg)); // all-false mask
+    } else {
+      assert(con == -1, "invalid constant value for mask");
+      __ vmset_m(as_VectorRegister($dst$$reg)); // all-true mask
+    }
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vmaskAllI(vRegMask dst, iRegI src) %{ // MaskAll with a runtime int: non-zero src -> all-true mask, zero -> all-false
+  match(Set dst (MaskAll src));
+  format %{ "vmaskAllI $dst, $src" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    __ rvv_vsetvli(bt, Matcher::vector_length_in_bytes(this)); // NOTE(review): AVL passed in bytes -- confirm rvv_vsetvli's contract
+    __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); // broadcast src; dst doubles as a full-width scratch vector first
+    __ vmsne_vx(as_VectorRegister($dst$$reg), as_VectorRegister($dst$$reg), zr); // mask bit = (lane != 0), collapsing the broadcast into a mask
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vmaskAll_immL(vRegMask dst, immL src) %{ // long-constant twin of vmaskAll_immI: 0 -> all-false, -1 -> all-true
+  match(Set dst (MaskAll src));
+  format %{ "vmaskAll_immL $dst, $src" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    __ rvv_vsetvli(bt, Matcher::vector_length_in_bytes(this)); // NOTE(review): AVL passed in bytes -- confirm rvv_vsetvli's contract
+    long con = (long)$src$$constant;
+    if (con == 0) {
+      __ vmclr_m(as_VectorRegister($dst$$reg)); // all-false mask
+    } else {
+      assert(con == -1, "invalid constant value for mask");
+      __ vmset_m(as_VectorRegister($dst$$reg)); // all-true mask
+    }
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vmaskAllL(vRegMask dst, iRegL src) %{ // runtime-long twin of vmaskAllI: non-zero src -> all-true mask
+  match(Set dst (MaskAll src));
+  format %{ "vmaskAllL $dst, $src" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    __ rvv_vsetvli(bt, Matcher::vector_length_in_bytes(this)); // NOTE(review): AVL passed in bytes -- confirm rvv_vsetvli's contract
+    __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); // broadcast src into dst (used as scratch before it becomes the mask)
+    __ vmsne_vx(as_VectorRegister($dst$$reg), as_VectorRegister($dst$$reg), zr); // mask bit = (lane != 0)
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// ------------------------------ Vector mask basic OPs ------------------------
+
+// vector mask logical ops: and/or/xor
+
+instruct vmask_and(vRegMask dst, vRegMask src1, vRegMask src2) %{ // bitwise AND of two masks via vmand.mm
+  match(Set dst (AndVMask src1 src2));
+  format %{ "vmask_and $dst, $src1, $src2" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    __ rvv_vsetvli(bt, Matcher::vector_length_in_bytes(this)); // vl bounds how many mask bits the mm op touches
+    __ vmand_mm(as_VectorRegister($dst$$reg),
+                as_VectorRegister($src1$$reg),
+                as_VectorRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vmask_or(vRegMask dst, vRegMask src1, vRegMask src2) %{ // bitwise OR of two masks via vmor.mm
+  match(Set dst (OrVMask src1 src2));
+  format %{ "vmask_or $dst, $src1, $src2" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    __ rvv_vsetvli(bt, Matcher::vector_length_in_bytes(this)); // vl bounds how many mask bits the mm op touches
+    __ vmor_mm(as_VectorRegister($dst$$reg),
+               as_VectorRegister($src1$$reg),
+               as_VectorRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vmask_xor(vRegMask dst, vRegMask src1, vRegMask src2) %{ // bitwise XOR of two masks via vmxor.mm
+  match(Set dst (XorVMask src1 src2));
+  format %{ "vmask_xor $dst, $src1, $src2" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    __ rvv_vsetvli(bt, Matcher::vector_length_in_bytes(this)); // vl bounds how many mask bits the mm op touches
+    __ vmxor_mm(as_VectorRegister($dst$$reg),
+                as_VectorRegister($src1$$reg),
+                as_VectorRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vmaskcast(vRegMask dst_src) %{ // mask cast emits nothing -- presumably the bit layout is shared across element types; confirm
+  match(Set dst_src (VectorMaskCast dst_src));
+  ins_cost(0); // free: source and destination are the same register
+  format %{ "vmaskcast $dst_src\t# do nothing" %}
+  ins_encode(/* empty encoding */);
+  ins_pipe(pipe_class_empty);
+%}
+
+// vector load/store - predicated
+
+instruct loadV_masked(vReg dst, vmemA mem, vRegMask_V0 v0) %{ // predicated vector load; operand class pins the mask to v0
+  match(Set dst (LoadVectorMasked mem v0));
+  format %{ "loadV_masked $dst, $mem, $v0" %}
+  ins_encode %{
+    VectorRegister dst_reg = as_VectorRegister($dst$$reg);
+    loadStore(C2_MacroAssembler(&cbuf), false, dst_reg, // false => load direction
+              Matcher::vector_element_basic_type(this), as_Register($mem$$base), // only the base register is used -- assumes vmemA has zero displacement; TODO confirm
+              Matcher::vector_length_in_bytes(this), Assembler::v0_t); // v0_t: only lanes with v0[i]=1 are accessed
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct storeV_masked(vReg src, vmemA mem, vRegMask_V0 v0) %{ // predicated vector store; mask pinned to v0 by the operand class
+  match(Set mem (StoreVectorMasked mem (Binary src v0)));
+  format %{ "storeV_masked $mem, $src, $v0" %}
+  ins_encode %{
+    VectorRegister src_reg = as_VectorRegister($src$$reg);
+    loadStore(C2_MacroAssembler(&cbuf), true, src_reg, // true => store direction
+              Matcher::vector_element_basic_type(this, $src), as_Register($mem$$base), // type/length queried from $src: `this` is the store node, not the vector
+              Matcher::vector_length_in_bytes(this, $src), Assembler::v0_t); // v0_t: only lanes with v0[i]=1 are written
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// ------------------------------ Vector blend ---------------------------------
+
+instruct vblend(vReg dst, vReg src1, vReg src2, vRegMask_V0 v0) %{ // dst[i] = v0[i] ? src2[i] : src1[i]
+  match(Set dst (VectorBlend (Binary src1 src2) v0));
+  format %{ "vmerge_vvm $dst, $src1, $src2, v0\t#@vector blend" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    __ rvv_vsetvli(bt, Matcher::vector_length_in_bytes(this));
+    __ vmerge_vvm(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), // assembler args are (vd, vs2, vs1); vmerge takes vs1 where the mask is set,
+                  as_VectorRegister($src2$$reg)); // so true lanes come from src2 -- matching VectorBlend's select order
+  %}
+  ins_pipe(pipe_slow);
 %}
\ No newline at end of file