8183390: Fix and re-enable post loop vectorization

Reviewed-by: roland, thartmann, kvn
2022-04-05 23:50:13 +00:00 · 2022-04-05 23:50:13 +00:00 · 741be46138
commit 741be46138
parent 500f9a577b
40 changed files with 4571 additions and 329 deletions
--- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
@ -48,20 +48,6 @@ inline Assembler::AvxVectorLen C2_MacroAssembler::vector_length_encoding(int vle
  }
 }

-void C2_MacroAssembler::setvectmask(Register dst, Register src, KRegister mask) {
-  guarantee(PostLoopMultiversioning, "must be");
-  Assembler::movl(dst, 1);
-  Assembler::shlxl(dst, dst, src);
-  Assembler::decl(dst);
-  Assembler::kmovdl(mask, dst);
-  Assembler::movl(dst, src);
-}
-
-void C2_MacroAssembler::restorevectmask(KRegister mask) {
-  guarantee(PostLoopMultiversioning, "must be");
-  Assembler::knotwl(mask, k0);
-}
-
 #if INCLUDE_RTM_OPT

 // Update rtm_counters based on abort status
@ -1947,7 +1933,6 @@ void C2_MacroAssembler::reduce8L(int opcode, Register dst, Register src1, XMMReg
 }

 void C2_MacroAssembler::genmask(KRegister dst, Register len, Register temp) {
-  assert(ArrayOperationPartialInlineSize > 0 && ArrayOperationPartialInlineSize <= 64, "invalid");
  mov64(temp, -1L);
  bzhiq(temp, temp, len);
  kmovql(dst, temp);
--- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
@ -30,10 +30,6 @@
 public:
  Assembler::AvxVectorLen vector_length_encoding(int vlen_in_bytes);

-  // special instructions for EVEX
-  void setvectmask(Register dst, Register src, KRegister mask);
-  void restorevectmask(KRegister mask);
-
  // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
  // See full desription in macroAssembler_x86.cpp.
  void fast_lock(Register obj, Register box, Register tmp,
--- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp
@ -4671,16 +4671,6 @@ void MacroAssembler::restore_cpu_control_state_after_jni() {
  }
  // Clear upper bits of YMM registers to avoid SSE <-> AVX transition penalty.
  vzeroupper();
-  // Reset k1 to 0xffff.
-
-#ifdef COMPILER2
-  if (PostLoopMultiversioning && VM_Version::supports_evex()) {
-    push(rcx);
-    movl(rcx, 0xffff);
-    kmovwl(k1, rcx);
-    pop(rcx);
-  }
-#endif // COMPILER2

 #ifndef _LP64
  // Either restore the x87 floating pointer control word after returning
--- a/src/hotspot/cpu/x86/x86.ad
+++ b/src/hotspot/cpu/x86/x86.ad
@ -2846,23 +2846,6 @@ instruct ShouldNotReachHere() %{
  ins_pipe(pipe_slow);
 %}

-// =================================EVEX special===============================
-// Existing partial implementation for post-loop multi-versioning computes
-// the mask corresponding to tail loop in K1 opmask register. This may then be
-// used for predicating instructions in loop body during last post-loop iteration.
-// TODO: Remove hard-coded K1 usage while fixing existing post-loop
-// multiversioning support.
-instruct setMask(rRegI dst, rRegI src, kReg_K1 mask) %{
-  predicate(PostLoopMultiversioning && Matcher::has_predicated_vectors());
-  match(Set dst (SetVectMaskI  src));
-  effect(TEMP dst);
-  format %{ "setvectmask   $dst, $src" %}
-  ins_encode %{
-    __ setvectmask($dst$$Register, $src$$Register, $mask$$KRegister);
-  %}
-  ins_pipe(pipe_slow);
-%}
-
 // ============================================================================

 instruct addF_reg(regF dst, regF src) %{
--- a/src/hotspot/cpu/x86/x86_32.ad
+++ b/src/hotspot/cpu/x86/x86_32.ad
@ -260,17 +260,7 @@ source %{
 // instructions, to allow sign-masking or sign-bit flipping.  They allow
 // fast versions of NegF/NegD and AbsF/AbsD.

-void reg_mask_init() {
-  if (Matcher::has_predicated_vectors()) {
-    // Post-loop multi-versioning expects mask to be present in K1 register, till the time
-    // its fixed, RA should not be allocting K1 register, this shall prevent any accidental
-    // curruption of value held in K1 register.
-    if (PostLoopMultiversioning) {
-      const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()));
-      const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()->next()));
-    }
-  }
-}
+void reg_mask_init() {}

 // Note: 'double' and 'long long' have 32-bits alignment on x86.
 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
@ -12610,7 +12600,6 @@ instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{

 // Jump Direct Conditional - Label defines a relative address from Jcc+1
 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
-  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl);

@ -12626,7 +12615,6 @@ instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{

 // Jump Direct Conditional - Label defines a relative address from Jcc+1
 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
-  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

@ -12641,7 +12629,6 @@ instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
 %}

 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
-  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

@ -12655,66 +12642,6 @@ instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  ins_pipe( pipe_jcc );
 %}

-// mask version
-// Jump Direct Conditional - Label defines a relative address from Jcc+1
-// Bounded mask operand used in following patten is needed for
-// post-loop multiversioning.
-instruct jmpLoopEnd_and_restoreMask(cmpOp cop, kReg_K1 ktmp, eFlagsReg cr, label labl) %{
-  predicate(PostLoopMultiversioning && n->has_vector_mask_set());
-  match(CountedLoopEnd cop cr);
-  effect(USE labl, TEMP ktmp);
-
-  ins_cost(400);
-  format %{ "J$cop    $labl\t# Loop end\n\t"
-            "restorevectmask \t# vector mask restore for loops" %}
-  size(10);
-  ins_encode %{
-    Label* L = $labl$$label;
-    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
-    __ restorevectmask($ktmp$$KRegister);
-  %}
-  ins_pipe( pipe_jcc );
-%}
-
-// Jump Direct Conditional - Label defines a relative address from Jcc+1
-// Bounded mask operand used in following patten is needed for
-// post-loop multiversioning.
-instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, kReg_K1 ktmp, eFlagsRegU cmp, label labl) %{
-  predicate(PostLoopMultiversioning && n->has_vector_mask_set());
-  match(CountedLoopEnd cop cmp);
-  effect(USE labl, TEMP ktmp);
-
-  ins_cost(400);
-  format %{ "J$cop,u  $labl\t# Loop end\n\t"
-            "restorevectmask \t# vector mask restore for loops" %}
-  size(10);
-  ins_encode %{
-    Label* L = $labl$$label;
-    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
-    __ restorevectmask($ktmp$$KRegister);
-  %}
-  ins_pipe( pipe_jcc );
-%}
-
-// Bounded mask operand used in following patten is needed for
-// post-loop multiversioning.
-instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, kReg_K1 ktmp, eFlagsRegUCF cmp, label labl) %{
-  predicate(PostLoopMultiversioning && n->has_vector_mask_set());
-  match(CountedLoopEnd cop cmp);
-  effect(USE labl, TEMP ktmp);
-
-  ins_cost(300);
-  format %{ "J$cop,u  $labl\t# Loop end\n\t"
-            "restorevectmask \t# vector mask restore for loops" %}
-  size(10);
-  ins_encode %{
-    Label* L = $labl$$label;
-    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
-    __ restorevectmask($ktmp$$KRegister);
-  %}
-  ins_pipe( pipe_jcc );
-%}
-
 // Jump Direct Conditional - using unsigned comparison
 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
--- a/src/hotspot/cpu/x86/x86_64.ad
+++ b/src/hotspot/cpu/x86/x86_64.ad
@ -454,16 +454,6 @@ void reg_mask_init() {
  // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
  // from the float_reg_legacy/float_reg_evex register class.
  _FLOAT_REG_mask = VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask;
-
-  if (Matcher::has_predicated_vectors()) {
-    // Post-loop multi-versioning expects mask to be present in K1 register, till the time
-    // its fixed, RA should not be allocting K1 register, this shall prevent any accidental
-    // curruption of value held in K1 register.
-    if (PostLoopMultiversioning) {
-      const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()));
-      const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()->next()));
-    }
-  }
 }

 static bool generate_vzeroupper(Compile* C) {
@ -12713,7 +12703,6 @@ instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
 // Jump Direct Conditional - Label defines a relative address from Jcc+1
 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
 %{
-  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl);

@ -12729,7 +12718,6 @@ instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)

 // Jump Direct Conditional - Label defines a relative address from Jcc+1
 instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
-  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

@ -12744,7 +12732,6 @@ instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
 %}

 instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
-  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

@ -12758,67 +12745,6 @@ instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
  ins_pipe(pipe_jcc);
 %}

-// mask version
-// Jump Direct Conditional - Label defines a relative address from Jcc+1
-// Bounded mask operand used in following patten is needed for
-// post-loop multiversioning.
-instruct jmpLoopEnd_and_restoreMask(cmpOp cop, kReg_K1 ktmp, rFlagsReg cr, label labl)
-%{
-  predicate(PostLoopMultiversioning && n->has_vector_mask_set());
-  match(CountedLoopEnd cop cr);
-  effect(USE labl, TEMP ktmp);
-
-  ins_cost(400);
-  format %{ "j$cop     $labl\t# loop end\n\t"
-            "restorevectmask \t# vector mask restore for loops" %}
-  size(10);
-  ins_encode %{
-    Label* L = $labl$$label;
-    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
-    __ restorevectmask($ktmp$$KRegister);
-  %}
-  ins_pipe(pipe_jcc);
-%}
-
-// Jump Direct Conditional - Label defines a relative address from Jcc+1
-// Bounded mask operand used in following patten is needed for
-// post-loop multiversioning.
-instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, kReg_K1 ktmp, rFlagsRegU cmp, label labl) %{
-  predicate(PostLoopMultiversioning && n->has_vector_mask_set());
-  match(CountedLoopEnd cop cmp);
-  effect(USE labl, TEMP ktmp);
-
-  ins_cost(400);
-  format %{ "j$cop,u   $labl\t# loop end\n\t"
-            "restorevectmask \t# vector mask restore for loops" %}
-  size(10);
-  ins_encode %{
-    Label* L = $labl$$label;
-    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
-    __ restorevectmask($ktmp$$KRegister);
-  %}
-  ins_pipe(pipe_jcc);
-%}
-
-// Bounded mask operand used in following patten is needed for
-// post-loop multiversioning.
-instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, kReg_K1 ktmp, rFlagsRegUCF cmp, label labl) %{
-  predicate(PostLoopMultiversioning && n->has_vector_mask_set());
-  match(CountedLoopEnd cop cmp);
-  effect(USE labl, TEMP ktmp);
-
-  ins_cost(300);
-  format %{ "j$cop,u   $labl\t# loop end\n\t"
-            "restorevectmask \t# vector mask restore for loops" %}
-  size(10);
-  ins_encode %{
-    Label* L = $labl$$label;
-    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
-    __ restorevectmask($ktmp$$KRegister);
-  %}
-  ins_pipe(pipe_jcc);
-%}
-
 // Jump Direct Conditional - using unsigned comparison
 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
  match(If cop cmp);
--- a/src/hotspot/share/opto/classes.hpp
+++ b/src/hotspot/share/opto/classes.hpp
@ -214,7 +214,6 @@ macro(MaxD)
 macro(MaxF)
 macro(MemBarAcquire)
 macro(LoadFence)
-macro(SetVectMaskI)
 macro(MemBarAcquireLock)
 macro(MemBarCPUOrder)
 macro(MemBarRelease)
--- a/src/hotspot/share/opto/loopTransform.cpp
+++ b/src/hotspot/share/opto/loopTransform.cpp
@ -3671,7 +3671,8 @@ bool IdealLoopTree::iteration_split_impl(PhaseIdealLoop *phase, Node_List &old_n
      phase->has_range_checks(this);
    }

-    if (should_unroll && !should_peel && PostLoopMultiversioning) {
+    if (should_unroll && !should_peel && PostLoopMultiversioning &&
+        Matcher::has_predicated_vectors()) {
      // Try to setup multiversioning on main loops before they are unrolled
      if (cl->is_main_loop() && (cl->unrolled_count() == 1)) {
        phase->insert_scalar_rced_post_loop(this, old_new);
--- a/src/hotspot/share/opto/loopnode.cpp
+++ b/src/hotspot/share/opto/loopnode.cpp
@ -4547,7 +4547,8 @@ void PhaseIdealLoop::build_and_optimize() {
      if (lpt->is_counted()) {
        CountedLoopNode *cl = lpt->_head->as_CountedLoop();

-        if (PostLoopMultiversioning && cl->is_rce_post_loop() && !cl->is_vectorized_loop()) {
+        if (cl->is_rce_post_loop() && !cl->is_vectorized_loop()) {
+          assert(PostLoopMultiversioning, "multiversioning must be enabled");
          // Check that the rce'd post loop is encountered first, multiversion after all
          // major main loop optimization are concluded
          if (!C->major_progress()) {
--- a/src/hotspot/share/opto/loopnode.hpp
+++ b/src/hotspot/share/opto/loopnode.hpp
@ -246,11 +246,14 @@ class CountedLoopNode : public BaseCountedLoopNode {
  // vector mapped unroll factor here
  int _slp_maximum_unroll_factor;

+  // The eventual count of vectorizable packs in slp
+  int _slp_vector_pack_count;
+
 public:
  CountedLoopNode(Node *entry, Node *backedge)
    : BaseCountedLoopNode(entry, backedge), _main_idx(0), _trip_count(max_juint),
      _unrolled_count_log2(0), _node_count_before_unroll(0),
-      _slp_maximum_unroll_factor(0) {
+      _slp_maximum_unroll_factor(0), _slp_vector_pack_count(0) {
    init_class_id(Class_CountedLoop);
    // Initialize _trip_count to the largest possible value.
    // Will be reset (lower) if the loop's trip count is known.
@ -327,6 +330,8 @@ public:
  int  node_count_before_unroll()            { return _node_count_before_unroll; }
  void set_slp_max_unroll(int unroll_factor) { _slp_maximum_unroll_factor = unroll_factor; }
  int  slp_max_unroll() const                { return _slp_maximum_unroll_factor; }
+  void set_slp_pack_count(int pack_count)    { _slp_vector_pack_count = pack_count; }
+  int  slp_pack_count() const                { return _slp_vector_pack_count; }

  virtual LoopNode* skip_strip_mined(int expect_skeleton = 1);
  OuterStripMinedLoopNode* outer_loop() const;
--- a/src/hotspot/share/opto/node.hpp
+++ b/src/hotspot/share/opto/node.hpp
@ -782,11 +782,10 @@ public:
    Flag_has_call                    = 1 << 10,
    Flag_is_reduction                = 1 << 11,
    Flag_is_scheduled                = 1 << 12,
-    Flag_has_vector_mask_set         = 1 << 13,
-    Flag_is_expensive                = 1 << 14,
-    Flag_is_predicated_vector        = 1 << 15,
-    Flag_for_post_loop_opts_igvn     = 1 << 16,
-    Flag_is_removed_by_peephole      = 1 << 17,
+    Flag_is_expensive                = 1 << 13,
+    Flag_is_predicated_vector        = 1 << 14,
+    Flag_for_post_loop_opts_igvn     = 1 << 15,
+    Flag_is_removed_by_peephole      = 1 << 16,
    _last_flag                       = Flag_is_removed_by_peephole
  };

@ -1001,9 +1000,6 @@ public:

  bool is_predicated_vector() const { return (_flags & Flag_is_predicated_vector) != 0; }

-  // The node is a CountedLoopEnd with a mask annotation so as to emit a restore context
-  bool has_vector_mask_set() const { return (_flags & Flag_has_vector_mask_set) != 0; }
-
  // Used in lcm to mark nodes that have scheduled
  bool is_scheduled() const { return (_flags & Flag_is_scheduled) != 0; }

--- a/src/hotspot/share/opto/superword.cpp
+++ b/src/hotspot/share/opto/superword.cpp
@ -110,26 +110,9 @@ bool SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) {
    return false; // skip malformed counted loop
  }

-  bool post_loop_allowed = (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop());
-  if (post_loop_allowed) {
-    if (cl->is_reduction_loop()) {
-      return false; // no predication mapping
-    }
-    Node *limit = cl->limit();
-    if (limit->is_Con()) {
-      return false; // non constant limits only
-    }
-    // Now check the limit for expressions we do not handle
-    if (limit->is_Add()) {
-      Node *in2 = limit->in(2);
-      if (in2->is_Con()) {
-        int val = in2->get_int();
-        // should not try to program these cases
-        if (val < 0) {
-          return false;
-        }
-      }
-    }
+  if (cl->is_rce_post_loop() && cl->is_reduction_loop()) {
+    // Post loop vectorization doesn't support reductions
+    return false;
  }

  // skip any loop that has not been assigned max unroll by analysis
@ -193,15 +176,21 @@ bool SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) {
  if (do_optimization) {
    assert(_packset.length() == 0, "packset must be empty");
    success = SLP_extract();
-    if (PostLoopMultiversioning && Matcher::has_predicated_vectors()) {
+    if (PostLoopMultiversioning) {
      if (cl->is_vectorized_loop() && cl->is_main_loop() && !cl->is_reduction_loop()) {
-        IdealLoopTree *lpt_next = lpt->_next;
+        IdealLoopTree *lpt_next = cl->is_strip_mined() ? lpt->_parent->_next : lpt->_next;
        CountedLoopNode *cl_next = lpt_next->_head->as_CountedLoop();
        _phase->has_range_checks(lpt_next);
-        if (cl_next->is_post_loop() && !cl_next->range_checks_present()) {
+        // Main loop SLP works well for manually unrolled loops. But post loop
+        // vectorization doesn't work for these. To bail out of the optimization
+        // early, we have range check and loop stride conditions below.
+        if (cl_next->is_post_loop() && !cl_next->range_checks_present() &&
+            cl_next->stride_is_con() && abs(cl_next->stride_con()) == 1) {
          if (!cl_next->is_vectorized_loop()) {
-            int slp_max_unroll_factor = cl->slp_max_unroll();
-            cl_next->set_slp_max_unroll(slp_max_unroll_factor);
+            // Propagate some main loop attributes to its corresponding scalar
+            // rce'd post loop for vectorization with vector masks
+            cl_next->set_slp_max_unroll(cl->slp_max_unroll());
+            cl_next->set_slp_pack_count(cl->slp_pack_count());
          }
        }
      }
@ -229,7 +218,6 @@ void SuperWord::unrolling_analysis(int &local_loop_unroll_factor) {
  }

  int max_vector = Matcher::max_vector_size(T_BYTE);
-  bool post_loop_allowed = (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop());

  // Process the loop, some/all of the stack entries will not be in order, ergo
  // need to preprocess the ignored initial state before we process the loop
@ -239,6 +227,7 @@ void SuperWord::unrolling_analysis(int &local_loop_unroll_factor) {
      n->is_reduction() ||
      n->is_AddP() ||
      n->is_Cmp() ||
+      n->is_Bool() ||
      n->is_IfTrue() ||
      n->is_CountedLoop() ||
      (n == cl_exit)) {
@ -326,9 +315,29 @@ void SuperWord::unrolling_analysis(int &local_loop_unroll_factor) {
  }

  if (is_slp) {
+    // In the main loop, SLP works well if parts of the operations in the loop body
+    // are not vectorizable and those non-vectorizable parts will be unrolled only.
+    // But in post loops with vector masks, we create singleton packs directly from
+    // scalars so all operations should be vectorized together. This compares the
+    // number of packs in the post loop with the main loop and bails out if the post
+    // loop potentially has more packs.
+    if (cl->is_rce_post_loop()) {
+      for (uint i = 0; i < lpt()->_body.size(); i++) {
+        if (ignored_loop_nodes[i] == -1) {
+          _post_block.at_put_grow(rpo_idx++, lpt()->_body.at(i));
+        }
+      }
+      if (_post_block.length() > cl->slp_pack_count()) {
+        // Clear local_loop_unroll_factor and bail out directly from here
+        local_loop_unroll_factor = 0;
+        cl->mark_was_slp();
+        cl->set_slp_max_unroll(0);
+        return;
+      }
+    }
+
    // Now we try to find the maximum supported consistent vector which the machine
    // description can use
-    bool small_basic_type = false;
    bool flag_small_bt = false;
    for (uint i = 0; i < lpt()->_body.size(); i++) {
      if (ignored_loop_nodes[i] != -1) continue;
@ -341,31 +350,9 @@ void SuperWord::unrolling_analysis(int &local_loop_unroll_factor) {
        bt = n->bottom_type()->basic_type();
      }

-      if (post_loop_allowed) {
-        if (!small_basic_type) {
-          switch (bt) {
-          case T_CHAR:
-          case T_BYTE:
-          case T_SHORT:
-            small_basic_type = true;
-            break;
-
-          case T_LONG:
-            // TODO: Remove when support completed for mask context with LONG.
-            //       Support needs to be augmented for logical qword operations, currently we map to dword
-            //       buckets for vectors on logicals as these were legacy.
-            small_basic_type = true;
-            break;
-
-          default:
-            break;
-          }
-        }
-      }
-
      if (is_java_primitive(bt) == false) continue;

-         int cur_max_vector = Matcher::max_vector_size(bt);
+      int cur_max_vector = Matcher::max_vector_size(bt);

      // If a max vector exists which is not larger than _local_loop_unroll_factor
      // stop looking, we already have the max vector to map to.
@ -410,11 +397,6 @@ void SuperWord::unrolling_analysis(int &local_loop_unroll_factor) {
            }
          }
        }
-        // We only process post loops on predicated targets where we want to
-        // mask map the loop to a single iteration
-        if (post_loop_allowed) {
-          _post_block.at_put_grow(rpo_idx++, n);
-        }
      }
    }
    if (is_slp) {
@ -422,14 +404,7 @@ void SuperWord::unrolling_analysis(int &local_loop_unroll_factor) {
      cl->mark_passed_slp();
    }
    cl->mark_was_slp();
-    if (cl->is_main_loop()) {
-      cl->set_slp_max_unroll(local_loop_unroll_factor);
-    } else if (post_loop_allowed) {
-      if (!small_basic_type) {
-        // avoid replication context for small basic types in programmable masked loops
-        cl->set_slp_max_unroll(local_loop_unroll_factor);
-      }
-    }
+    cl->set_slp_max_unroll(local_loop_unroll_factor);
  }
 }

@ -492,7 +467,6 @@ bool SuperWord::SLP_extract() {
  compute_max_depth();

  CountedLoopNode *cl = lpt()->_head->as_CountedLoop();
-  bool post_loop_allowed = (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop());
  if (cl->is_main_loop()) {
    if (_do_vector_loop_experimental) {
      if (mark_generations() != -1) {
@ -553,7 +527,13 @@ bool SuperWord::SLP_extract() {
    filter_packs();

    schedule();
-  } else if (post_loop_allowed) {
+
+    // Record eventual count of vector packs for checks in post loop vectorization
+    if (PostLoopMultiversioning) {
+      cl->set_slp_pack_count(_packset.length());
+    }
+  } else {
+    assert(cl->is_rce_post_loop(), "Must be an rce'd post loop");
    int saved_mapped_unroll_factor = cl->slp_max_unroll();
    if (saved_mapped_unroll_factor) {
      int vector_mapped_unroll_factor = saved_mapped_unroll_factor;
@ -2429,7 +2409,6 @@ bool SuperWord::output() {

  uint max_vlen_in_bytes = 0;
  uint max_vlen = 0;
-  bool can_process_post_loop = (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop());

  NOT_PRODUCT(if(is_trace_loop_reverse()) {tty->print_cr("SWPointer::output: print loop before create_reserve_version_of_loop"); print_loop(true);})

@ -2442,6 +2421,15 @@ bool SuperWord::output() {
    return false;
  }

+  Node* vmask = NULL;
+  if (cl->is_rce_post_loop() && do_reserve_copy()) {
+    // Create a vector mask node for post loop, bail out if not created
+    vmask = create_post_loop_vmask();
+    if (vmask == NULL) {
+      return false; // and reverse to backup IG
+    }
+  }
+
  for (int i = 0; i < _block.length(); i++) {
    Node* n = _block.at(i);
    Node_List* p = my_pack(n);
@ -2451,7 +2439,7 @@ bool SuperWord::output() {
      Node* vn = NULL;
      Node* low_adr = p->at(0);
      Node* first   = executed_first(p);
-      if (can_process_post_loop) {
+      if (cl->is_rce_post_loop()) {
        // override vlen with the main loops vector length
        vlen = cl->slp_max_unroll();
      }
@ -2476,7 +2464,13 @@ bool SuperWord::output() {
        }
        Node* adr = low_adr->in(MemNode::Address);
        const TypePtr* atyp = n->adr_type();
-        vn = LoadVectorNode::make(opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n), control_dependency(p));
+        if (cl->is_rce_post_loop()) {
+          assert(vmask != NULL, "vector mask should be generated");
+          const TypeVect* vt = TypeVect::make(velt_basic_type(n), vlen);
+          vn = new LoadVectorMaskedNode(ctl, mem, adr, atyp, vt, vmask);
+        } else {
+          vn = LoadVectorNode::make(opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n), control_dependency(p));
+        }
        vlen_in_bytes = vn->as_LoadVector()->memory_size();
      } else if (n->is_Store()) {
        // Promote value to be stored to vector
@ -2493,7 +2487,13 @@ bool SuperWord::output() {
        Node* mem = first->in(MemNode::Memory);
        Node* adr = low_adr->in(MemNode::Address);
        const TypePtr* atyp = n->adr_type();
-        vn = StoreVectorNode::make(opc, ctl, mem, adr, atyp, val, vlen);
+        if (cl->is_rce_post_loop()) {
+          assert(vmask != NULL, "vector mask should be generated");
+          const TypeVect* vt = TypeVect::make(velt_basic_type(n), vlen);
+          vn = new StoreVectorMaskedNode(ctl, mem, adr, val, atyp, vmask);
+        } else {
+          vn = StoreVectorNode::make(opc, ctl, mem, adr, atyp, val, vlen);
+        }
        vlen_in_bytes = vn->as_StoreVector()->memory_size();
      } else if (VectorNode::is_scalar_rotate(n)) {
        Node* in1 = low_adr->in(1);
@ -2578,7 +2578,7 @@ bool SuperWord::output() {
        vn = VectorCastNode::make(vopc, in, bt, vlen);
        vlen_in_bytes = vn->as_Vector()->length_in_bytes();
      } else if (is_cmov_pack(p)) {
-        if (can_process_post_loop) {
+        if (cl->is_rce_post_loop()) {
          // do not refactor of flow in post loop context
          return false;
        }
@ -2668,14 +2668,6 @@ bool SuperWord::output() {
      }
      _igvn._worklist.push(vn);

-      if (can_process_post_loop) {
-        // first check if the vector size if the maximum vector which we can use on the machine,
-        // other vector size have reduced values for predicated data mapping.
-        if (vlen_in_bytes != (uint)MaxVectorSize) {
-          return false;
-        }
-      }
-
      if (vlen > max_vlen) {
        max_vlen = vlen;
      }
@ -2717,25 +2709,8 @@ bool SuperWord::output() {
            cl->mark_do_unroll_only();
          }
        }
-
-        if (do_reserve_copy()) {
-          if (can_process_post_loop) {
-            // Now create the difference of trip and limit and use it as our mask index.
-            // Note: We limited the unroll of the vectorized loop so that
-            //       only vlen-1 size iterations can remain to be mask programmed.
-            Node *incr = cl->incr();
-            SubINode *index = new SubINode(cl->limit(), cl->init_trip());
-            _igvn.register_new_node_with_optimizer(index);
-            SetVectMaskINode  *mask = new SetVectMaskINode(_phase->get_ctrl(cl->init_trip()), index);
-            _igvn.register_new_node_with_optimizer(mask);
-            // make this a single iteration loop
-            AddINode *new_incr = new AddINode(incr->in(1), mask);
-            _igvn.register_new_node_with_optimizer(new_incr);
-            _phase->set_ctrl(new_incr, _phase->get_ctrl(incr));
-            _igvn.replace_node(incr, new_incr);
-            cl->mark_is_multiversioned();
-            cl->loopexit()->add_flag(Node::Flag_has_vector_mask_set);
-          }
+        if (cl->is_rce_post_loop() && do_reserve_copy()) {
+          cl->mark_is_multiversioned();
        }
      }
    }
@ -2748,6 +2723,107 @@ bool SuperWord::output() {
  return true;
 }

+//-------------------------create_post_loop_vmask-------------------------
+// Check the post loop vectorizability and create a vector mask if yes.
+// Return NULL to bail out if post loop is not vectorizable.
+Node* SuperWord::create_post_loop_vmask() {
+  CountedLoopNode *cl = lpt()->_head->as_CountedLoop();
+  assert(cl->is_rce_post_loop(), "Must be an rce post loop");
+  assert(!cl->is_reduction_loop(), "no vector reduction in post loop");
+  assert(abs(cl->stride_con()) == 1, "post loop stride can only be +/-1");
+
+  // Collect vector element types of all post loop packs. Also collect
+  // superword pointers of each memory access operation if the address
+  // expression is supported. (Note that vectorizable post loop should
+  // only have positive scale in counting-up loop and negative scale in
+  // counting-down loop.) Collected SWPointer(s) are also used for data
+  // dependence check next.
+  VectorElementSizeStats stats(_arena);
+  GrowableArray<SWPointer*> swptrs(_arena, _packset.length(), 0, NULL);
+  for (int i = 0; i < _packset.length(); i++) {
+    Node_List* p = _packset.at(i);
+    assert(p->size() == 1, "all post loop packs should be singleton");
+    Node* n = p->at(0);
+    BasicType bt = velt_basic_type(n);
+    if (!is_java_primitive(bt)) {
+      return NULL;
+    }
+    if (n->is_Mem()) {
+      SWPointer* mem_p = new (_arena) SWPointer(n->as_Mem(), this, NULL, false);
+      // For each memory access, we check if the scale (in bytes) in its
+      // address expression is equal to the data size times loop stride.
+      // With this, only positive scales exist in counting-up loops and
+      // negative scales exist in counting-down loops.
+      if (mem_p->scale_in_bytes() != type2aelembytes(bt) * cl->stride_con()) {
+        return NULL;
+      }
+      swptrs.append(mem_p);
+    }
+    stats.record_size(type2aelembytes(bt));
+  }
+
+  // Find the vector data type for generating vector masks. Currently we
+  // don't support post loops with mixed vector data sizes
+  int unique_size = stats.unique_size();
+  BasicType vmask_bt;
+  switch (unique_size) {
+    case 1:  vmask_bt = T_BYTE; break;
+    case 2:  vmask_bt = T_SHORT; break;
+    case 4:  vmask_bt = T_INT; break;
+    case 8:  vmask_bt = T_LONG; break;
+    default: return NULL;
+  }
+
+  // Currently we can't remove this MaxVectorSize constraint. Without it,
+  // it's not guaranteed that the RCE'd post loop runs at most "vlen - 1"
+  // iterations, because the vector drain loop may not be cloned from the
+  // vectorized main loop. We should re-engineer PostLoopMultiversioning
+  // to fix this problem.
+  int vlen = cl->slp_max_unroll();
+  if (unique_size * vlen != MaxVectorSize) {
+    return NULL;
+  }
+
+  // Bail out if target doesn't support mask generator or masked load/store
+  if (!Matcher::match_rule_supported_vector(Op_LoadVectorMasked, vlen, vmask_bt)  ||
+      !Matcher::match_rule_supported_vector(Op_StoreVectorMasked, vlen, vmask_bt) ||
+      !Matcher::match_rule_supported_vector(Op_VectorMaskGen, vlen, vmask_bt)) {
+    return NULL;
+  }
+
+  // Bail out if potential data dependence exists between memory accesses
+  if (SWPointer::has_potential_dependence(swptrs)) {
+    return NULL;
+  }
+
+  // Create vector mask with the post loop trip count. Note there's another
+  // vector drain loop which is cloned from main loop before super-unrolling
+  // so the scalar post loop runs at most vlen-1 trips. Hence, this version
+  // only runs at most 1 iteration after vector mask transformation.
+  Node* trip_cnt;
+  Node* new_incr;
+  if (cl->stride_con() > 0) {
+    trip_cnt = new SubINode(cl->limit(), cl->init_trip());
+    new_incr = new AddINode(cl->phi(), trip_cnt);
+  } else {
+    trip_cnt = new SubINode(cl->init_trip(), cl->limit());
+    new_incr = new SubINode(cl->phi(), trip_cnt);
+  }
+  _igvn.register_new_node_with_optimizer(trip_cnt);
+  _igvn.register_new_node_with_optimizer(new_incr);
+  _igvn.replace_node(cl->incr(), new_incr);
+  Node* length = new ConvI2LNode(trip_cnt);
+  _igvn.register_new_node_with_optimizer(length);
+  Node* vmask = VectorMaskGenNode::make(length, vmask_bt);
+  _igvn.register_new_node_with_optimizer(vmask);
+
+  // Remove exit test to transform 1-iteration loop to straight-line code.
+  // This results in redundant cmp+branch instructions being eliminated.
+  Node *cl_exit = cl->loopexit();
+  _igvn.replace_input_of(cl_exit, 1, _igvn.intcon(0));
+  return vmask;
+}
+
 //------------------------------vector_opd---------------------------
 // Create a vector operand for the nodes in pack p for operand: in(opd_idx)
 Node* SuperWord::vector_opd(Node_List* p, int opd_idx) {
@ -2756,7 +2832,7 @@ Node* SuperWord::vector_opd(Node_List* p, int opd_idx) {
  Node* opd = p0->in(opd_idx);
  CountedLoopNode *cl = lpt()->_head->as_CountedLoop();

-  if (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop()) {
+  if (cl->is_rce_post_loop()) {
    // override vlen with the main loops vector length
    vlen = cl->slp_max_unroll();
  }
@ -3767,7 +3843,7 @@ SWPointer::SWPointer(MemNode* mem, SuperWord* slp, Node_Stack *nstack, bool anal
  // Match AddP(base, AddP(ptr, k*iv [+ invariant]), constant)
  Node* base = adr->in(AddPNode::Base);
  // The base address should be loop invariant
-  if (is_main_loop_member(base)) {
+  if (is_loop_member(base)) {
    assert(!valid(), "base address is loop variant");
    return;
  }
@ -3796,7 +3872,7 @@ SWPointer::SWPointer(MemNode* mem, SuperWord* slp, Node_Stack *nstack, bool anal
      break; // stop looking at addp's
    }
  }
-  if (is_main_loop_member(adr)) {
+  if (is_loop_member(adr)) {
    assert(!valid(), "adr is loop variant");
    return;
  }
@ -3827,7 +3903,7 @@ SWPointer::SWPointer(SWPointer* p) :
  #endif
 {}

-bool SWPointer::is_main_loop_member(Node* n) const {
+bool SWPointer::is_loop_member(Node* n) const {
  Node* n_c = phase()->get_ctrl(n);
  return lpt()->is_member(phase()->get_loop(n_c));
 }
@ -3836,7 +3912,7 @@ bool SWPointer::invariant(Node* n) const {
  NOT_PRODUCT(Tracer::Depth dd;)
  Node* n_c = phase()->get_ctrl(n);
  NOT_PRODUCT(_tracer.invariant_1(n, n_c);)
-  bool is_not_member = !is_main_loop_member(n);
+  bool is_not_member = !is_loop_member(n);
  if (is_not_member && _slp->lp()->is_main_loop()) {
    // Check that n_c dominates the pre loop head node. If it does not, then we cannot use n as invariant for the pre loop
    // CountedLoopEndNode check because n_c is either part of the pre loop or between the pre and the main loop (illegal
@ -3906,7 +3982,7 @@ bool SWPointer::scaled_iv(Node* n) {
    NOT_PRODUCT(_tracer.scaled_iv_3(n, _scale);)
    return true;
  }
-  if (_analyze_only && (is_main_loop_member(n))) {
+  if (_analyze_only && (is_loop_member(n))) {
    _nstack->push(n, _stack_idx++);
  }

@ -3989,7 +4065,7 @@ bool SWPointer::offset_plus_k(Node* n, bool negate) {
    return false;
  }

-  if (_analyze_only && is_main_loop_member(n)) {
+  if (_analyze_only && is_loop_member(n)) {
    _nstack->push(n, _stack_idx++);
  }
  if (opc == Op_AddI) {
@ -4023,14 +4099,14 @@ bool SWPointer::offset_plus_k(Node* n, bool negate) {
    }
  }

-  if (!is_main_loop_member(n)) {
+  if (!is_loop_member(n)) {
    // 'n' is loop invariant. Skip ConvI2L and CastII nodes before checking if 'n' is dominating the pre loop.
    if (opc == Op_ConvI2L) {
      n = n->in(1);
    }
    if (n->Opcode() == Op_CastII) {
      // Skip CastII nodes
-      assert(!is_main_loop_member(n), "sanity");
+      assert(!is_loop_member(n), "sanity");
      n = n->in(1);
    }
    // Check if 'n' can really be used as invariant (not in main loop and dominating the pre loop).
@ -4046,6 +4122,34 @@ bool SWPointer::offset_plus_k(Node* n, bool negate) {
  return false;
 }

+//-----------------has_potential_dependence-----------------
+// Conservative pairwise dependence test over all given memory accesses.
+// Returns true as soon as two accesses are found where at least one is
+// a store, the element types match (or are subword types of the same
+// size) and the two SWPointers do not compare equal. Returns false if
+// no such pair exists.
+bool SWPointer::has_potential_dependence(GrowableArray<SWPointer*> swptrs) {
+  const int count = swptrs.length();
+  for (int outer = 0; outer < count; outer++) {
+    SWPointer* lhs = swptrs.at(outer);
+    MemNode* lhs_mem = lhs->mem();
+    const BasicType lhs_bt = lhs_mem->memory_type();
+    const bool lhs_is_store = lhs_mem->is_Store();
+
+    // Pair only with later entries so each unordered pair is visited once
+    for (int inner = outer + 1; inner < count; inner++) {
+      SWPointer* rhs = swptrs.at(inner);
+      MemNode* rhs_mem = rhs->mem();
+
+      // A pair of two loads is never reported
+      if (!lhs_is_store && !rhs_mem->is_Store()) {
+        continue;
+      }
+      // Only accesses of the same element type are compared. Subword
+      // accesses are matched by size instead because a subword
+      // load/store may have inaccurate type.
+      if (!same_type_or_subword_size(lhs_bt, rhs_mem->memory_type())) {
+        continue;
+      }
+      // Different address expressions: assume the accesses may depend
+      if (!lhs->equal(*rhs)) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
 //----------------------------print------------------------
 void SWPointer::print() {
 #ifndef PRODUCT
--- a/src/hotspot/share/opto/superword.hpp
+++ b/src/hotspot/share/opto/superword.hpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -244,6 +244,45 @@ class OrderedPair {
  static const OrderedPair initial;
 };

+// -----------------------VectorElementSizeStats-----------------------
+// Per-size counters of vector lane sizes, used for loop vectorization
+// with vector masks. Accepted sizes are the powers of two from 1 to 8,
+// so four counters indexed by log2(size) are kept.
+class VectorElementSizeStats {
+ private:
+  static const int NO_SIZE = -1;     // No size has been recorded
+  static const int MIXED_SIZE = -2;  // More than one distinct size has been recorded
+  int* _stats;                       // Four counters, slot i counts size (1 << i)
+
+ public:
+  // Allocates the counters from arena 'a' and zeroes them.
+  VectorElementSizeStats(Arena* a) : _stats(NEW_ARENA_ARRAY(a, int, 4)) {
+    for (int slot = 0; slot < 4; slot++) {
+      _stats[slot] = 0;
+    }
+  }
+
+  // Counts one more occurrence of 'size' (must be 1, 2, 4 or 8).
+  void record_size(int size) {
+    assert(1 <= size && size <= 8 && is_power_of_2(size), "Illegal size");
+    const int slot = exact_log2(size);
+    _stats[slot]++;
+  }
+
+  // Smallest size recorded so far, or NO_SIZE if nothing was recorded.
+  int smallest_size() {
+    int slot = 0;
+    while (slot <= 3 && _stats[slot] <= 0) {
+      slot++;
+    }
+    if (slot > 3) {
+      return NO_SIZE;
+    }
+    return 1 << slot;
+  }
+
+  // Largest size recorded so far, or NO_SIZE if nothing was recorded.
+  int largest_size() {
+    int slot = 3;
+    while (slot >= 0 && _stats[slot] <= 0) {
+      slot--;
+    }
+    if (slot < 0) {
+      return NO_SIZE;
+    }
+    return 1 << slot;
+  }
+
+  // The recorded size if only one distinct size was recorded, NO_SIZE
+  // if nothing was recorded, MIXED_SIZE otherwise.
+  int unique_size() {
+    const int lo = smallest_size();
+    const int hi = largest_size();
+    if (lo != hi) {
+      return MIXED_SIZE;
+    }
+    return lo;
+  }
+};
+
 // -----------------------------SuperWord---------------------------------
 // Transforms scalar operations into packed (superword) operations.
 class SuperWord : public ResourceObj {
@ -510,6 +549,8 @@ class SuperWord : public ResourceObj {

  // Convert packs into vector node operations
  bool output();
+  // Create vector mask for post loop vectorization
+  Node* create_post_loop_vmask();
  // Create a vector operand for the nodes in pack p for operand: in(opd_idx)
  Node* vector_opd(Node_List* p, int opd_idx);
  // Can code be generated for pack p?
@ -572,7 +613,7 @@ class SuperWord : public ResourceObj {

 //------------------------------SWPointer---------------------------
 // Information about an address for dependence checking and vector alignment
-class SWPointer {
+class SWPointer : public ResourceObj {
 protected:
  MemNode*   _mem;           // My memory reference node
  SuperWord* _slp;           // SuperWord class
@ -594,7 +635,7 @@ class SWPointer {
  IdealLoopTree*  lpt() const   { return _slp->lpt(); }
  PhiNode*        iv() const    { return _slp->iv();  } // Induction var

-  bool is_main_loop_member(Node* n) const;
+  bool is_loop_member(Node* n) const;
  bool invariant(Node* n) const;

  // Match: k*iv + offset
@ -658,6 +699,8 @@ class SWPointer {
  static bool equal(int cmp)      { return cmp == Equal; }
  static bool comparable(int cmp) { return cmp < NotComparable; }

+  static bool has_potential_dependence(GrowableArray<SWPointer*> swptrs);
+
  void print();

 #ifndef PRODUCT
--- a/src/hotspot/share/opto/vectornode.hpp
+++ b/src/hotspot/share/opto/vectornode.hpp
@ -1296,17 +1296,6 @@ class ExtractDNode : public ExtractNode {
  virtual uint ideal_reg() const { return Op_RegD; }
 };

-//------------------------------SetVectMaskINode-------------------------------
-// Provide a mask for a vector predicate machine
-class SetVectMaskINode : public Node {
-public:
-  SetVectMaskINode(Node *c, Node *in1) : Node(c, in1) {}
-  virtual int Opcode() const;
-  const Type *bottom_type() const { return TypeInt::INT; }
-  virtual uint ideal_reg() const { return Op_RegI; }
-  virtual const Type *Value(PhaseGVN *phase) const { return TypeInt::INT; }
-};
-
 //------------------------------MacroLogicVNode-------------------------------
 // Vector logical operations packing node.
 class MacroLogicVNode : public VectorNode {
--- a/src/hotspot/share/runtime/vmStructs.cpp
+++ b/src/hotspot/share/runtime/vmStructs.cpp
@ -1557,7 +1557,6 @@
  declare_c2_type(ConvL2INode, Node)                                      \
  declare_c2_type(CastX2PNode, Node)                                      \
  declare_c2_type(CastP2XNode, Node)                                      \
-  declare_c2_type(SetVectMaskINode, Node)                                 \
  declare_c2_type(MemBarNode, MultiNode)                                  \
  declare_c2_type(MemBarAcquireNode, MemBarNode)                          \
  declare_c2_type(MemBarReleaseNode, MemBarNode)                          \
--- a/src/hotspot/share/utilities/globalDefinitions.hpp
+++ b/src/hotspot/share/utilities/globalDefinitions.hpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -813,6 +813,9 @@ extern int type2aelembytes(BasicType t, bool allow_address = false); // asserts
 inline int type2aelembytes(BasicType t, bool allow_address = false) { return _type2aelembytes[t]; }
 #endif

+// Returns true when t1 and t2 are the same BasicType, or when t1 is a
+// subword type and type2aelembytes() reports the same value for both.
+inline bool same_type_or_subword_size(BasicType t1, BasicType t2) {
+  if (t1 == t2) {
+    return true;
+  }
+  if (!is_subword_type(t1)) {
+    return false;
+  }
+  return type2aelembytes(t1) == type2aelembytes(t2);
+}

 // JavaValue serves as a container for arbitrary Java values.

--- a/test/hotspot/jtreg/compiler/c2/irTests/TestSuperwordFailsUnrolling.java
+++ b/test/hotspot/jtreg/compiler/c2/irTests/TestSuperwordFailsUnrolling.java
@ -49,7 +49,7 @@ public class TestSuperwordFailsUnrolling {
    }

    @Test
-    @IR(applyIf = { "UsePopCountInstruction", "true" }, counts = { IRNode.POPCOUNT_L, "10" })
+    @IR(applyIf = { "UsePopCountInstruction", "true" }, counts = { IRNode.POPCOUNT_L, ">=10" })
    private static int test(long[] array1, long[] array2) {
        v = 0;
        for (int i = 0; i < array1.length; i++) {
--- a/test/hotspot/jtreg/compiler/vectorization/runner/ArrayCopyTest.java
+++ b/test/hotspot/jtreg/compiler/vectorization/runner/ArrayCopyTest.java
@ -0,0 +1,190 @@
+/*
+ * Copyright (c) 2022, Arm Limited. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @summary Vectorization test on array copy
+ * @library /test/lib /
+ *
+ * @build sun.hotspot.WhiteBox
+ *        compiler.vectorization.runner.VectorizationTestRunner
+ *
+ * @run driver jdk.test.lib.helpers.ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:.
+ *                   -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+WhiteBoxAPI
+ *                   compiler.vectorization.runner.ArrayCopyTest
+ *
+ * @requires vm.compiler2.enabled & vm.flagless
+ */
+
+package compiler.vectorization.runner;
+
+public class ArrayCopyTest extends VectorizationTestRunner { // element-wise array copy loops, one loop per @Test method
+
+    private static final int SIZE = 2345; // length of every source and result array
+
+    private   byte[] bytes;
+    private  short[] shorts;
+    private   char[] chars;
+    private    int[] ints;
+    private   long[] longs;
+    private  float[] floats;
+    private double[] doubles;
+
+    public ArrayCopyTest() { // allocates the source arrays and fills them with index-derived values
+        bytes   = new   byte[SIZE];
+        shorts  = new  short[SIZE];
+        chars   = new   char[SIZE];
+        ints    = new    int[SIZE];
+        longs   = new   long[SIZE];
+        floats  = new  float[SIZE];
+        doubles = new double[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            bytes[i]   = (byte)  (-i / 100);
+            shorts[i]  = (short) (30 * i - 12345);
+            chars[i]   = (char)  (i * 55);
+            ints[i]    = -4444 * i;
+            longs[i]   = -999999999L * i + 99999999999L;
+            floats[i]  = (float) (i * 2.3e7f);
+            doubles[i] = -3e30 * i * i;
+        }
+    }
+
+    // ---------------- Simple Copy: one source array into one new array of the same element type ----------------
+    @Test
+    public byte[] copyByteArray() {
+        byte[] res = new byte[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = bytes[i];
+        }
+        return res;
+    }
+
+    @Test
+    public short[] copyShortArray() {
+        short[] res = new short[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = shorts[i];
+        }
+        return res;
+    }
+
+    @Test
+    public char[] copyCharArray() {
+        char[] res = new char[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = chars[i];
+        }
+        return res;
+    }
+
+    @Test
+    public int[] copyIntArray() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = ints[i];
+        }
+        return res;
+    }
+
+    @Test
+    public long[] copyLongArray() {
+        long[] res = new long[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = longs[i];
+        }
+        return res;
+    }
+
+    @Test
+    public float[] copyFloatArray() {
+        float[] res = new float[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = floats[i];
+        }
+        return res;
+    }
+
+    @Test
+    public double[] copyDoubleArray() {
+        double[] res = new double[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = doubles[i];
+        }
+        return res;
+    }
+
+    // ---------------- Multiple Copies: two array stores in each loop iteration ----------------
+    @Test
+    public float[] chainedCopy() {
+        float[] res1 = new float[SIZE];
+        float[] res2 = new float[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res2[i] = res1[i] = floats[i]; // the same loaded value is stored into both result arrays
+        }
+        return res2;
+    }
+
+    @Test
+    public int[] copy2ArraysSameSize() { // int and float copies share one loop; only res1 is returned
+        int[] res1 = new int[SIZE];
+        float[] res2 = new float[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res1[i] = ints[i];
+            res2[i] = floats[i];
+        }
+        return res1;
+    }
+
+    @Test
+    public double[] copy2ArraysDifferentSizes() { // int and double copies share one loop; only res2 is returned
+        int[] res1 = new int[SIZE];
+        double[] res2 = new double[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res1[i] = ints[i];
+            res2[i] = doubles[i];
+        }
+        return res2;
+    }
+
+    // ---------------- Copy Between Signed & Unsigned: short <-> char through an explicit cast ----------------
+    @Test
+    public char[] copyFromSignedToUnsigned() {
+        char[] res = new char[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (char) shorts[i];
+        }
+        return res;
+    }
+
+    @Test
+    public short[] copyFromUnsignedToSigned() {
+        short[] res = new short[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (short) chars[i];
+        }
+        return res;
+    }
+}
+
--- a/test/hotspot/jtreg/compiler/vectorization/runner/ArrayIndexFillTest.java
+++ b/test/hotspot/jtreg/compiler/vectorization/runner/ArrayIndexFillTest.java
@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2022, Arm Limited. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @summary Vectorization test on array index fill
+ * @library /test/lib /
+ *
+ * @build sun.hotspot.WhiteBox
+ *        compiler.vectorization.runner.VectorizationTestRunner
+ *
+ * @run driver jdk.test.lib.helpers.ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:.
+ *                   -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+WhiteBoxAPI
+ *                   compiler.vectorization.runner.ArrayIndexFillTest
+ *
+ * @requires vm.compiler2.enabled & vm.flagless
+ */
+
+package compiler.vectorization.runner;
+
+public class ArrayIndexFillTest extends VectorizationTestRunner { // loops that store a counter value into each array element
+
+    private static final int SIZE = 2345; // length of every array created in this class
+
+    private int[] a; // NOTE(review): filled in the constructor but not read by any method in this class - confirm it is needed
+
+    public ArrayIndexFillTest() {
+        a = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            a[i] = -5050 * i;
+        }
+    }
+
+    @Test
+    public byte[] fillByteArray() {
+        byte[] res = new byte[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (byte) i; // int index narrowed to byte
+        }
+        return res;
+    }
+
+    @Test
+    public short[] fillShortArray() {
+        short[] res = new short[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (short) i; // int index narrowed to short
+        }
+        return res;
+    }
+
+    @Test
+    public char[] fillCharArray() {
+        char[] res = new char[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (char) i; // int index narrowed to char
+        }
+        return res;
+    }
+
+    @Test
+    public int[] fillIntArray() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = i;
+        }
+        return res;
+    }
+
+    @Test
+    public long[] fillLongArray() {
+        long[] res = new long[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = i; // int index widened to long
+        }
+        return res;
+    }
+
+    @Test
+    public short[] fillShortArrayWithShortIndex() {
+        short[] res = new short[SIZE];
+        for (short i = 0; i < SIZE; i++) { // loop counter is declared as short rather than int
+            res[i] = i;
+        }
+        return res;
+    }
+
+    @Test
+    public int[] fillMultipleArraysDifferentTypes1() { // int and short stores share one loop; only res1 is returned
+        int[] res1 = new int[SIZE];
+        short[] res2 = new short[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res1[i] = i;
+            res2[i] = (short) i;
+        }
+        return res1;
+    }
+
+    @Test
+    public char[] fillMultipleArraysDifferentTypes2() { // int and char stores share one loop; only res2 is returned
+        int[] res1 = new int[SIZE];
+        char[] res2 = new char[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res1[i] = i;
+            res2[i] = (char) i;
+        }
+        return res2;
+    }
+
+    @Test
+    public int[] fillNonIndexValue() {
+        int[] res = new int[SIZE];
+        int val = 10000;
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = val++; // stored value comes from a second counter starting at 10000, not from i
+        }
+        return res;
+    }
+}
+
--- a/test/hotspot/jtreg/compiler/vectorization/runner/ArrayInvariantFillTest.java
+++ b/test/hotspot/jtreg/compiler/vectorization/runner/ArrayInvariantFillTest.java
@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2022, Arm Limited. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @summary Vectorization test on array invariant fill
+ * @library /test/lib /
+ *
+ * @build sun.hotspot.WhiteBox
+ *        compiler.vectorization.runner.VectorizationTestRunner
+ *
+ * @run driver jdk.test.lib.helpers.ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:.
+ *                   -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+WhiteBoxAPI
+ *                   -XX:-OptimizeFill
+ *                   compiler.vectorization.runner.ArrayInvariantFillTest
+ *
+ * @requires vm.compiler2.enabled & vm.flagless
+ */
+
+package compiler.vectorization.runner;
+
+import java.util.Random;
+
+public class ArrayInvariantFillTest extends VectorizationTestRunner { // loops that store one unchanging value into every element
+
+    private static final int SIZE = 2345; // length of every array created in this class
+
+    private int intInv; // the four fields below hold the values stored by the "Fill With Type Change" loops
+    private char charInv;
+    private float floatInv;
+    private double doubleInv;
+
+    public ArrayInvariantFillTest() {
+        Random ran = new Random(10); // fixed seed
+        intInv = ran.nextInt();
+        charInv = (char) ran.nextInt();
+        floatInv = ran.nextFloat();
+        doubleInv = ran.nextDouble();
+    }
+
+    // ---------------- Simple Fill: every element is set to the same literal constant ----------------
+    @Test
+    public byte[] fillByteArray() {
+        byte[] res = new byte[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (byte) 10;
+        }
+        return res;
+    }
+
+    @Test
+    public short[] fillShortArray() {
+        short[] res = new short[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (short) -3333;
+        }
+        return res;
+    }
+
+    @Test
+    public char[] fillCharArray() {
+        char[] res = new char[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (char) 55555;
+        }
+        return res;
+    }
+
+    @Test
+    public int[] fillIntArray() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = 2147483647;
+        }
+        return res;
+    }
+
+    @Test
+    public long[] fillLongArray() {
+        long[] res = new long[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = -2222222222222222L;
+        }
+        return res;
+    }
+
+    @Test
+    public float[] fillFloatArray() {
+        float[] res = new float[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = 3.234567e8f;
+        }
+        return res;
+    }
+
+    @Test
+    public double[] fillDoubleArray() {
+        double[] res = new double[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = -9.87654321e50;
+        }
+        return res;
+    }
+
+    // ---------------- Fill With Type Change: a field value is converted to the element type before the store ----------------
+    @Test
+    public long[] fillLongArrayWithInt() {
+        long[] res = new long[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = intInv; // int widened to long
+        }
+        return res;
+    }
+
+    @Test
+    public long[] fillLongArrayWithUnsigned() {
+        long[] res = new long[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = charInv; // char widened to long
+        }
+        return res;
+    }
+
+    @Test
+    public long[] fillLongArrayWithFloat() {
+        long[] res = new long[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (long) floatInv; // float converted to long
+        }
+        return res;
+    }
+
+    @Test
+    public int[] fillIntArrayWithDouble() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (int) doubleInv; // double converted to int
+        }
+        return res;
+    }
+}
+
--- a/test/hotspot/jtreg/compiler/vectorization/runner/ArrayShiftOpTest.java
+++ b/test/hotspot/jtreg/compiler/vectorization/runner/ArrayShiftOpTest.java
@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2022, Arm Limited. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @summary Vectorization test on bug-prone shift operation
+ * @library /test/lib /
+ *
+ * @build sun.hotspot.WhiteBox
+ *        compiler.vectorization.runner.VectorizationTestRunner
+ *
+ * @run driver jdk.test.lib.helpers.ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:.
+ *                   -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+WhiteBoxAPI
+ *                   compiler.vectorization.runner.ArrayShiftOpTest
+ *
+ * @requires vm.compiler2.enabled & vm.flagless
+ */
+
+package compiler.vectorization.runner;
+
+import java.util.Random;
+
+/**
+ * Loops built around shift operations. Each {@code @Test} method allocates a
+ * result array of length {@code SIZE}, fills it from the source arrays set up
+ * in the constructor, and returns it.
+ */
+public class ArrayShiftOpTest extends VectorizationTestRunner {
+
+    // Length of every source and result array
+    private static final int SIZE = 2345;
+
+    private int[] ints;
+    private long[] longs;
+    private short[] shorts1;
+    private short[] shorts2;
+    // Shift distance read from a field instead of being a literal in the loop
+    private int largeDist;
+
+    public ArrayShiftOpTest() {
+        ints = new int[SIZE];
+        longs = new long[SIZE];
+        shorts1 = new short[SIZE];
+        shorts2 = new short[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            ints[i] = -888999 * i;
+            longs[i] = 999998888800000L * i;
+            shorts1[i] = (short) (4 * i);
+            shorts2[i] = (short) (-3 * i);
+        }
+        // 123 is larger than the bit width of both int (32) and long (64)
+        largeDist = 123;
+    }
+
+    @Test
+    // Left shift by 14 combined with unsigned right shift by 18 (14 + 18 = 32),
+    // i.e. an int rotate left by 14 written with two shifts.
+    public int[] intCombinedRotateShift() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (ints[i] << 14) | (ints[i] >>> 18);
+        }
+        return res;
+    }
+
+    @Test
+    // Left shift by 55 combined with unsigned right shift by 9 (55 + 9 = 64),
+    // i.e. a long rotate left by 55 written with two shifts.
+    public long[] longCombinedRotateShift() {
+        long[] res = new long[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (longs[i] << 55) | (longs[i] >>> 9);
+        }
+        return res;
+    }
+
+    @Test
+    // Constant distance 35 exceeds the 32-bit width of int. Java uses only
+    // the five lowest-order bits of the distance for int shifts.
+    public int[] intShiftLargeDistConstant() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = ints[i] >> 35;
+        }
+        return res;
+    }
+
+    @Test
+    // Same as above, but the oversized distance comes from the largeDist field.
+    public int[] intShiftLargeDistInvariant() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = ints[i] >> largeDist;
+        }
+        return res;
+    }
+
+    @Test
+    // Constant distance 77 exceeds the 64-bit width of long. Java uses only
+    // the six lowest-order bits of the distance for long shifts.
+    public long[] longShiftLargeDistConstant() {
+        long[] res = new long[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = longs[i] << 77;
+        }
+        return res;
+    }
+
+    @Test
+    // Same as above, but the oversized distance comes from the largeDist field.
+    public long[] longShiftLargeDistInvariant() {
+        long[] res = new long[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = longs[i] >>> largeDist;
+        }
+        return res;
+    }
+
+    @Test
+    // Note that any shift operation with distance value from another array
+    // cannot be vectorized since C2 vector shift node doesn't support it.
+    public long[] variantShiftDistance() {
+        long[] res = new long[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = longs[i] >> ints[i];
+        }
+        return res;
+    }
+
+    @Test
+    // Note that unsigned shift right on subword signed integer types can't
+    // be vectorized since the sign extension bits would be lost.
+    public short[] vectorUnsignedShiftRight() {
+        short[] res = new short[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (short) (shorts2[i] >>> 3);
+        }
+        return res;
+    }
+
+    @Test
+    // Note that right shift operations on subword expressions cannot be
+    // vectorized since precise type info about signedness is missing.
+    public short[] subwordExpressionRightShift() {
+        short[] res = new short[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (short) ((shorts1[i] + shorts2[i]) >> 4);
+        }
+        return res;
+    }
+}
+
--- a/test/hotspot/jtreg/compiler/vectorization/runner/ArrayTypeConvertTest.java
+++ b/test/hotspot/jtreg/compiler/vectorization/runner/ArrayTypeConvertTest.java
@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 2022, Arm Limited. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @summary Vectorization test on array type conversions
+ * @library /test/lib /
+ *
+ * @build sun.hotspot.WhiteBox
+ *        compiler.vectorization.runner.VectorizationTestRunner
+ *
+ * @run driver jdk.test.lib.helpers.ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:.
+ *                   -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+WhiteBoxAPI
+ *                   compiler.vectorization.runner.ArrayTypeConvertTest
+ *
+ * @requires vm.compiler2.enabled & vm.flagless
+ */
+
+package compiler.vectorization.runner;
+
+// Kernels that copy one primitive array into another array with a different
+// element type. Every kernel runs the same counted loop over all SIZE
+// elements of a source field and returns a freshly allocated result array.
+public class ArrayTypeConvertTest extends VectorizationTestRunner {
+
+    // Length of every source and result array.
+    private static final int SIZE = 2345;
+
+    // Source arrays, one per primitive type, filled by the constructor.
+    private   byte[] bytes;
+    private  short[] shorts;
+    private   char[] chars;
+    private    int[] ints;
+    private   long[] longs;
+    private  float[] floats;
+    private double[] doubles;
+
+    // Allocates the source arrays and fills them with deterministic values
+    // computed from the element index.
+    public ArrayTypeConvertTest() {
+        bytes   = new   byte[SIZE];
+        shorts  = new  short[SIZE];
+        chars   = new   char[SIZE];
+        ints    = new    int[SIZE];
+        longs   = new   long[SIZE];
+        floats  = new  float[SIZE];
+        doubles = new double[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            bytes[i]   = (byte)  (-i / 128);
+            shorts[i]  = (short) (i / 3 - 12345);
+            chars[i]   = (char)  (i * 2);
+            // For larger i this value falls below the short and byte ranges,
+            // so the integer narrowing kernels really do drop high bits.
+            ints[i]    = -22 * i;
+            longs[i]   = -258L * i + 99L;
+            floats[i]  = (float) (i * 2.498f);
+            doubles[i] = -3 * i;
+        }
+    }
+
+    // ---------------- Integer Extension ----------------
+    @Test
+    // short -> int widening (sign-extending).
+    public int[] signExtension() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = shorts[i];
+        }
+        return res;
+    }
+
+    @Test
+    // char -> int widening (zero-extending, since char is unsigned).
+    public int[] zeroExtension() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = chars[i];
+        }
+        return res;
+    }
+
+    @Test
+    // byte -> int widening (sign-extending).
+    public int[] signExtensionFromByte() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = bytes[i];
+        }
+        return res;
+    }
+
+    // ---------------- Integer Narrow ----------------
+    @Test
+    // int -> short narrowing; keeps the low 16 bits.
+    public short[] narrowToSigned() {
+        short[] res = new short[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (short) ints[i];
+        }
+        return res;
+    }
+
+    @Test
+    // int -> char narrowing; keeps the low 16 bits as an unsigned value.
+    public char[] narrowToUnsigned() {
+        char[] res = new char[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (char) ints[i];
+        }
+        return res;
+    }
+
+    @Test
+    // int -> byte narrowing; keeps the low 8 bits.
+    public byte[] NarrowToByte() {
+        byte[] res = new byte[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (byte) ints[i];
+        }
+        return res;
+    }
+
+    // ---------------- Convert I/L to F/D ----------------
+    @Test
+    // int -> float conversion.
+    public float[] convertIntToFloat() {
+        float[] res = new float[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (float) ints[i];
+        }
+        return res;
+    }
+
+    @Test
+    // int -> double conversion.
+    public double[] convertIntToDouble() {
+        double[] res = new double[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (double) ints[i];
+        }
+        return res;
+    }
+
+    @Test
+    // long -> float conversion.
+    public float[] convertLongToFloat() {
+        float[] res = new float[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (float) longs[i];
+        }
+        return res;
+    }
+
+    @Test
+    // long -> double conversion.
+    public double[] convertLongToDouble() {
+        double[] res = new double[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (double) longs[i];
+        }
+        return res;
+    }
+
+    // ---------------- Convert Subword-I to F/D ----------------
+    @Test
+    // short -> float conversion.
+    public float[] convertShortToFloat() {
+        float[] res = new float[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (float) shorts[i];
+        }
+        return res;
+    }
+
+    @Test
+    // short -> double conversion.
+    public double[] convertShortToDouble() {
+        double[] res = new double[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (double) shorts[i];
+        }
+        return res;
+    }
+
+    @Test
+    // char -> float conversion.
+    public float[] convertCharToFloat() {
+        float[] res = new float[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (float) chars[i];
+        }
+        return res;
+    }
+
+    @Test
+    // char -> double conversion.
+    public double[] convertCharToDouble() {
+        double[] res = new double[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (double) chars[i];
+        }
+        return res;
+    }
+
+    // ---------------- Convert F/D to I/L ----------------
+    @Test
+    // float -> int conversion (rounds toward zero).
+    public int[] convertFloatToInt() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (int) floats[i];
+        }
+        return res;
+    }
+
+    @Test
+    // float -> long conversion (rounds toward zero).
+    public long[] convertFloatToLong() {
+        long[] res = new long[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (long) floats[i];
+        }
+        return res;
+    }
+
+    @Test
+    // double -> int conversion (rounds toward zero).
+    public int[] convertDoubleToInt() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (int) doubles[i];
+        }
+        return res;
+    }
+
+    @Test
+    // double -> long conversion (rounds toward zero).
+    public long[] convertDoubleToLong() {
+        long[] res = new long[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (long) doubles[i];
+        }
+        return res;
+    }
+
+    // ---------------- Convert F/D to Subword-I ----------------
+    @Test
+    // float -> short; in Java this cast goes through int and then keeps
+    // the low 16 bits.
+    public short[] convertFloatToShort() {
+        short[] res = new short[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (short) floats[i];
+        }
+        return res;
+    }
+
+    @Test
+    // float -> char; in Java this cast goes through int and then keeps
+    // the low 16 bits as an unsigned value.
+    public char[] convertFloatToChar() {
+        char[] res = new char[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (char) floats[i];
+        }
+        return res;
+    }
+
+    @Test
+    // double -> short; in Java this cast goes through int and then keeps
+    // the low 16 bits.
+    public short[] convertDoubleToShort() {
+        short[] res = new short[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (short) doubles[i];
+        }
+        return res;
+    }
+
+    @Test
+    // double -> char; in Java this cast goes through int and then keeps
+    // the low 16 bits as an unsigned value.
+    public char[] convertDoubleToChar() {
+        char[] res = new char[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (char) doubles[i];
+        }
+        return res;
+    }
+
+    // ---------------- Convert Between F & D ----------------
+    @Test
+    // float -> double widening.
+    public double[] convertFloatToDouble() {
+        double[] res = new double[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (double) floats[i];
+        }
+        return res;
+    }
+
+    @Test
+    // double -> float narrowing.
+    public float[] convertDoubleToFloat() {
+        float[] res = new float[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (float) doubles[i];
+        }
+        return res;
+    }
+}
+
--- a/test/hotspot/jtreg/compiler/vectorization/runner/ArrayUnsafeOpTest.java
+++ b/test/hotspot/jtreg/compiler/vectorization/runner/ArrayUnsafeOpTest.java
@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2022, Arm Limited. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @summary Vectorization test on array unsafe operations
+ * @library /test/lib /
+ *
+ * @build sun.hotspot.WhiteBox
+ *        compiler.vectorization.runner.VectorizationTestRunner
+ *
+ * @run driver jdk.test.lib.helpers.ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:.
+ *                   -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+WhiteBoxAPI
+ *                   compiler.vectorization.runner.ArrayUnsafeOpTest
+ *
+ * @requires vm.compiler2.enabled & vm.flagless
+ */
+
+package compiler.vectorization.runner;
+
+import java.lang.reflect.Field;
+
+import sun.misc.Unsafe;
+
+public class ArrayUnsafeOpTest extends VectorizationTestRunner {
+
+    private static final int SIZE = 2345;
+
+    private static Unsafe unsafe;
+
+    public ArrayUnsafeOpTest() throws Exception {
+        Class klass = Unsafe.class;
+        Field field = klass.getDeclaredField("theUnsafe");
+        field.setAccessible(true);
+        unsafe = (Unsafe) field.get(null);
+    }
+
+    @Test
+    public byte[] arrayUnsafeFill() {
+        byte[] res = new byte[SIZE];
+        for (int i = 0; i < 500; i++) {
+            unsafe.putByte(res, i + 24, (byte) i);
+        }
+        return res;
+    }
+
+    @Test
+    public byte[] arrayUnsafeFillWithOneAddp() {
+        byte[] res = new byte[SIZE];
+        for (int i = 123; i < 500; i++) {
+            unsafe.putByte(res, i, (byte) i);
+        }
+        return res;
+    }
+
+    @Test
+    // Note that this case cannot be vectorized since data dependence
+    // exists between two unsafe stores of different types on the same
+    // array reference.
+    public int[] arrayUnsafeFillTypeMismatch() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < 500; i++) {
+            unsafe.putByte(res, i + 24, (byte) i);
+            unsafe.putShort(res, i + 28, (short) 0);
+        }
+        return res;
+    }
+
+    @Test
+    // Note that this case cannot be vectorized since data dependence
+    // exists between adjacent iterations. (The memory address storing
+    // an int array is not increased by 4 per iteration.)
+    public int[] arrayUnsafeFillAddrIncrMismatch() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < 500; i++) {
+            unsafe.putInt(res, i + 24, i);
+        }
+        return res;
+    }
+}
+
--- a/test/hotspot/jtreg/compiler/vectorization/runner/BasicBooleanOpTest.java
+++ b/test/hotspot/jtreg/compiler/vectorization/runner/BasicBooleanOpTest.java
@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2022, Arm Limited. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @summary Vectorization test on basic boolean operations
+ * @library /test/lib /
+ *
+ * @build sun.hotspot.WhiteBox
+ *        compiler.vectorization.runner.VectorizationTestRunner
+ *
+ * @run driver jdk.test.lib.helpers.ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:.
+ *                   -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+WhiteBoxAPI
+ *                   compiler.vectorization.runner.BasicBooleanOpTest
+ *
+ * @requires vm.compiler2.enabled & vm.flagless
+ */
+
+package compiler.vectorization.runner;
+
+// Element-wise logical kernels over boolean arrays. Every kernel runs the
+// same counted loop over all SIZE elements and returns a freshly allocated
+// result array.
+public class BasicBooleanOpTest extends VectorizationTestRunner {
+
+    // Length of every source and result array.
+    private static final int SIZE = 2345;
+
+    // Source arrays. a is all true and b is all false after construction;
+    // c is allocated but not read by any kernel in this class.
+    private boolean[] a;
+    private boolean[] b;
+    private boolean[] c;
+
+    // Allocates the source arrays and fills a with true and b with false.
+    // c keeps its default contents (all false).
+    public BasicBooleanOpTest() {
+        a = new boolean[SIZE];
+        b = new boolean[SIZE];
+        c = new boolean[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            a[i] = true;
+            b[i] = false;
+        }
+    }
+
+    // ---------------- Logic ----------------
+    @Test
+    // Logical negation of each element of a.
+    public boolean[] vectorNot() {
+        boolean[] res = new boolean[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = !a[i];
+        }
+        return res;
+    }
+
+    @Test
+    // Non-short-circuit AND of a and b, element by element.
+    public boolean[] vectorAnd() {
+        boolean[] res = new boolean[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = a[i] & b[i];
+        }
+        return res;
+    }
+
+    @Test
+    // Non-short-circuit OR of a and b, element by element.
+    public boolean[] vectorOr() {
+        boolean[] res = new boolean[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = a[i] | b[i];
+        }
+        return res;
+    }
+
+    @Test
+    // XOR of a and b, element by element.
+    public boolean[] vectorXor() {
+        boolean[] res = new boolean[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = a[i] ^ b[i];
+        }
+        return res;
+    }
+}
+
--- a/test/hotspot/jtreg/compiler/vectorization/runner/BasicByteOpTest.java
+++ b/test/hotspot/jtreg/compiler/vectorization/runner/BasicByteOpTest.java
@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2022, Arm Limited. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @summary Vectorization test on basic byte operations
+ * @library /test/lib /
+ *
+ * @build sun.hotspot.WhiteBox
+ *        compiler.vectorization.runner.VectorizationTestRunner
+ *
+ * @run driver jdk.test.lib.helpers.ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:.
+ *                   -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+WhiteBoxAPI
+ *                   compiler.vectorization.runner.BasicByteOpTest
+ *
+ * @requires vm.compiler2.enabled & vm.flagless
+ */
+
+package compiler.vectorization.runner;
+
+// Element-wise arithmetic, logic and shift kernels over byte arrays. Java
+// promotes byte operands to int, so every kernel computes in int and casts
+// the result back to byte (keeping the low 8 bits). Every kernel runs the
+// same counted loop over all SIZE elements and returns a new result array.
+public class BasicByteOpTest extends VectorizationTestRunner {
+
+    // Length of every source and result array.
+    private static final int SIZE = 2345;
+
+    // Source arrays, filled by the constructor.
+    private byte[] a;
+    private byte[] b;
+    private byte[] c;
+
+    // Allocates the source arrays. a and b get index-derived values
+    // truncated to byte; every element of c is the constant -90.
+    public BasicByteOpTest() {
+        a = new byte[SIZE];
+        b = new byte[SIZE];
+        c = new byte[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            a[i] = (byte) (-3 * i);
+            b[i] = (byte) (i + 4);
+            c[i] = (byte) -90;
+        }
+    }
+
+    // ---------------- Arithmetic ----------------
+    @Test
+    // Negation of each element of a.
+    public byte[] vectorNeg() {
+        byte[] res = new byte[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (byte) -a[i];
+        }
+        return res;
+    }
+
+    @Test
+    // Absolute value of each element of a, via Math.abs on the promoted int.
+    public byte[] vectorAbs() {
+        byte[] res = new byte[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (byte) Math.abs(a[i]);
+        }
+        return res;
+    }
+
+    @Test
+    // a + b, element by element.
+    public byte[] vectorAdd() {
+        byte[] res = new byte[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (byte) (a[i] + b[i]);
+        }
+        return res;
+    }
+
+    @Test
+    // a - b, element by element.
+    public byte[] vectorSub() {
+        byte[] res = new byte[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (byte) (a[i] - b[i]);
+        }
+        return res;
+    }
+
+    @Test
+    // a * b, element by element.
+    public byte[] vectorMul() {
+        byte[] res = new byte[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (byte) (a[i] * b[i]);
+        }
+        return res;
+    }
+
+    @Test
+    // c + a * b, element by element.
+    public byte[] vectorMulAdd() {
+        byte[] res = new byte[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (byte) (c[i] + a[i] * b[i]);
+        }
+        return res;
+    }
+
+    @Test
+    // c - a * b, element by element.
+    public byte[] vectorMulSub() {
+        byte[] res = new byte[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (byte) (c[i] - a[i] * b[i]);
+        }
+        return res;
+    }
+
+    // ---------------- Logic ----------------
+    @Test
+    // Bitwise complement of each element of a.
+    public byte[] vectorNot() {
+        byte[] res = new byte[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (byte) ~a[i];
+        }
+        return res;
+    }
+
+    @Test
+    // Bitwise AND of a and b, element by element.
+    public byte[] vectorAnd() {
+        byte[] res = new byte[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (byte) (a[i] & b[i]);
+        }
+        return res;
+    }
+
+    @Test
+    // Bitwise OR of a and b, element by element.
+    public byte[] vectorOr() {
+        byte[] res = new byte[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (byte) (a[i] | b[i]);
+        }
+        return res;
+    }
+
+    @Test
+    // Bitwise XOR of a and b, element by element.
+    public byte[] vectorXor() {
+        byte[] res = new byte[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (byte) (a[i] ^ b[i]);
+        }
+        return res;
+    }
+
+    // ---------------- Shift ----------------
+    @Test
+    // Left shift of each element of a by the constant 3.
+    public byte[] vectorShiftLeft() {
+        byte[] res = new byte[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (byte) (a[i] << 3);
+        }
+        return res;
+    }
+
+    @Test
+    // Arithmetic right shift of each element of a by the constant 2.
+    public byte[] vectorSignedShiftRight() {
+        byte[] res = new byte[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (byte) (a[i] >> 2);
+        }
+        return res;
+    }
+
+    @Test
+    // Logical right shift of each element of a by the constant 5, applied
+    // to the sign-extended int and then truncated back to byte.
+    // Note that unsigned shift right on subword signed integer types can
+    // not be vectorized since the sign extension bit would be lost.
+    public byte[] vectorUnsignedShiftRight() {
+        byte[] res = new byte[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (byte) (a[i] >>> 5);
+        }
+        return res;
+    }
+}
+
--- a/test/hotspot/jtreg/compiler/vectorization/runner/BasicCharOpTest.java
+++ b/test/hotspot/jtreg/compiler/vectorization/runner/BasicCharOpTest.java
@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2022, Arm Limited. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @summary Vectorization test on basic char operations
+ * @library /test/lib /
+ *
+ * @build sun.hotspot.WhiteBox
+ *        compiler.vectorization.runner.VectorizationTestRunner
+ *
+ * @run driver jdk.test.lib.helpers.ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:.
+ *                   -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+WhiteBoxAPI
+ *                   compiler.vectorization.runner.BasicCharOpTest
+ *
+ * @requires vm.compiler2.enabled & vm.flagless
+ */
+
+package compiler.vectorization.runner;
+
+// Element-wise arithmetic, logic and shift kernels over char arrays. Java
+// promotes char operands to (non-negative) int, so every kernel computes in
+// int and casts the result back to char (keeping the low 16 bits). Every
+// kernel runs the same counted loop over all SIZE elements and returns a
+// new result array.
+public class BasicCharOpTest extends VectorizationTestRunner {
+
+    // Length of every source and result array.
+    private static final int SIZE = 2345;
+
+    // Source arrays, filled by the constructor.
+    private char[] a;
+    private char[] b;
+    private char[] c;
+
+    // Allocates the source arrays. a and b get index-derived values; every
+    // element of c is the constant 10000.
+    public BasicCharOpTest() {
+        a = new char[SIZE];
+        b = new char[SIZE];
+        c = new char[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            a[i] = (char) (20 * i);
+            b[i] = (char) (i + 44444);
+            c[i] = (char) 10000;
+        }
+    }
+
+    // ---------------- Arithmetic ----------------
+    @Test
+    // Negation of each element of a.
+    public char[] vectorNeg() {
+        char[] res = new char[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (char) -a[i];
+        }
+        return res;
+    }
+
+    @Test
+    // Math.abs of each element of a, applied to the promoted int.
+    // Note that Math.abs() on unsigned subword types can NOT be vectorized
+    // since all the values are non-negative according to the semantics.
+    public char[] vectorAbs() {
+        char[] res = new char[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (char) Math.abs(a[i]);
+        }
+        return res;
+    }
+
+    @Test
+    // a + b, element by element.
+    public char[] vectorAdd() {
+        char[] res = new char[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (char) (a[i] + b[i]);
+        }
+        return res;
+    }
+
+    @Test
+    // a - b, element by element.
+    public char[] vectorSub() {
+        char[] res = new char[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (char) (a[i] - b[i]);
+        }
+        return res;
+    }
+
+    @Test
+    // a * b, element by element.
+    public char[] vectorMul() {
+        char[] res = new char[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (char) (a[i] * b[i]);
+        }
+        return res;
+    }
+
+    @Test
+    // c + a * b, element by element.
+    public char[] vectorMulAdd() {
+        char[] res = new char[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (char) (c[i] + a[i] * b[i]);
+        }
+        return res;
+    }
+
+    @Test
+    // c - a * b, element by element.
+    public char[] vectorMulSub() {
+        char[] res = new char[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (char) (c[i] - a[i] * b[i]);
+        }
+        return res;
+    }
+
+    // ---------------- Logic ----------------
+    @Test
+    // Bitwise complement of each element of a.
+    public char[] vectorNot() {
+        char[] res = new char[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (char) ~a[i];
+        }
+        return res;
+    }
+
+    @Test
+    // Bitwise AND of a and b, element by element.
+    public char[] vectorAnd() {
+        char[] res = new char[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (char) (a[i] & b[i]);
+        }
+        return res;
+    }
+
+    @Test
+    // Bitwise OR of a and b, element by element.
+    public char[] vectorOr() {
+        char[] res = new char[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (char) (a[i] | b[i]);
+        }
+        return res;
+    }
+
+    @Test
+    // Bitwise XOR of a and b, element by element.
+    public char[] vectorXor() {
+        char[] res = new char[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (char) (a[i] ^ b[i]);
+        }
+        return res;
+    }
+
+    // ---------------- Shift ----------------
+    @Test
+    // Left shift of each element of a by the constant 3.
+    public char[] vectorShiftLeft() {
+        char[] res = new char[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (char) (a[i] << 3);
+        }
+        return res;
+    }
+
+    @Test
+    // Arithmetic right shift of each element of a by the constant 2. The
+    // promoted char is never negative, so no sign bits are shifted in.
+    public char[] vectorSignedShiftRight() {
+        char[] res = new char[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (char) (a[i] >> 2);
+        }
+        return res;
+    }
+
+    @Test
+    // Logical right shift of each element of a by the constant 5.
+    public char[] vectorUnsignedShiftRight() {
+        char[] res = new char[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (char) (a[i] >>> 5);
+        }
+        return res;
+    }
+}
+
--- a/test/hotspot/jtreg/compiler/vectorization/runner/BasicDoubleOpTest.java
+++ b/test/hotspot/jtreg/compiler/vectorization/runner/BasicDoubleOpTest.java
@ -0,0 +1,253 @@
+/*
+ * Copyright (c) 2022, Arm Limited. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @summary Vectorization test on basic double operations
+ * @library /test/lib /
+ *
+ * @build sun.hotspot.WhiteBox
+ *        compiler.vectorization.runner.VectorizationTestRunner
+ *
+ * @run driver jdk.test.lib.helpers.ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:.
+ *                   -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+WhiteBoxAPI
+ *                   compiler.vectorization.runner.BasicDoubleOpTest
+ *
+ * @requires vm.compiler2.enabled & vm.flagless
+ */
+
+package compiler.vectorization.runner;
+
+// Element-wise arithmetic kernels and scalar reductions over double arrays.
+// Every element-wise kernel runs the same counted loop over all SIZE
+// elements and returns a freshly allocated result array; every reduction
+// folds array a into a single double.
+public class BasicDoubleOpTest extends VectorizationTestRunner {
+
+    // Length of every source and result array.
+    private static final int SIZE = 2345;
+
+    // Source arrays, filled by the constructor.
+    private double[] a;
+    private double[] b;
+    private double[] c;
+
+    // Allocates the source arrays. a grows linearly with the index and is
+    // always positive; b and c each hold one negative constant.
+    public BasicDoubleOpTest() {
+        a = new double[SIZE];
+        b = new double[SIZE];
+        c = new double[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            a[i] = 850.0 * i + 22222.22;
+            b[i] = -12345.678;
+            c[i] = -1.23456e7;
+        }
+    }
+
+    // ---------------- Arithmetic ----------------
+    @Test
+    // Negation of each element of a.
+    public double[] vectorNeg() {
+        double[] res = new double[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = -a[i];
+        }
+        return res;
+    }
+
+    @Test
+    // Absolute value of each element of a.
+    public double[] vectorAbs() {
+        double[] res = new double[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = Math.abs(a[i]);
+        }
+        return res;
+    }
+
+    @Test
+    // Square root of each element of a.
+    public double[] vectorSqrt() {
+        double[] res = new double[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = Math.sqrt(a[i]);
+        }
+        return res;
+    }
+
+    @Test
+    // Math.ceil of each element of a (round toward positive infinity).
+    public double[] vectorCeil() {
+        double[] res = new double[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = Math.ceil(a[i]);
+        }
+        return res;
+    }
+
+    @Test
+    // Math.floor of each element of a (round toward negative infinity).
+    public double[] vectorFloor() {
+        double[] res = new double[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = Math.floor(a[i]);
+        }
+        return res;
+    }
+
+    @Test
+    // Math.rint of each element of a (round to nearest, ties to even).
+    public double[] vectorRint() {
+        double[] res = new double[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = Math.rint(a[i]);
+        }
+        return res;
+    }
+
+    @Test
+    // a + b, element by element.
+    public double[] vectorAdd() {
+        double[] res = new double[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = a[i] + b[i];
+        }
+        return res;
+    }
+
+    @Test
+    // a - b, element by element.
+    public double[] vectorSub() {
+        double[] res = new double[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = a[i] - b[i];
+        }
+        return res;
+    }
+
+    @Test
+    // a * b, element by element.
+    public double[] vectorMul() {
+        double[] res = new double[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = a[i] * b[i];
+        }
+        return res;
+    }
+
+    @Test
+    // a / b, element by element.
+    public double[] vectorDiv() {
+        double[] res = new double[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = a[i] / b[i];
+        }
+        return res;
+    }
+
+    @Test
+    // Math.max(a, b), element by element.
+    public double[] vectorMax() {
+        double[] res = new double[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = Math.max(a[i], b[i]);
+        }
+        return res;
+    }
+
+    @Test
+    // Math.min(a, b), element by element.
+    public double[] vectorMin() {
+        double[] res = new double[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = Math.min(a[i], b[i]);
+        }
+        return res;
+    }
+
+    @Test
+    // Fused multiply-add: a * b + c with a single rounding.
+    public double[] vectorMulAdd() {
+        double[] res = new double[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = Math.fma(a[i], b[i], c[i]);
+        }
+        return res;
+    }
+
+    @Test
+    // Fused c - a * b, written with the first factor negated.
+    public double[] vectorMulSub1() {
+        double[] res = new double[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = Math.fma(-a[i], b[i], c[i]);
+        }
+        return res;
+    }
+
+    @Test
+    // Fused c - a * b, written with the second factor negated.
+    public double[] vectorMulSub2() {
+        double[] res = new double[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = Math.fma(a[i], -b[i], c[i]);
+        }
+        return res;
+    }
+
+    @Test
+    // Fused -(a * b) - c, written with the first factor negated.
+    public double[] vectorNegateMulAdd1() {
+        double[] res = new double[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = Math.fma(-a[i], b[i], -c[i]);
+        }
+        return res;
+    }
+
+    @Test
+    // Fused -(a * b) - c, written with the second factor negated.
+    public double[] vectorNegateMulAdd2() {
+        double[] res = new double[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = Math.fma(a[i], -b[i], -c[i]);
+        }
+        return res;
+    }
+
+    @Test
+    // Fused a * b - c.
+    public double[] vectorNegateMulSub() {
+        double[] res = new double[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = Math.fma(a[i], b[i], -c[i]);
+        }
+        return res;
+    }
+
+    // ---------------- Reduction ----------------
+    @Test
+    // Sum of all elements of a, accumulated in index order.
+    public double reductionAdd() {
+        double res = 0.0;
+        for (int i = 0; i < SIZE; i++) {
+            res += a[i];
+        }
+        return res;
+    }
+
+    @Test
+    // Maximum of all elements of a.
+    public double reductionMax() {
+        // Start from the most negative finite double. Double.MIN_VALUE is
+        // the smallest POSITIVE double, so using it as the seed would clamp
+        // the result to a positive value if a ever held non-positive data.
+        double res = -Double.MAX_VALUE;
+        for (int i = 0; i < SIZE; i++) {
+            res = Math.max(res, a[i]);
+        }
+        return res;
+    }
+
+    @Test
+    // Minimum of all elements of a, seeded with the largest finite double.
+    public double reductionMin() {
+        double res = Double.MAX_VALUE;
+        for (int i = 0; i < SIZE; i++) {
+            res = Math.min(res, a[i]);
+        }
+        return res;
+    }
+}
+
--- a/test/hotspot/jtreg/compiler/vectorization/runner/BasicFloatOpTest.java
+++ b/test/hotspot/jtreg/compiler/vectorization/runner/BasicFloatOpTest.java
@ -0,0 +1,226 @@
+/*
+ * Copyright (c) 2022, Arm Limited. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @summary Vectorization test on basic float operations
+ * @library /test/lib /
+ *
+ * @build sun.hotspot.WhiteBox
+ *        compiler.vectorization.runner.VectorizationTestRunner
+ *
+ * @run driver jdk.test.lib.helpers.ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:.
+ *                   -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+WhiteBoxAPI
+ *                   compiler.vectorization.runner.BasicFloatOpTest
+ *
+ * @requires vm.compiler2.enabled & vm.flagless
+ */
+
+package compiler.vectorization.runner;
+
+public class BasicFloatOpTest extends VectorizationTestRunner { // float loop kernels: element-wise arithmetic and reductions
+
+    private static final int SIZE = 2345; // length of every array and trip count of every loop below
+
+    private float[] a; // 850.0f * i + 22222.22f (every element positive)
+    private float[] b; // constant -12345.678f
+    private float[] c; // constant -1.23456e7f
+
+    public BasicFloatOpTest() { // fills the three input arrays with fixed values
+        a = new float[SIZE];
+        b = new float[SIZE];
+        c = new float[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            a[i] = 850.0f * i + 22222.22f;
+            b[i] = -12345.678f;
+            c[i] = -1.23456e7f;
+        }
+    }
+
+    // ---------------- Arithmetic ----------------
+    @Test // res[i] = -a[i]
+    public float[] vectorNeg() {
+        float[] res = new float[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = -a[i];
+        }
+        return res;
+    }
+
+    @Test // res[i] = |a[i]|
+    public float[] vectorAbs() {
+        float[] res = new float[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = Math.abs(a[i]);
+        }
+        return res;
+    }
+
+    @Test // res[i] = sqrt(a[i])
+    public float[] vectorSqrt() {
+        float[] res = new float[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (float) Math.sqrt(a[i]); // Math.sqrt works on double; the result is narrowed back to float
+        }
+        return res;
+    }
+
+    @Test // res[i] = a[i] + b[i]
+    public float[] vectorAdd() {
+        float[] res = new float[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = a[i] + b[i];
+        }
+        return res;
+    }
+
+    @Test // res[i] = a[i] - b[i]
+    public float[] vectorSub() {
+        float[] res = new float[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = a[i] - b[i];
+        }
+        return res;
+    }
+
+    @Test // res[i] = a[i] * b[i]
+    public float[] vectorMul() {
+        float[] res = new float[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = a[i] * b[i];
+        }
+        return res;
+    }
+
+    @Test // res[i] = a[i] / b[i]
+    public float[] vectorDiv() {
+        float[] res = new float[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = a[i] / b[i];
+        }
+        return res;
+    }
+
+    @Test // res[i] = max(a[i], b[i])
+    public float[] vectorMax() {
+        float[] res = new float[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = Math.max(a[i], b[i]);
+        }
+        return res;
+    }
+
+    @Test // res[i] = min(a[i], b[i])
+    public float[] vectorMin() {
+        float[] res = new float[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = Math.min(a[i], b[i]);
+        }
+        return res;
+    }
+
+    @Test // res[i] = fma(a[i], b[i], c[i]); no operand negated
+    public float[] vectorMulAdd() {
+        float[] res = new float[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = Math.fma(a[i], b[i], c[i]);
+        }
+        return res;
+    }
+
+    @Test // res[i] = fma(-a[i], b[i], c[i]); the negation is on the first multiplicand
+    public float[] vectorMulSub1() {
+        float[] res = new float[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = Math.fma(-a[i], b[i], c[i]);
+        }
+        return res;
+    }
+
+    @Test // res[i] = fma(a[i], -b[i], c[i]); the negation is on the second multiplicand
+    public float[] vectorMulSub2() {
+        float[] res = new float[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = Math.fma(a[i], -b[i], c[i]);
+        }
+        return res;
+    }
+
+    @Test // res[i] = fma(-a[i], b[i], -c[i]); first multiplicand and addend are both negated
+    public float[] vectorNegateMulAdd1() {
+        float[] res = new float[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = Math.fma(-a[i], b[i], -c[i]);
+        }
+        return res;
+    }
+
+    @Test // res[i] = fma(a[i], -b[i], -c[i]); second multiplicand and addend are both negated
+    public float[] vectorNegateMulAdd2() {
+        float[] res = new float[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = Math.fma(a[i], -b[i], -c[i]);
+        }
+        return res;
+    }
+
+    @Test // res[i] = fma(a[i], b[i], -c[i]); only the addend is negated
+    public float[] vectorNegateMulSub() {
+        float[] res = new float[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = Math.fma(a[i], b[i], -c[i]);
+        }
+        return res;
+    }
+
+    // ---------------- Reduction ----------------
+    @Test // sequential sum of a[0..SIZE) into a scalar accumulator
+    public float reductionAdd() {
+        float res = 0.0f; // additive identity
+        for (int i = 0; i < SIZE; i++) {
+            res += a[i];
+        }
+        return res;
+    }
+
+    @Test // maximum of a[0..SIZE) folded into a scalar accumulator with Math.max
+    public float reductionMax() {
+        float res = Float.NEGATIVE_INFINITY; // identity of max; Float.MIN_VALUE is the smallest *positive* float
+        for (int i = 0; i < SIZE; i++) {
+            res = Math.max(res, a[i]);
+        }
+        return res;
+    }
+
+    @Test // minimum of a[0..SIZE) folded into a scalar accumulator with Math.min
+    public float reductionMin() {
+        float res = Float.MAX_VALUE; // largest finite float, so any finite element replaces it
+        for (int i = 0; i < SIZE; i++) {
+            res = Math.min(res, a[i]);
+        }
+        return res;
+    }
+}
+
--- a/test/hotspot/jtreg/compiler/vectorization/runner/BasicIntOpTest.java
+++ b/test/hotspot/jtreg/compiler/vectorization/runner/BasicIntOpTest.java
@ -0,0 +1,255 @@
+/*
+ * Copyright (c) 2022, Arm Limited. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @summary Vectorization test on basic int operations
+ * @library /test/lib /
+ *
+ * @build sun.hotspot.WhiteBox
+ *        compiler.vectorization.runner.VectorizationTestRunner
+ *
+ * @run driver jdk.test.lib.helpers.ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:.
+ *                   -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+WhiteBoxAPI
+ *                   compiler.vectorization.runner.BasicIntOpTest
+ *
+ * @requires vm.compiler2.enabled & vm.flagless
+ */
+
+package compiler.vectorization.runner;
+
+public class BasicIntOpTest extends VectorizationTestRunner { // int loop kernels: arithmetic, logic, shifts and reductions
+
+    private static final int SIZE = 2345; // length of every array and trip count of every loop below
+
+    private int[] a; // -25 * i
+    private int[] b; // 333 * i + 9999
+    private int[] c; // constant -987654321
+
+    public BasicIntOpTest() { // fills the three input arrays with fixed values
+        a = new int[SIZE];
+        b = new int[SIZE];
+        c = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            a[i] = -25 * i;
+            b[i] = 333 * i + 9999;
+            c[i] = -987654321;
+        }
+    }
+
+    // ---------------- Arithmetic ----------------
+    @Test // res[i] = -a[i]
+    public int[] vectorNeg() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = -a[i];
+        }
+        return res;
+    }
+
+    @Test // res[i] = |a[i]|
+    public int[] vectorAbs() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = Math.abs(a[i]);
+        }
+        return res;
+    }
+
+    @Test // res[i] = a[i] + b[i]
+    public int[] vectorAdd() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = a[i] + b[i];
+        }
+        return res;
+    }
+
+    @Test // res[i] = a[i] - b[i]
+    public int[] vectorSub() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = a[i] - b[i];
+        }
+        return res;
+    }
+
+    @Test // res[i] = a[i] * b[i]
+    public int[] vectorMul() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = a[i] * b[i];
+        }
+        return res;
+    }
+
+    @Test // res[i] = c[i] + a[i] * b[i] (multiply feeding an add)
+    public int[] vectorMulAdd() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = c[i] + a[i] * b[i];
+        }
+        return res;
+    }
+
+    @Test // res[i] = c[i] - a[i] * b[i] (multiply feeding a subtract)
+    public int[] vectorMulSub() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = c[i] - a[i] * b[i];
+        }
+        return res;
+    }
+
+    @Test // res[i] = number of one-bits in a[i]
+    public int[] vectorPopCount() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = Integer.bitCount(a[i]);
+        }
+        return res;
+    }
+
+    // ---------------- Logic ----------------
+    @Test // res[i] = ~a[i]
+    public int[] vectorNot() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = ~a[i];
+        }
+        return res;
+    }
+
+    @Test // res[i] = a[i] & b[i]
+    public int[] vectorAnd() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = a[i] & b[i];
+        }
+        return res;
+    }
+
+    @Test // res[i] = a[i] | b[i]
+    public int[] vectorOr() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = a[i] | b[i];
+        }
+        return res;
+    }
+
+    @Test // res[i] = a[i] ^ b[i]
+    public int[] vectorXor() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = a[i] ^ b[i];
+        }
+        return res;
+    }
+
+    // ---------------- Shift ----------------
+    @Test // res[i] = a[i] << 3 (constant shift count)
+    public int[] vectorShiftLeft() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = a[i] << 3;
+        }
+        return res;
+    }
+
+    @Test // res[i] = a[i] >> 2 (arithmetic shift, sign-preserving)
+    public int[] vectorSignedShiftRight() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = a[i] >> 2;
+        }
+        return res;
+    }
+
+    @Test // res[i] = a[i] >>> 5 (logical shift, zero-filling)
+    public int[] vectorUnsignedShiftRight() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = a[i] >>> 5;
+        }
+        return res;
+    }
+
+    // ---------------- Reduction ----------------
+    @Test // sum of a[0..SIZE) into a scalar accumulator
+    public int reductionAdd() {
+        int res = 0; // additive identity
+        for (int i = 0; i < SIZE; i++) {
+            res += a[i];
+        }
+        return res;
+    }
+
+    @Test // bitwise AND of a[0..SIZE) into a scalar accumulator
+    public int reductionAnd() {
+        int res = 0xffffffff; // all bits set: identity of AND
+        for (int i = 0; i < SIZE; i++) {
+            res &= a[i];
+        }
+        return res;
+    }
+
+    @Test // bitwise OR of a[0..SIZE) into a scalar accumulator
+    public int reductionOr() {
+        int res = 0; // identity of OR
+        for (int i = 0; i < SIZE; i++) {
+            res |= a[i];
+        }
+        return res;
+    }
+
+    @Test // bitwise XOR of a[0..SIZE) into a scalar accumulator
+    public int reductionXor() {
+        int res = 0x0f0f0f0f; // non-zero seed, so the result is the XOR of all elements XOR this pattern
+        for (int i = 0; i < SIZE; i++) {
+            res ^= a[i];
+        }
+        return res;
+    }
+
+    @Test // maximum of a[0..SIZE) folded with Math.max
+    public int reductionMax() {
+        int res = Integer.MIN_VALUE; // identity of max for int
+        for (int i = 0; i < SIZE; i++) {
+            res = Math.max(res, a[i]);
+        }
+        return res;
+    }
+
+    @Test // minimum of a[0..SIZE) folded with Math.min
+    public int reductionMin() {
+        int res = Integer.MAX_VALUE; // identity of min for int
+        for (int i = 0; i < SIZE; i++) {
+            res = Math.min(res, a[i]);
+        }
+        return res;
+    }
+}
+
--- a/test/hotspot/jtreg/compiler/vectorization/runner/BasicLongOpTest.java
+++ b/test/hotspot/jtreg/compiler/vectorization/runner/BasicLongOpTest.java
@ -0,0 +1,246 @@
+/*
+ * Copyright (c) 2022, Arm Limited. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @summary Vectorization test on basic long operations
+ * @library /test/lib /
+ *
+ * @build sun.hotspot.WhiteBox
+ *        compiler.vectorization.runner.VectorizationTestRunner
+ *
+ * @run driver jdk.test.lib.helpers.ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:.
+ *                   -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+WhiteBoxAPI
+ *                   compiler.vectorization.runner.BasicLongOpTest
+ *
+ * @requires vm.compiler2.enabled & vm.flagless
+ */
+
+package compiler.vectorization.runner;
+
+public class BasicLongOpTest extends VectorizationTestRunner { // long loop kernels: arithmetic, logic, shifts and reductions
+
+    private static final int SIZE = 2345; // length of every array and trip count of every loop below
+
+    private long[] a; // -123456789L * i
+    private long[] b; // 66666666L * i + 8888888888888888888L
+    private long[] c; // constant -987654321098765L
+
+    public BasicLongOpTest() { // fills the three input arrays with fixed values
+        a = new long[SIZE];
+        b = new long[SIZE];
+        c = new long[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            a[i] = -123456789L * i;
+            b[i] = 66666666L * i + 8888888888888888888L;
+            c[i] = -987654321098765L;
+        }
+    }
+
+    // ---------------- Arithmetic ----------------
+    @Test // res[i] = -a[i]
+    public long[] vectorNeg() {
+        long[] res = new long[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = -a[i];
+        }
+        return res;
+    }
+
+    @Test // res[i] = |a[i]|
+    public long[] vectorAbs() {
+        long[] res = new long[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = Math.abs(a[i]);
+        }
+        return res;
+    }
+
+    @Test // res[i] = a[i] + b[i]
+    public long[] vectorAdd() {
+        long[] res = new long[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = a[i] + b[i];
+        }
+        return res;
+    }
+
+    @Test // res[i] = a[i] - b[i]
+    public long[] vectorSub() {
+        long[] res = new long[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = a[i] - b[i];
+        }
+        return res;
+    }
+
+    @Test // res[i] = a[i] * b[i]
+    public long[] vectorMul() {
+        long[] res = new long[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = a[i] * b[i];
+        }
+        return res;
+    }
+
+    @Test // res[i] = c[i] + a[i] * b[i] (multiply feeding an add)
+    public long[] vectorMulAdd() {
+        long[] res = new long[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = c[i] + a[i] * b[i];
+        }
+        return res;
+    }
+
+    @Test // res[i] = c[i] - a[i] * b[i] (multiply feeding a subtract)
+    public long[] vectorMulSub() {
+        long[] res = new long[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = c[i] - a[i] * b[i];
+        }
+        return res;
+    }
+
+    // ---------------- Logic ----------------
+    @Test // res[i] = ~a[i]
+    public long[] vectorNot() {
+        long[] res = new long[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = ~a[i];
+        }
+        return res;
+    }
+
+    @Test // res[i] = a[i] & b[i]
+    public long[] vectorAnd() {
+        long[] res = new long[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = a[i] & b[i];
+        }
+        return res;
+    }
+
+    @Test // res[i] = a[i] | b[i]
+    public long[] vectorOr() {
+        long[] res = new long[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = a[i] | b[i];
+        }
+        return res;
+    }
+
+    @Test // res[i] = a[i] ^ b[i]
+    public long[] vectorXor() {
+        long[] res = new long[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = a[i] ^ b[i];
+        }
+        return res;
+    }
+
+    // ---------------- Shift ----------------
+    @Test // res[i] = a[i] << 3 (constant shift count)
+    public long[] vectorShiftLeft() {
+        long[] res = new long[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = a[i] << 3;
+        }
+        return res;
+    }
+
+    @Test // res[i] = a[i] >> 2 (arithmetic shift, sign-preserving)
+    public long[] vectorSignedShiftRight() {
+        long[] res = new long[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = a[i] >> 2;
+        }
+        return res;
+    }
+
+    @Test // res[i] = a[i] >>> 5 (logical shift, zero-filling)
+    public long[] vectorUnsignedShiftRight() {
+        long[] res = new long[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = a[i] >>> 5;
+        }
+        return res;
+    }
+
+    // ---------------- Reduction ----------------
+    @Test // sum of a[0..SIZE) into a scalar accumulator
+    public long reductionAdd() {
+        long res = 0L; // additive identity
+        for (int i = 0; i < SIZE; i++) {
+            res += a[i];
+        }
+        return res;
+    }
+
+    @Test // bitwise AND of a[0..SIZE) into a scalar accumulator
+    public long reductionAnd() {
+        long res = 0xffffffffffffffffL; // all bits set: identity of AND
+        for (int i = 0; i < SIZE; i++) {
+            res &= a[i];
+        }
+        return res;
+    }
+
+    @Test // bitwise OR of a[0..SIZE) into a scalar accumulator
+    public long reductionOr() {
+        long res = 0L; // identity of OR
+        for (int i = 0; i < SIZE; i++) {
+            res |= a[i];
+        }
+        return res;
+    }
+
+    @Test // bitwise XOR of a[0..SIZE) into a scalar accumulator
+    public long reductionXor() {
+        long res = 0xf0f0f0f0f0f0f0f0L; // non-zero seed, so the result is the XOR of all elements XOR this pattern
+        for (int i = 0; i < SIZE; i++) {
+            res ^= a[i];
+        }
+        return res;
+    }
+
+    @Test // maximum of a[0..SIZE) folded with Math.max
+    public long reductionMax() {
+        long res = Long.MIN_VALUE; // identity of max for long
+        for (int i = 0; i < SIZE; i++) {
+            res = Math.max(res, a[i]);
+        }
+        return res;
+    }
+
+    @Test // minimum of a[0..SIZE) folded with Math.min
+    public long reductionMin() {
+        long res = Long.MAX_VALUE; // identity of min for long
+        for (int i = 0; i < SIZE; i++) {
+            res = Math.min(res, a[i]);
+        }
+        return res;
+    }
+}
+
--- a/test/hotspot/jtreg/compiler/vectorization/runner/BasicShortOpTest.java
+++ b/test/hotspot/jtreg/compiler/vectorization/runner/BasicShortOpTest.java
@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2022, Arm Limited. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @summary Vectorization test on basic short operations
+ * @library /test/lib /
+ *
+ * @build sun.hotspot.WhiteBox
+ *        compiler.vectorization.runner.VectorizationTestRunner
+ *
+ * @run driver jdk.test.lib.helpers.ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:.
+ *                   -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+WhiteBoxAPI
+ *                   compiler.vectorization.runner.BasicShortOpTest
+ *
+ * @requires vm.compiler2.enabled & vm.flagless
+ */
+
+package compiler.vectorization.runner;
+
+public class BasicShortOpTest extends VectorizationTestRunner { // short loop kernels: arithmetic, logic and shifts
+
+    private static final int SIZE = 2345; // length of every array and trip count of every loop below
+
+    private short[] a; // (short) (-12 * i)
+    private short[] b; // (short) (9 * i + 8888)
+    private short[] c; // constant (short) -32323
+
+    public BasicShortOpTest() { // fills the three input arrays with fixed values
+        a = new short[SIZE];
+        b = new short[SIZE];
+        c = new short[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            a[i] = (short) (-12 * i);
+            b[i] = (short) (9 * i + 8888);
+            c[i] = (short) -32323;
+        }
+    }
+
+    // ---------------- Arithmetic ----------------
+    @Test // res[i] = (short) -a[i]; operands are promoted to int, then narrowed by the cast
+    public short[] vectorNeg() {
+        short[] res = new short[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (short) -a[i];
+        }
+        return res;
+    }
+
+    @Test // res[i] = (short) |a[i]|
+    public short[] vectorAbs() {
+        short[] res = new short[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (short) Math.abs(a[i]);
+        }
+        return res;
+    }
+
+    @Test // res[i] = (short) (a[i] + b[i])
+    public short[] vectorAdd() {
+        short[] res = new short[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (short) (a[i] + b[i]);
+        }
+        return res;
+    }
+
+    @Test // res[i] = (short) (a[i] - b[i])
+    public short[] vectorSub() {
+        short[] res = new short[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (short) (a[i] - b[i]);
+        }
+        return res;
+    }
+
+    @Test // res[i] = (short) (a[i] * b[i])
+    public short[] vectorMul() {
+        short[] res = new short[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (short) (a[i] * b[i]);
+        }
+        return res;
+    }
+
+    @Test // res[i] = (short) (c[i] + a[i] * b[i]) (multiply feeding an add)
+    public short[] vectorMulAdd() {
+        short[] res = new short[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (short) (c[i] + a[i] * b[i]);
+        }
+        return res;
+    }
+
+    @Test // res[i] = (short) (c[i] - a[i] * b[i]) (multiply feeding a subtract)
+    public short[] vectorMulSub() {
+        short[] res = new short[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (short) (c[i] - a[i] * b[i]);
+        }
+        return res;
+    }
+
+    // ---------------- Logic ----------------
+    @Test // res[i] = (short) ~a[i]
+    public short[] vectorNot() {
+        short[] res = new short[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (short) ~a[i];
+        }
+        return res;
+    }
+
+    @Test // res[i] = (short) (a[i] & b[i])
+    public short[] vectorAnd() {
+        short[] res = new short[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (short) (a[i] & b[i]);
+        }
+        return res;
+    }
+
+    @Test // res[i] = (short) (a[i] | b[i])
+    public short[] vectorOr() {
+        short[] res = new short[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (short) (a[i] | b[i]);
+        }
+        return res;
+    }
+
+    @Test // res[i] = (short) (a[i] ^ b[i])
+    public short[] vectorXor() {
+        short[] res = new short[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (short) (a[i] ^ b[i]);
+        }
+        return res;
+    }
+
+    // ---------------- Shift ----------------
+    @Test // res[i] = (short) (a[i] << 3) (constant shift count)
+    public short[] vectorShiftLeft() {
+        short[] res = new short[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (short) (a[i] << 3);
+        }
+        return res;
+    }
+
+    @Test // res[i] = (short) (a[i] >> 2) (arithmetic shift, sign-preserving)
+    public short[] vectorSignedShiftRight() {
+        short[] res = new short[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (short) (a[i] >> 2);
+        }
+        return res;
+    }
+
+    @Test // res[i] = (short) (a[i] >>> 5) (logical shift applied to the int-promoted value)
+    // Note that unsigned shift right on subword signed integer types can
+    // not be vectorized since the sign extension bits would be lost.
+    public short[] vectorUnsignedShiftRight() {
+        short[] res = new short[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (short) (a[i] >>> 5);
+        }
+        return res;
+    }
+}
+
--- a/test/hotspot/jtreg/compiler/vectorization/runner/LoopArrayIndexComputeTest.java
+++ b/test/hotspot/jtreg/compiler/vectorization/runner/LoopArrayIndexComputeTest.java
@ -0,0 +1,287 @@
+/*
+ * Copyright (c) 2022, Arm Limited. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @summary Vectorization test on loop array index computation
+ * @library /test/lib /
+ *
+ * @build sun.hotspot.WhiteBox
+ *        compiler.vectorization.runner.VectorizationTestRunner
+ *
+ * @run driver jdk.test.lib.helpers.ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:.
+ *                   -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+WhiteBoxAPI
+ *                   compiler.vectorization.runner.LoopArrayIndexComputeTest
+ *
+ * @requires vm.compiler2.enabled & vm.flagless
+ */
+
+package compiler.vectorization.runner;
+
+import java.util.Random;
+
+public class LoopArrayIndexComputeTest extends VectorizationTestRunner { // loop kernels that vary how the array index is computed
+
+    private static final int SIZE = 2345; // length of every array; the loop bounds below are fractions of it
+
+    private int[] ints; // 499 * i
+    private short[] shorts; // (short) (-13 * i + 5)
+    private char[] chars; // (char) (i << 3)
+    private byte[] bytes; // (byte) (i >> 2 + 3), see the note in the constructor
+    private boolean[] booleans; // true at every index divisible by 5
+
+    private int inv1; // loop-invariant index offset in the range 1..10
+    private int inv2; // loop-invariant index offset in the range 1..10
+
+    public LoopArrayIndexComputeTest() { // fills the input arrays and picks the two invariant offsets
+        ints = new int[SIZE];
+        shorts = new short[SIZE];
+        chars = new char[SIZE];
+        bytes = new byte[SIZE];
+        booleans = new boolean[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            ints[i] = 499 * i;
+            shorts[i] = (short) (-13 * i + 5);
+            chars[i] = (char) (i << 3);
+            bytes[i] = (byte) (i >> 2 + 3); // NOTE(review): '+' binds tighter than '>>', so this is i >> 5; confirm (i >> 2) + 3 was not intended
+            booleans[i] = (i % 5 == 0);
+        }
+        Random ran = new Random(10); // fixed seed
+        inv1 = Math.abs(ran.nextInt() % 10) + 1; // nextInt() % 10 is in -9..9, so the result is in 1..10
+        inv2 = Math.abs(ran.nextInt() % 10) + 1; // same range as inv1
+    }
+
+    // ---------------- Linear Indexes ----------------
+    @Test // load and store both at i + 1, for i in [0, SIZE / 2)
+    public int[] indexPlusConstant() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE / 2; i++) {
+            res[i + 1] = ints[i + 1] + 999;
+        }
+        return res;
+    }
+
+    @Test // load and store both at i - 49, for i in [SIZE / 2, SIZE); the loaded value is multiplied by i
+    public int[] indexMinusConstant() {
+        int[] res = new int[SIZE];
+        for (int i = SIZE / 2; i < SIZE; i++) {
+            res[i - 49] = ints[i - 49] * i;
+        }
+        return res;
+    }
+
+    @Test // read-modify-write at i + inv1, for i in [0, SIZE / 4)
+    public int[] indexPlusInvariant() {
+        int[] res = new int[SIZE];
+        System.arraycopy(ints, 0, res, 0, SIZE); // res starts as a copy of ints so the *= has non-zero operands
+        for (int i = 0; i < SIZE / 4; i++) {
+            res[i + inv1] *= ints[i + inv1];
+        }
+        return res;
+    }
+
+    @Test // read-modify-write at i - inv2, for i in [SIZE / 3, SIZE / 2); the operand also depends on i
+    public int[] indexMinusInvariant() {
+        int[] res = new int[SIZE];
+        System.arraycopy(ints, 0, res, 0, SIZE); // res starts as a copy of ints
+        for (int i = SIZE / 3; i < SIZE / 2; i++) {
+            res[i - inv2] *= (ints[i - inv2] + (i >> 2));
+        }
+        return res;
+    }
+
+    @Test // read-modify-write at i + inv1 - 1 (invariant plus constant), for i in [10, SIZE / 4)
+    public int[] indexWithInvariantAndConstant() {
+        int[] res = new int[SIZE];
+        System.arraycopy(ints, 0, res, 0, SIZE); // res starts as a copy of ints
+        for (int i = 10; i < SIZE / 4; i++) {
+            res[i + inv1 - 1] *= (ints[i + inv1 - 1] + 1);
+        }
+        return res;
+    }
+
+    @Test // read-modify-write at i + inv1 + inv2 (two invariants), for i in [10, SIZE / 4)
+    public int[] indexWithTwoInvariants() {
+        int[] res = new int[SIZE];
+        System.arraycopy(ints, 0, res, 0, SIZE); // res starts as a copy of ints
+        for (int i = 10; i < SIZE / 4; i++) {
+            res[i + inv1 + inv2] -= ints[i + inv1 + inv2];
+        }
+        return res;
+    }
+
+    @Test // store at i, load at i + 1 (different constant offsets), for i in [0, SIZE / 4)
+    // Note that this case cannot be vectorized due to data dependence
+    public int[] indexWithDifferentConstants() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE / 4; i++) {
+            res[i] = ints[i + 1];
+        }
+        return res;
+    }
+
+    @Test // store at i + inv1, load at i - inv2 (different invariant offsets), for i in [SIZE / 4, SIZE / 2)
+    // Note that this case cannot be vectorized due to data dependence
+    public int[] indexWithDifferentInvariants() {
+        int[] res = new int[SIZE];
+        for (int i = SIZE / 4; i < SIZE / 2; i++) {
+            res[i + inv1] = ints[i - inv2];
+        }
+        return res;
+    }
+
+    @Test // two sum reductions over loads at i + 2 and i + 15; no stores in the loop
+    public int indexWithDifferentConstantsLoadOnly() {
+        int res1 = 0;
+        int res2 = 0;
+        for (int i = 0; i < SIZE / 4; i++) {
+            res1 += ints[i + 2];
+            res2 += ints[i + 15];
+        }
+        return res1 * res2; // combines both sums so neither reduction is dead
+    }
+
+    @Test // two sum reductions over loads at i + inv1 and i - inv2; no stores in the loop
+    public int indexWithDifferentInvariantsLoadOnly() {
+        int res1 = 0;
+        int res2 = 0;
+        for (int i = SIZE / 4; i < SIZE / 2; i++) {
+            res1 += ints[i + inv1];
+            res2 += ints[i - inv2];
+        }
+        return res1 * res2; // combines both sums so neither reduction is dead
+    }
+
+    @Test // copies every second element (index 2 * i), for i in [0, SIZE / 3)
+    public int[] scaledIndex() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE / 3; i++) {
+            res[2 * i] = ints[2 * i];
+        }
+        return res;
+    }
+
+    @Test // read-modify-write at 2 * i + 3, for i in [0, SIZE / 4)
+    public int[] scaledIndexWithConstantOffset() {
+        int[] res = new int[SIZE];
+        System.arraycopy(ints, 0, res, 0, SIZE); // res starts as a copy of ints
+        for (int i = 0; i < SIZE / 4; i++) {
+            res[2 * i + 3] *= ints[2 * i + 3];
+        }
+        return res;
+    }
+
+    @Test // read-modify-write at 2 * i + inv1, for i in [0, SIZE / 4)
+    public int[] scaledIndexWithInvariantOffset() {
+        int[] res = new int[SIZE];
+        System.arraycopy(ints, 0, res, 0, SIZE); // res starts as a copy of ints
+        for (int i = 0; i < SIZE / 4; i++) {
+            res[2 * i + inv1] *= ints[2 * i + inv1];
+        }
+        return res;
+    }
+
+    @Test // res[i] += res[i - 1] via a second induction variable j that trails i by one
+    // Note that this case cannot be vectorized due to data dependence
+    // between src and dest of the assignment.
+    public int[] sameArrayWithDifferentIndex() {
+        int[] res = new int[SIZE];
+        System.arraycopy(ints, 0, res, 0, SIZE); // res starts as a copy of ints
+        for (int i = 1, j = 0; i < 100; i++, j++) {
+            res[i] += res[j]; // each iteration reads the element the previous iteration wrote
+        }
+        return res;
+    }
+
+    // ---------------- Subword Type Arrays ----------------
+    @Test // short variant: res[i] *= shorts[i + 1], for i in [0, SIZE / 2)
+    // Note that this case cannot be vectorized due to data dependence
+    public short[] shortArrayWithDependence() {
+        short[] res = new short[SIZE];
+        System.arraycopy(shorts, 0, res, 0, SIZE); // res starts as a copy of shorts
+        for (int i = 0; i < SIZE / 2; i++) {
+            res[i] *= shorts[i + 1];
+        }
+        return res;
+    }
+
+    @Test // char variant: res[i] *= chars[i + 2], for i in [0, SIZE / 2)
+    // Note that this case cannot be vectorized due to data dependence
+    public char[] charArrayWithDependence() {
+        char[] res = new char[SIZE];
+        System.arraycopy(chars, 0, res, 0, SIZE); // res starts as a copy of chars
+        for (int i = 0; i < SIZE / 2; i++) {
+            res[i] *= chars[i + 2];
+        }
+        return res;
+    }
+
+    @Test // byte variant: res[i] *= bytes[i + 3], for i in [0, SIZE / 2)
+    // Note that this case cannot be vectorized due to data dependence
+    public byte[] byteArrayWithDependence() {
+        byte[] res = new byte[SIZE];
+        System.arraycopy(bytes, 0, res, 0, SIZE); // res starts as a copy of bytes
+        for (int i = 0; i < SIZE / 2; i++) {
+            res[i] *= bytes[i + 3];
+        }
+        return res;
+    }
+
+    @Test // boolean variant: res[i] |= booleans[i + 4], for i in [0, SIZE / 2)
+    // Note that this case cannot be vectorized due to data dependence
+    public boolean[] booleanArrayWithDependence() {
+        boolean[] res = new boolean[SIZE];
+        System.arraycopy(booleans, 0, res, 0, SIZE); // res starts as a copy of booleans
+        for (int i = 0; i < SIZE / 2; i++) {
+            res[i] |= booleans[i + 4];
+        }
+        return res;
+    }
+
+    // ---------------- Multiple Operations ----------------
+    @Test // one loop copying an int array at i + 1 and a short array at i + inv2
+    public int[] differentIndexWithDifferentTypes() {
+        int[] res1 = new int[SIZE];
+        short[] res2 = new short[SIZE];
+        for (int i = 0; i < SIZE / 2; i++) {
+            res1[i + 1] = ints[i + 1];
+            res2[i + inv2] = shorts[i + inv2];
+        }
+        return res1; // only the int result is returned; res2 is written but not returned
+    }
+
+    @Test // one loop copying the same int source at i + 3 and at i + inv1 into two arrays
+    // Note that this case cannot be vectorized due to data dependence
+    public int[] differentIndexWithSameType() {
+        int[] res1 = new int[SIZE];
+        int[] res2 = new int[SIZE];
+        for (int i = 0; i < SIZE / 2; i++) {
+            res1[i + 3] = ints[i + 3];
+            res2[i + inv1] = ints[i + inv1];
+        }
+        return res2; // only the inv1-indexed copy is returned; res1 is written but not returned
+    }
+}
+
--- a/test/hotspot/jtreg/compiler/vectorization/runner/LoopCombinedOpTest.java
+++ b/test/hotspot/jtreg/compiler/vectorization/runner/LoopCombinedOpTest.java
@ -0,0 +1,278 @@
+/*
+ * Copyright (c) 2022, Arm Limited. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @summary Vectorization test on combined operations
+ * @library /test/lib /
+ *
+ * @build sun.hotspot.WhiteBox
+ *        compiler.vectorization.runner.VectorizationTestRunner
+ *
+ * @run driver jdk.test.lib.helpers.ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:.
+ *                   -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+WhiteBoxAPI
+ *                   compiler.vectorization.runner.LoopCombinedOpTest
+ *
+ * @requires vm.compiler2.enabled & vm.flagless
+ */
+
+package compiler.vectorization.runner;
+
+import java.util.Random;
+
+public class LoopCombinedOpTest extends VectorizationTestRunner {
+
+    private static final int SIZE = 2345;
+
+    private int[] a;
+    private int[] b;
+    private int[] c;
+    private int[] d;
+    private long[] l1;
+    private long[] l2;
+    private short[] s1;
+    private short[] s2;
+    private int intInv;
+
+    public LoopCombinedOpTest() {
+        a = new int[SIZE];
+        b = new int[SIZE];
+        c = new int[SIZE];
+        d = new int[SIZE];
+        l1 = new long[SIZE];
+        l2 = new long[SIZE];
+        s1 = new short[SIZE];
+        s2 = new short[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            a[i] = -654321 * i;
+            b[i] =  123456 * i;
+            c[i] = -998877 * i;
+            d[i] =  778899 * i;
+            l1[i] = 5000000000L * i;
+            l2[i] = -600000000L * i;
+            s1[i] = (short) (3 * i);
+            s2[i] = (short) (-2 * i);
+        }
+        Random ran = new Random(999);
+        intInv = ran.nextInt();
+    }
+
+    @Test
+    public int[] opWithConstant() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = a[i] + 1234567890;
+        }
+        return res;
+    }
+
+    @Test
+    public int[] opWithLoopInvariant() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = b[i] * intInv;
+        }
+        return res;
+    }
+
+    @Test
+    public int[] opWithConstantAndLoopInvariant() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = c[i] * (intInv & 0xfff);
+        }
+        return res;
+    }
+
+    @Test
+    public int[] multipleOps() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = a[i] & b[i] + c[i] & d[i];
+        }
+        return res;
+    }
+
+    @Test
+    public int[] multipleOpsWithMultipleConstants() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = a[i] * 12345678 + 87654321 + b[i] & 0xffff - c[i] * d[i] * 2;
+        }
+        return res;
+    }
+
+    @Test
+    public int[] multipleStores() {
+        int[] res1 = new int[SIZE];
+        int[] res2 = new int[SIZE];
+        int[] res3 = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res1[i] = a[i] & b[i];
+            res2[i] = c[i] | d[i];
+            res3[i] = res1[i] * res2[i];
+        }
+        return res3;
+    }
+
+    @Test
+    public int[] multipleStoresWithCommonSubExpression() {
+        int[] res1 = new int[SIZE];
+        int[] res2 = new int[SIZE];
+        int[] res3 = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res1[i] = a[i] * b[i];
+            res2[i] = c[i] * d[i];
+            res3[i] = res1[i] + res2[i];
+        }
+        return res3;
+    }
+
+    @Test
+    public int[] multipleOpsWith2DifferentTypes() {
+        short[] res1 = new short[SIZE];
+        int[] res2 = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res1[i] = (short) (s1[i] + s2[i]);
+            res2[i] = a[i] + b[i];
+        }
+        return res2;
+    }
+
+    @Test
+    public long[] multipleOpsWith3DifferentTypes() {
+        short[] res1 = new short[SIZE];
+        int[] res2 = new int[SIZE];
+        long[] res3 = new long[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res1[i] = (short) (s1[i] + s2[i]);
+            res2[i] = a[i] + b[i];
+            res3[i] = l1[i] + l2[i];
+        }
+        return res3;
+    }
+
+    @Test
+    public long[] multipleOpsWith2NonAdjacentTypes() {
+        short[] res1 = new short[SIZE];
+        long[] res2 = new long[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res1[i] = (short) (s1[i] + s2[i]);
+            res2[i] = l1[i] + l2[i];
+        }
+        return res2;
+    }
+
+    @Test
+    public int[] multipleOpsWith2DifferentTypesAndConstant() {
+        short[] res1 = new short[SIZE];
+        int[] res2 = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res1[i] = (short) (s1[i] + s2[i]);
+            res2[i] = a[i] + 88888888;;
+        }
+        return res2;
+    }
+
+    @Test
+    public int[] multipleOpsWith2DifferentTypesAndInvariant() {
+        short[] res1 = new short[SIZE];
+        int[] res2 = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res1[i] = (short) (s1[i] + s2[i]);
+            res2[i] = a[i] * intInv;
+        }
+        return res2;
+    }
+
+    @Test
+    public int[] multipleOpsWith2DifferentTypesAndComplexExpression() {
+        short[] res1 = new short[SIZE];
+        int[] res2 = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res1[i] = (short) (s1[i] + s2[i]);
+            res2[i] = a[i] * (b[i] + intInv * c[i] & 0xfffffa);
+        }
+        return res2;
+    }
+
+    @Test
+    public int[] fillIndexPlusStride() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = i + 1;
+        }
+        return res;
+    }
+
+    @Test
+    public int[] addArrayWithIndex() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = a[i] + i;
+        }
+        return res;
+    }
+
+    @Test
+    public short[] multiplyAddShortIndex() {
+        short[] res = new short[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (short) (i * i + i);
+        }
+        return res;
+    }
+
+    @Test
+    public int[] multiplyBySumOfIndexAndInvariant() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = a[i] * (i + 10 + intInv);
+        }
+        return res;
+    }
+
+    @Test
+    public int[] manuallyUnrolledStride2() {
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE - 1; i += 2) {
+            res[i] = a[i] * b[i];
+            res[i + 1] = a[i + 1] * b[i + 1];
+        }
+        return res;
+    }
+
+    @Test
+    public int partialVectorizableLoop() {
+        int[] res = new int[SIZE];
+        int k = 9;
+        for (int i = 0; i < SIZE / 2; i++) {
+            res[i] = a[i] * b[i];
+            k = 3 * k + 1;
+        }
+        return k;
+    }
+}
+
--- a/test/hotspot/jtreg/compiler/vectorization/runner/LoopControlFlowTest.java
+++ b/test/hotspot/jtreg/compiler/vectorization/runner/LoopControlFlowTest.java
@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2022, Arm Limited. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @summary Vectorization test on simple control flow in loop
+ * @library /test/lib /
+ *
+ * @build sun.hotspot.WhiteBox
+ *        compiler.vectorization.runner.VectorizationTestRunner
+ *
+ * @run driver jdk.test.lib.helpers.ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:.
+ *                   -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+WhiteBoxAPI
+ *                   compiler.vectorization.runner.LoopControlFlowTest
+ *
+ * @requires vm.compiler2.enabled & vm.flagless
+ */
+
+package compiler.vectorization.runner;
+
+import java.util.Random;
+
+public class LoopControlFlowTest extends VectorizationTestRunner { // loops whose body contains a simple if/else
+
+    private static final int SIZE = 2345; // length of every input and result array
+
+    private int[] a;
+    private int[] b;
+    private boolean invCond; // fixed in the constructor; does not change while a loop runs
+
+    public LoopControlFlowTest() { // fills inputs with index-derived values and picks invCond from a fixed-seed Random
+        a = new int[SIZE];
+        b = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            a[i] = i + 80000;
+            b[i] = 80 * i;
+        }
+        Random ran = new Random(505050); // fixed seed keeps invCond identical across runs
+        invCond = (ran.nextInt() % 2 == 0);
+    }
+
+    @Test
+    public int[] loopInvariantCondition() { // branch condition is the same for every iteration
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            if (invCond) {
+                res[i] = a[i] + b[i];
+            } else {
+                res[i] = a[i] - b[i];
+            }
+        }
+        return res;
+    }
+
+    @Test
+    public int[] arrayElementCondition() { // branch condition depends on the array element of the current iteration
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            if (b[i] > 10000) { // b[i] is 80 * i, so this first becomes true at i = 126
+                res[i] = a[i] + b[i];
+            } else {
+                res[i] = a[i] - b[i];
+            }
+        }
+        return res;
+    }
+}
+
--- a/test/hotspot/jtreg/compiler/vectorization/runner/LoopLiveOutNodesTest.java
+++ b/test/hotspot/jtreg/compiler/vectorization/runner/LoopLiveOutNodesTest.java
@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2022, Arm Limited. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @summary Vectorization test on loops with live out nodes
+ * @library /test/lib /
+ *
+ * @build sun.hotspot.WhiteBox
+ *        compiler.vectorization.runner.VectorizationTestRunner
+ *
+ * @run driver jdk.test.lib.helpers.ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:.
+ *                   -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+WhiteBoxAPI
+ *                   compiler.vectorization.runner.LoopLiveOutNodesTest
+ *
+ * @requires vm.compiler2.enabled & vm.flagless
+ */
+
+package compiler.vectorization.runner;
+
+import java.util.Random;
+
+public class LoopLiveOutNodesTest extends VectorizationTestRunner { // loops whose induction variable or computed value is used after the loop
+
+    private static final int SIZE = 3333; // length of the input and result arrays
+
+    private int[] a;
+    private int start; // first index processed by every loop below
+    private int limit; // exclusive upper bound; always start + 1357
+
+    public LoopLiveOutNodesTest() { // fills a with index-derived values and picks the loop range from a fixed-seed Random
+        a = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            a[i] = -697989 * i;
+        }
+        Random ran = new Random(31415926); // fixed seed keeps the range identical across runs
+        start = 999 + ran.nextInt() % 100; // % keeps the dividend's sign, so start lies in [900, 1098]
+        limit = start + 1357; // at most 2455, which is below SIZE
+    }
+
+    @Test
+    public int SimpleIvUsed() { // returns the induction variable's final value
+        int i = 0;
+        int[] res = new int[SIZE];
+        for (i = start; i < limit; i++) {
+            res[i] = a[i] * 2757;
+        }
+        return i; // equals limit once the loop has finished
+    }
+
+    @Test
+    public int indexedByIvUsed() { // uses the final induction variable as an array index after the loop
+        int i = 0;
+        int[] res = new int[SIZE];
+        for (i = start; i < limit; i++) {
+            res[i] = a[i] & 0x77ff77ff;
+        }
+        return a[i - 1]; // element at the last index the loop visited
+    }
+
+    @Test
+    public int ivUsedMultiple() { // uses the final induction variable twice in one expression
+        int i = 0;
+        int[] res = new int[SIZE];
+        for (i = start; i < limit; i++) {
+            res[i] = a[i] | 65535;
+        }
+        return i * i;
+    }
+
+    @Test
+    public int ivUsedComplexExpr() { // mixes array reads and arithmetic on the final induction variable
+        int i = 0;
+        int[] res = new int[SIZE];
+        for (i = start; i < limit; i++) {
+            res[i] = a[i] - 100550;
+        }
+        return a[i] + a[i - 2] + i * i; // i equals limit here; limit is below SIZE, so a[i] is in bounds
+    }
+
+    @Test
+    public int[] ivUsedAnotherLoop() { // the first loop's final induction variable bounds a second loop
+        int i = 0;
+        int[] res = new int[SIZE];
+        for (i = start; i < limit; i++) {
+            res[i] = a[i] * 100;
+        }
+        for (int j = i; j < i + 55; j++) { // continues for 55 more elements from where the first loop stopped
+            res[j] = a[j - 500] + 2323;
+        }
+        return res;
+    }
+
+    @Test
+    public int ivUsedInParallel() { // two variables advance together and both are used after the loop
+        int i = 0, j = 0;
+        int[] res = new int[SIZE];
+        for (i = start; i < limit; i++, j++) { // j counts iterations while i walks the index range
+            res[i] = a[i] + i;
+        }
+        return i * j + a[i] * a[j]; // here i equals limit and j equals limit - start
+    }
+
+    @Test
+    public int valueLiveOut() { // returns a value computed in the last iteration instead of the induction variable
+        int val = 0;
+        int[] res = new int[SIZE];
+        for (int i = start; i < limit; i++) {
+            val = a[i] - 101;
+            res[i] = val;
+        }
+        return val; // holds the value from the final iteration
+    }
+}
+
--- a/test/hotspot/jtreg/compiler/vectorization/runner/LoopRangeStrideTest.java
+++ b/test/hotspot/jtreg/compiler/vectorization/runner/LoopRangeStrideTest.java
@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2022, Arm Limited. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @summary Vectorization test on different loop ranges and strides
+ * @library /test/lib /
+ *
+ * @build sun.hotspot.WhiteBox
+ *        compiler.vectorization.runner.VectorizationTestRunner
+ *
+ * @run driver jdk.test.lib.helpers.ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:.
+ *                   -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+WhiteBoxAPI
+ *                   compiler.vectorization.runner.LoopRangeStrideTest
+ *
+ * @requires vm.compiler2.enabled & vm.flagless
+ */
+
+package compiler.vectorization.runner;
+
+import java.util.Random;
+
+public class LoopRangeStrideTest extends VectorizationTestRunner { // loops with varying bounds, exit tests, strides and index directions
+
+    private static final int SIZE = 2345; // length of every input and result array
+
+    private int[] a;
+    private int[] b;
+    private int start; // lower bound for the non-constant range tests
+    private int end; // exclusive upper bound; always start + 1315
+
+    public LoopRangeStrideTest() { // fills inputs with index-derived values and picks a range from a fixed-seed Random
+        a = new int[SIZE];
+        b = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            a[i] = -i / 2; // unary minus applies first: (-i) / 2
+            b[i] = 444 * i - 12345;
+        }
+
+        Random ran = new Random(0); // fixed seed keeps the range identical across runs
+        start = Math.abs(ran.nextInt() % 1000); // remainder lies in (-1000, 1000), so start lies in [0, 999]
+        end = start + 1315; // at most 2314, which is below SIZE
+    }
+
+    // ---------------- Range ----------------
+    @Test
+    public int[] smallConstantRange() { // seven iterations over constant bounds
+        int[] res = new int[SIZE];
+        for (int i = 20; i < 27; i++) {
+            res[i] = a[i] + b[i];
+        }
+        return res;
+    }
+
+    @Test
+    public int[] nonConstantRange() { // both bounds come from fields rather than constants
+        int[] res = new int[SIZE];
+        for (int i = start; i < end; i++) {
+            res[i] = a[i] - b[i];
+        }
+        return res;
+    }
+
+    @Test
+    public int[] crossZeroRange() { // induction variable runs from negative to positive
+        int[] res = new int[SIZE];
+        for (int i = -20; i < 20; i++) {
+            res[i + 50] = a[i + 50] + b[i + 50]; // the +50 offset keeps every index non-negative (30..69)
+        }
+        return res;
+    }
+
+    @Test
+    public int[] nonEqualTestRange() { // exit test uses != instead of <
+        int[] res = new int[SIZE];
+        for (int i = start; i != end; i++) { // start is below end, so counting up by one reaches end
+            res[i] = a[i] - b[i];
+        }
+        return res;
+    }
+
+    @Test
+    public int[] shortInductionLoop() { // induction variable is declared as short
+        int[] res = new int[SIZE];
+        for (short s = 123; s < 789; s++) {
+            res[s] = a[s] * b[s];
+        }
+        return res;
+    }
+
+    @Test
+    public int[] whileLoop() { // same range as nonConstantRange, written as a while loop
+        int[] res = new int[SIZE];
+        int i = start;
+        while (i < end) {
+            res[i] = a[i] & b[i];
+            i++;
+        }
+        return res;
+    }
+
+    @Test
+    public int[] doWhileLoop() { // same range again, written as a do-while loop
+        int[] res = new int[SIZE];
+        int i = start;
+        do { // body executes before the first bounds test
+            res[i] = a[i] | b[i];
+            i++;
+        } while (i < end);
+        return res;
+    }
+
+    // ---------------- Stride ----------------
+    @Test
+    public int[] stride2Loop() { // touches every second element
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i += 2) {
+            res[i] = a[i] * b[i];
+        }
+        return res;
+    }
+
+    @Test
+    public int[] stride3Loop() { // touches every third element
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i += 3) {
+            res[i] = a[i] * b[i];
+        }
+        return res;
+    }
+
+    @Test
+    public int[] stride4Loop() { // touches every fourth element
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i += 4) {
+            res[i] = a[i] * b[i];
+        }
+        return res;
+    }
+
+
+    @Test
+    public int[] countDownLoop() { // walks the arrays from the last index downwards
+        int[] res = new int[SIZE];
+        for (int i = SIZE - 1; i > 0; i--) { // stops before index 0, which therefore stays 0
+            res[i] = a[i] * b[i];
+        }
+        return res;
+    }
+
+    @Test
+    public int[] strideMinus2Loop() { // counts down in steps of two
+        int[] res = new int[SIZE];
+        for (int i = SIZE - 1; i > 0; i -= 2) {
+            res[i] = a[i] * b[i];
+        }
+        return res;
+    }
+
+    // ---------- Stride with scale ----------
+    @Test
+    public int[] countupLoopWithNegScale() { // i counts up while the array index SIZE - i counts down
+        int[] res = new int[SIZE];
+        for (int i = SIZE / 2; i < SIZE; i++) {
+            res[SIZE - i] = a[SIZE - i] * b[SIZE - i]; // index falls from SIZE - SIZE / 2 down to 1
+        }
+        return res;
+    }
+
+    @Test
+    public int[] countDownLoopWithNegScale() { // i counts down while the array index SIZE - i counts up
+        int[] res = new int[SIZE];
+        for (int i = SIZE / 2; i > 0; i--) {
+            res[SIZE - i] = a[SIZE - i] * b[SIZE - i]; // index rises from SIZE - SIZE / 2 up to SIZE - 1
+        }
+        return res;
+    }
+}
+
--- a/test/hotspot/jtreg/compiler/vectorization/runner/LoopReductionOpTest.java
+++ b/test/hotspot/jtreg/compiler/vectorization/runner/LoopReductionOpTest.java
@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2022, Arm Limited. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @summary Vectorization test on reduction operations
+ * @library /test/lib /
+ *
+ * @build sun.hotspot.WhiteBox
+ *        compiler.vectorization.runner.VectorizationTestRunner
+ *
+ * @run driver jdk.test.lib.helpers.ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:.
+ *                   -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+WhiteBoxAPI
+ *                   compiler.vectorization.runner.LoopReductionOpTest
+ *
+ * @requires vm.compiler2.enabled & vm.flagless
+ */
+
+package compiler.vectorization.runner;
+
+import java.util.Random;
+
+public class LoopReductionOpTest extends VectorizationTestRunner { // loops that accumulate into scalar variables
+
+    private static final int SIZE = 2345; // length of every input array
+
+    private int[] a;
+    private int[] b;
+    private int[] c; // initialised in the constructor but not read by any test in this class
+    private double[] d;
+    private float[] f;
+    private long[] l;
+    private int intInv; // value fixed in the constructor; used as a loop-invariant addend
+
+    public LoopReductionOpTest() { // fills all inputs with index-derived values so every run sees the same data
+        a = new int[SIZE];
+        b = new int[SIZE];
+        c = new int[SIZE];
+        d = new double[SIZE];
+        f = new float[SIZE];
+        l = new long[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            a[i] = -531 * i;
+            b[i] = 2222 * i + 8;
+            c[i] = 2147480000;
+            d[i] = i * 2.5;
+            f[i] = i * -(333.3f);
+            l[i] = 444444444L * i;
+        }
+        Random ran = new Random(10001); // fixed seed keeps intInv identical across runs
+        intInv = ran.nextInt();
+    }
+
+    @Test
+    public int reductionAddSumOfArray() { // sums a[i] + b[i] over the whole range
+        int res = 0;
+        for (int i = 0; i < SIZE; i++) {
+            res += (a[i] + b[i]);
+        }
+        return res;
+    }
+
+    @Test
+    public int reductionAddIndex() { // sums the loop index itself
+        int res = 0;
+        for (int i = 0; i < SIZE; i++) {
+            res += i;
+        }
+        return res;
+    }
+
+    @Test
+    // Note that adding constant in loop would be directly optimized to
+    // scalar operations, hence this case is not vectorized.
+    public int reductionAddConstant() { // adds the same literal on every iteration
+        int res = 0;
+        for (int i = 0; i < SIZE; i++) {
+            res += 222;
+        }
+        return res;
+    }
+
+    @Test
+    public int reductionAddLoopInv() { // adds the invariant field intInv on every iteration
+        int res = 0;
+        for (int i = 0; i < SIZE; i++) {
+            res += intInv;
+        }
+        return res;
+    }
+
+    @Test
+    public int reductionAddSumOfMultiple() { // sums a multi-operator expression per element
+        int res = 0;
+        for (int i = 0; i < SIZE; i++) {
+            res += (a[i] + b[i]) * i << 2; // '*' binds tighter than '<<': ((a + b) * i) << 2
+        }
+        return res;
+    }
+
+    @Test
+    public int twoReductions() { // two independent int accumulators in one loop
+        int res1 = 0;
+        int res2 = 0;
+        for (int i = 0; i < SIZE; i++) {
+            res1 += a[i];
+            res2 += b[i];
+        }
+        return res1 * res2;
+    }
+
+    @Test
+    public float twoReductionsSameElementSize() { // int and float accumulators, both 32-bit element types
+        int res1 = 0;
+        float res2 = 0.0f;
+        for (int i = 0; i < SIZE; i++) {
+            res1 += a[i];
+            res2 += f[i];
+        }
+        return res1 * res2; // the int operand is converted to float for the multiply
+    }
+
+    @Test
+    public double twoReductionsDifferentSizes1() { // int accumulator next to a double accumulator
+        int res1 = 0;
+        double res2 = 0.0;
+        for (int i = 0; i < SIZE; i++) {
+            res1 += a[i];
+            res2 += d[i];
+        }
+        return res1 * res2; // the int operand is converted to double for the multiply
+    }
+
+    @Test
+    public double twoReductionsDifferentSizes2() { // long accumulator next to a float accumulator
+        long res1 = 0L;
+        float res2 = 0.0f;
+        for (int i = 0; i < SIZE; i++) {
+            res1 += l[i];
+            res2 += f[i];
+        }
+        return res1 * res2; // long * float is evaluated in float, then widened to double on return
+    }
+
+    @Test
+    public long reductionWithNonReductionDifferentSizes() { // a long reduction beside an ordinary int array store
+        long res = 0L;
+        int[] arr = new int[SIZE]; // written in the loop but never returned
+        for (int i = 0; i < SIZE; i++) {
+            arr[i] = a[i] + b[i];
+            res += l[i];
+        }
+        return res;
+    }
+
+    @Test
+    public long reductionLoopIndexSumDifferentSizes() { // sums the loop index into both an int and a long accumulator
+        int intSum = 0;
+        long longSum = 0L;
+        for (int i = 0; i < SIZE; i++) {
+            intSum += i;
+            longSum += i; // i is widened to long before the add
+        }
+        return intSum + 2 * longSum; // evaluated in long arithmetic
+    }
+}
+
--- a/test/hotspot/jtreg/compiler/vectorization/runner/MultipleLoopsTest.java
+++ b/test/hotspot/jtreg/compiler/vectorization/runner/MultipleLoopsTest.java
@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2022, Arm Limited. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @summary Vectorization test on multiple loops in a method
+ * @library /test/lib /
+ *
+ * @build sun.hotspot.WhiteBox
+ *        compiler.vectorization.runner.VectorizationTestRunner
+ *
+ * @run driver jdk.test.lib.helpers.ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:.
+ *                   -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+WhiteBoxAPI
+ *                   compiler.vectorization.runner.MultipleLoopsTest
+ *
+ * @requires vm.compiler2.enabled & vm.flagless
+ */
+
+package compiler.vectorization.runner;
+
+public class MultipleLoopsTest extends VectorizationTestRunner { // methods containing more than one loop, consecutive or nested
+
+    private static final int SIZE = 543; // length of every input and result array
+
+    private int[] a;
+    private int[] b;
+    private int[] c; // initialised in the constructor but not read by any test in this class
+
+    public MultipleLoopsTest() { // fills all inputs with index-derived values so every run sees the same data
+        a = new int[SIZE];
+        b = new int[SIZE];
+        c = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            a[i] = -654321 * i;
+            b[i] =  123456 * i;
+            c[i] = -998877 * i;
+        }
+    }
+
+    @Test
+    public int[] consecutiveLoops() { // three back-to-back loops; the third consumes the results of the first two
+        int[] res1 = new int[SIZE];
+        int[] res2 = new int[SIZE];
+        int[] res3 = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res1[i] = a[i] + b[i];
+        }
+        for (int i = 0; i < SIZE; i++) {
+            res2[i] = a[i] - b[i];
+        }
+        for (int i = 0; i < SIZE; i++) {
+            res3[i] = res1[i] * res2[i];
+        }
+        return res3;
+    }
+
+    @Test
+    public int[] consecutiveLoopsNested() { // two consecutive inner loops repeated 30 times by an outer loop
+        int[] res = new int[SIZE];
+        for (int outer = 0; outer < 30; outer++) {
+            for (int i = 0; i < SIZE / 2; i++) { // first half accumulates a
+                res[i] += a[i];
+            }
+            for (int i = SIZE / 2; i < SIZE; i++) { // second half is multiplied by b; it starts at 0
+                res[i] *= b[i];
+            }
+        } // Outer loop is counted
+        return res;
+    }
+
+    @Test
+    public int[] nestedLoopOuterNonCounted() { // inner array loop inside an outer loop whose variable doubles each pass
+        int i = 1;
+        int[] res = new int[SIZE];
+        while (i < SIZE) {
+            int val = i * a[i]; // fixed for the duration of the inner loop
+            for (int j = 0; j < SIZE; j++) {
+                res[j] = b[j] * val; // every outer pass overwrites all of res
+            }
+            i *= 2;
+        } // Outer loop is non-counted
+        return res;
+    }
+
+    @Test
+    public int[] nestedLoopIndexCompute() { // array index is the sum of both loop variables
+        int[] res = new int[SIZE];
+        for (int i = 50; i < 100; i++) {
+            for (int j = 0; j < 200 - i; j++) { // inner bound depends on i, so i + j never exceeds 199
+                res[i + j] = a[i + j] * b[i + j];
+            }
+        }
+        return res;
+    }
+}
+
--- a/test/hotspot/jtreg/compiler/vectorization/runner/StripMinedLoopTest.java
+++ b/test/hotspot/jtreg/compiler/vectorization/runner/StripMinedLoopTest.java
@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2022, Arm Limited. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @summary Vectorization test with small strip mining iterations
+ * @library /test/lib /
+ *
+ * @build sun.hotspot.WhiteBox
+ *        compiler.vectorization.runner.VectorizationTestRunner
+ *
+ * @run driver jdk.test.lib.helpers.ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:.
+ *                   -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+WhiteBoxAPI
+ *                   -XX:LoopStripMiningIter=10
+ *                   compiler.vectorization.runner.StripMinedLoopTest
+ *
+ * @requires vm.compiler2.enabled & vm.flagless
+ */
+
+package compiler.vectorization.runner;
+
+import java.util.Random;
+
+public class StripMinedLoopTest extends VectorizationTestRunner { // simple loops; the jtreg header of this file runs them with -XX:LoopStripMiningIter=10
+
+    private static final int SIZE = 2345; // length of every input and result array
+
+    private int[] a = new int[SIZE];
+    private int[] b = new int[SIZE];
+
+    public StripMinedLoopTest() { // fills a with 2 and b with 3
+        for (int i = 0; i < SIZE; i++) {
+            a[i] = 2;
+            b[i] = 3;
+        }
+    }
+
+    @Test
+    public int[] stripMinedVectorLoop() { // element-wise add of a and b into a new array
+        int[] res = new int[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = a[i] + b[i];
+        }
+        return res;
+    }
+
+    @Test
+    public int stripMinedReductionLoop() { // sums every element of a
+        int res = 0;
+        for (int i = 0; i < SIZE; i++) {
+            res += a[i];
+        }
+        return res;
+    }
+}
+
--- a/test/hotspot/jtreg/compiler/vectorization/runner/VectorizationTestRunner.java
+++ b/test/hotspot/jtreg/compiler/vectorization/runner/VectorizationTestRunner.java
@ -0,0 +1,207 @@
+/*
+ * Copyright (c) 2022, Arm Limited. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package compiler.vectorization.runner;
+
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+import java.lang.reflect.Array;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Method;
+import java.lang.reflect.Modifier;
+
+import java.io.File;
+
+import jdk.test.lib.Utils;
+
+import sun.hotspot.WhiteBox;
+
+/**
+ * Base class and entry point of the vectorization tests in this package.
+ *
+ * {@link #main} creates an instance of the test class named by
+ * {@code Utils.TEST_NAME} and calls {@link #run}. {@code run} executes every
+ * method of that class annotated with {@link Test} twice - once in the
+ * interpreter and once after C2 compilation - and fails the test with a
+ * {@link RuntimeException} if the two results differ.
+ */
+public class VectorizationTestRunner {
+
+    private static final WhiteBox WB = WhiteBox.getWhiteBox();
+
+    // Compilation levels compared against WB.getMethodCompilationLevel()
+    private static final int COMP_LEVEL_INTP = 0;
+    private static final int COMP_LEVEL_C2 = 4;
+
+    // Currently not referenced by this class
+    private static final int NMETHOD_COMP_LEVEL_IDX = 1;
+    private static final int NMETHOD_INSTS_IDX = 2;
+
+    // Maximum time to wait for an enqueued C2 compilation to finish
+    private static final long COMP_THRES_SECONDS = 30;
+
+    /**
+     * Marks a method of a test class as a test case to be executed by
+     * {@link #run}.
+     */
+    @Target(ElementType.METHOD)
+    @Retention(RetentionPolicy.RUNTIME)
+    protected @interface Test {}
+
+    /**
+     * Runs all methods annotated with {@link Test} that are declared in the
+     * runtime class of this object.
+     *
+     * @throws RuntimeException if a test method is malformed, throws, is not
+     *         compiled in time or returns inconsistent results. The triggering
+     *         exception is attached as the cause.
+     */
+    protected void run() {
+        // Add extra VM options to enable post loop vectorization
+        WB.setBooleanVMFlag("UnlockExperimentalVMOptions", true);
+        WB.setBooleanVMFlag("PostLoopMultiversioning", true);
+
+        // For each method annotated with @Test in the test class, this test runner
+        // invokes it twice - first time in the interpreter and second time compiled
+        // by C2. Then this runner compares the two return values. Hence we require
+        // each test method to return a primitive value or an array of primitive type.
+        // And each test method should not throw any exceptions.
+        Class<?> klass = getClass();
+        for (Method method : klass.getDeclaredMethods()) {
+            try {
+                if (method.isAnnotationPresent(Test.class)) {
+                    verifyTestMethod(method);
+                    runTestOnMethod(method);
+                }
+            } catch (Exception e) {
+                // Keep the original exception as the cause so that its stack
+                // trace is not lost in the test output
+                throw new RuntimeException("Test failed in " + klass.getName() +
+                        "." + method.getName() + ": " + e.getMessage(), e);
+            }
+        }
+    }
+
+    /**
+     * Checks that a test method has the shape this runner can handle: no
+     * parameters, public and non-static, and a return type that is either a
+     * non-void primitive or an array of primitives.
+     *
+     * @param method the test method to check
+     * @throws RuntimeException if any of the requirements is violated
+     */
+    private void verifyTestMethod(Method method) {
+        // Check method parameter count
+        if (method.getParameterCount() > 0) {
+            fail("Test method should have no parameter.");
+        }
+
+        // Check method modifiers
+        int modifiers = method.getModifiers();
+        if (!Modifier.isPublic(modifiers) || Modifier.isStatic(modifiers)) {
+            fail("Test method should be public and non-static.");
+        }
+
+        // Check method return type
+        Class<?> retType = method.getReturnType();
+        if (retType.isPrimitive()) {
+            if (retType == Void.TYPE) {
+                fail("Test method should return non-void.");
+            }
+        } else if (retType.isArray()) {
+            Class<?> elemType = retType.getComponentType();
+            if (!elemType.isPrimitive()) {
+                fail("Only primitive array types are supported.");
+            }
+        } else {
+            fail("Test method should not return Object type.");
+        }
+    }
+
+    /**
+     * Invokes a test method in the interpreter and again after it has been
+     * compiled by C2, then compares the two return values.
+     *
+     * @param method the test method, already checked by verifyTestMethod()
+     * @throws InterruptedException if the thread is interrupted while waiting
+     *         for the compilation to finish
+     * @throws RuntimeException if an invocation throws, the method is not
+     *         compiled within COMP_THRES_SECONDS or the results differ
+     */
+    private void runTestOnMethod(Method method) throws InterruptedException {
+        Object expected = null;
+        Object actual = null;
+
+        // Lock compilation and invoke the method to get the reference result
+        // from the interpreter. The lock is released in a finally block so that
+        // a failing invocation cannot leave compilation locked.
+        WB.lockCompilation();
+        try {
+            try {
+                expected = method.invoke(this);
+            } catch (Exception e) {
+                e.printStackTrace();
+                fail("Exception is thrown in test method invocation (interpreter).");
+            }
+            assert(WB.getMethodCompilationLevel(method) == COMP_LEVEL_INTP);
+        } finally {
+            WB.unlockCompilation();
+        }
+
+        // Compile the method and invoke it again. Elapsed time is measured with
+        // System.nanoTime() so that wall clock adjustments cannot shorten or
+        // extend the timeout.
+        final long timeoutNanos = COMP_THRES_SECONDS * 1_000_000_000L;
+        final long enqueueTime = System.nanoTime();
+        WB.enqueueMethodForCompilation(method, COMP_LEVEL_C2);
+        while (WB.getMethodCompilationLevel(method) != COMP_LEVEL_C2) {
+            if (System.nanoTime() - enqueueTime > timeoutNanos) {
+                fail("Method is not compiled after " + COMP_THRES_SECONDS + "s.");
+            }
+            Thread.sleep(50 /*ms*/);
+        }
+        try {
+            actual = method.invoke(this);
+        } catch (Exception e) {
+            e.printStackTrace();
+            fail("Exception is thrown in test method invocation (C2).");
+        }
+        assert(WB.getMethodCompilationLevel(method) == COMP_LEVEL_C2);
+
+        // Check if two invocations return the same
+        Class<?> retType = method.getReturnType();
+        if (retType.isArray()) {
+            // Method invocations from Java reflection API always return a boxed object.
+            // Hence, for methods returning a primitive array, we can only use the
+            // reflection API to check the consistency of the elements one by one.
+            if (expected == null && actual == null) {
+                return;
+            }
+            if (expected == null ^ actual == null) {
+                fail("Inconsistent return value: null/non-null.");
+            }
+            int length = Array.getLength(expected);
+            if (Array.getLength(actual) != length) {
+                fail("Inconsistent array length: expected = " + length + ", actual = " +
+                        Array.getLength(actual));
+            }
+            for (int idx = 0; idx < length; idx++) {
+                Object e1 = Array.get(expected, idx);
+                Object e2 = Array.get(actual, idx);
+                if (!e1.equals(e2)) {
+                    fail("Inconsistent value at array index [" + idx + "], expected = " +
+                            e1 + ", actual = " + e2);
+                }
+            }
+        } else {
+            // Method invocations from Java reflection API always return a boxed object.
+            // Hence, we should use equals() to check the consistency for methods which
+            // return primitive type.
+            if (!expected.equals(actual)) {
+                fail("Inconsistent return value: expected = " + expected
+                        + ", actual = " + actual);
+            }
+        }
+    }
+
+    /**
+     * Creates the test object for a test source file name.
+     *
+     * @param testName test file name ending with ".java" (case-insensitive),
+     *        with '/' separating the package components, e.g.
+     *        "compiler/vectorization/runner/StripMinedLoopTest.java"
+     * @return a new instance of the named class, created with its public
+     *         no-arg constructor
+     * @throws RuntimeException if the name is null or has no ".java" suffix,
+     *         or if the class cannot be loaded or instantiated
+     */
+    private static VectorizationTestRunner createTestInstance(String testName) {
+        // Report a missing name like any other invalid name instead of
+        // failing with a NullPointerException
+        if (testName == null || !testName.toLowerCase().endsWith(".java")) {
+            fail("Invalid test file name " + testName);
+        }
+        // Turn the file name into a class name: strip ".java", '/' -> '.'
+        testName = testName.substring(0, testName.length() - 5);
+        testName = testName.replace('/', '.');
+
+        VectorizationTestRunner instance = null;
+        try {
+            Class<?> klass = Class.forName(testName);
+            Constructor<?> ctor = klass.getConstructor();
+            instance = (VectorizationTestRunner) ctor.newInstance();
+        } catch (Exception e) {
+            e.printStackTrace();
+            fail("Cannot create test instance for class " + testName);
+        }
+
+        return instance;
+    }
+
+    // Aborts the current test by throwing a RuntimeException with the given reason
+    private static void fail(String reason) {
+        throw new RuntimeException(reason);
+    }
+
+    /**
+     * Test entry point: runs all test methods of the class named by
+     * Utils.TEST_NAME.
+     */
+    public static void main(String[] args) {
+        VectorizationTestRunner testObj = createTestInstance(Utils.TEST_NAME);
+        testObj.run();
+    }
+}
+