From aa48705dddee674baa479f5128cfc3b426d87d2d Mon Sep 17 00:00:00 2001
From: Fei Gao <fgao@openjdk.org>
Date: Tue, 27 Sep 2022 01:24:17 +0000
Subject: [PATCH] 8289422: Fix and re-enable vector conditional move

Reviewed-by: thartmann, kvn
---
 src/hotspot/cpu/aarch64/aarch64_vector.ad     |  42 ++++
 src/hotspot/cpu/aarch64/aarch64_vector_ad.m4  |  42 ++++
 src/hotspot/share/opto/matcher.cpp            |  18 +-
 src/hotspot/share/opto/superword.cpp          |  40 ++-
 .../c2/irTests/TestVectorConditionalMove.java | 229 ++++++++++++++++++
 .../compiler/lib/ir_framework/IRNode.java     |   2 +
 .../vm/compiler/TypeVectorOperations.java     |  16 ++
 7 files changed, 374 insertions(+), 15 deletions(-)
 create mode 100644 test/hotspot/jtreg/compiler/c2/irTests/TestVectorConditionalMove.java

diff --git a/src/hotspot/cpu/aarch64/aarch64_vector.ad b/src/hotspot/cpu/aarch64/aarch64_vector.ad
index 463158855e1..2c6b65e10fb 100644
--- a/src/hotspot/cpu/aarch64/aarch64_vector.ad
+++ b/src/hotspot/cpu/aarch64/aarch64_vector.ad
@@ -5691,6 +5691,48 @@ instruct vblend_sve(vReg dst, vReg src1, vReg src2, pReg pg) %{
   ins_pipe(pipe_slow);
 %}
 
+// ------------------------- Vector conditional move --------------------------
+
+instruct vcmove_neon(vReg dst, vReg src1, vReg src2, immI cond, cmpOp copnd) %{
+  predicate(UseSVE == 0 ||
+            (VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n)) &&
+             n->in(1)->in(2)->get_int() != BoolTest::ne)); // in(1) is the Binary of copnd and cond, so in(1)->in(2) is the cond constant
+  match(Set dst (CMoveVF (Binary copnd cond) (Binary src1 src2)));
+  match(Set dst (CMoveVD (Binary copnd cond) (Binary src1 src2)));
+  effect(TEMP_DEF dst);
+  format %{ "vcmove_neon.$copnd $dst, $src1, $src2\t# vector conditional move fp" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    uint length_in_bytes = Matcher::vector_length_in_bytes(this);
+    assert(length_in_bytes == 8 || length_in_bytes == 16, "must be"); // only 64-bit and 128-bit vectors are expected here
+    __ neon_compare($dst$$FloatRegister, bt, $src1$$FloatRegister,
+                    $src2$$FloatRegister, (int)($cond$$constant),
+                    /* isQ */ length_in_bytes == 16); // dst is written here while src1 and src2 are still needed by bsl below, which is why dst is TEMP_DEF
+    __ bsl($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
+           $src2$$FloatRegister, $src1$$FloatRegister); // bsl: keep src2 bits where the compare result in dst is set, src1 bits elsewhere
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vcmove_sve(vReg dst, vReg src1, vReg src2, immI cond, cmpOp copnd, pRegGov pgtmp) %{
+  predicate(!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n)) ||
+            (UseSVE > 0 && n->in(1)->in(2)->get_int() == BoolTest::ne)); // with SVE available, BoolTest::ne always takes this rule (vcmove_neon rejects it)
+  match(Set dst (CMoveVF (Binary copnd cond) (Binary src1 src2)));
+  match(Set dst (CMoveVD (Binary copnd cond) (Binary src1 src2)));
+  effect(TEMP pgtmp);
+  format %{ "vcmove_sve.$copnd $dst, $src1, $src2\t# vector conditional move fp. KILL $pgtmp" %}
+  ins_encode %{
+    assert(UseSVE > 0, "must be sve");
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    uint length_in_bytes = Matcher::vector_length_in_bytes(this); // NOTE(review): length_in_bytes is unused in this encoding
+    __ sve_compare($pgtmp$$PRegister, bt, ptrue, $src1$$FloatRegister,
+                   $src2$$FloatRegister, (int)($cond$$constant)); // pgtmp receives the per-lane result of cond applied to src1, src2 (ptrue presumably enables all lanes)
+    __ sve_sel($dst$$FloatRegister, __ elemType_to_regVariant(bt),
+               $pgtmp$$PRegister, $src2$$FloatRegister, $src1$$FloatRegister); // sel: src2 in lanes where pgtmp is set, src1 elsewhere
+  %}
+  ins_pipe(pipe_slow);
+%}
+
 // ------------------------------ Vector round ---------------------------------
 
 // vector Math.round
diff --git a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
index 7f02e875373..0a04fb39b72 100644
--- a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
+++ b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
@@ -4063,6 +4063,48 @@ instruct vblend_sve(vReg dst, vReg src1, vReg src2, pReg pg) %{
   ins_pipe(pipe_slow);
 %}
 
+// ------------------------- Vector conditional move --------------------------
+
+instruct vcmove_neon(vReg dst, vReg src1, vReg src2, immI cond, cmpOp copnd) %{
+  predicate(UseSVE == 0 ||
+            (VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n)) &&
+             n->in(1)->in(2)->get_int() != BoolTest::ne)); // in(1) is the Binary of copnd and cond, so in(1)->in(2) is the cond constant
+  match(Set dst (CMoveVF (Binary copnd cond) (Binary src1 src2)));
+  match(Set dst (CMoveVD (Binary copnd cond) (Binary src1 src2)));
+  effect(TEMP_DEF dst);
+  format %{ "vcmove_neon.$copnd $dst, $src1, $src2\t# vector conditional move fp" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    uint length_in_bytes = Matcher::vector_length_in_bytes(this);
+    assert(length_in_bytes == 8 || length_in_bytes == 16, "must be"); // only 64-bit and 128-bit vectors are expected here
+    __ neon_compare($dst$$FloatRegister, bt, $src1$$FloatRegister,
+                    $src2$$FloatRegister, (int)($cond$$constant),
+                    /* isQ */ length_in_bytes == 16); // dst is written here while src1 and src2 are still needed by bsl below, which is why dst is TEMP_DEF
+    __ bsl($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
+           $src2$$FloatRegister, $src1$$FloatRegister); // bsl: keep src2 bits where the compare result in dst is set, src1 bits elsewhere
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vcmove_sve(vReg dst, vReg src1, vReg src2, immI cond, cmpOp copnd, pRegGov pgtmp) %{
+  predicate(!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n)) ||
+            (UseSVE > 0 && n->in(1)->in(2)->get_int() == BoolTest::ne)); // with SVE available, BoolTest::ne always takes this rule (vcmove_neon rejects it)
+  match(Set dst (CMoveVF (Binary copnd cond) (Binary src1 src2)));
+  match(Set dst (CMoveVD (Binary copnd cond) (Binary src1 src2)));
+  effect(TEMP pgtmp);
+  format %{ "vcmove_sve.$copnd $dst, $src1, $src2\t# vector conditional move fp. KILL $pgtmp" %}
+  ins_encode %{
+    assert(UseSVE > 0, "must be sve");
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    uint length_in_bytes = Matcher::vector_length_in_bytes(this); // NOTE(review): length_in_bytes is unused in this encoding
+    __ sve_compare($pgtmp$$PRegister, bt, ptrue, $src1$$FloatRegister,
+                   $src2$$FloatRegister, (int)($cond$$constant)); // pgtmp receives the per-lane result of cond applied to src1, src2 (ptrue presumably enables all lanes)
+    __ sve_sel($dst$$FloatRegister, __ elemType_to_regVariant(bt),
+               $pgtmp$$PRegister, $src2$$FloatRegister, $src1$$FloatRegister); // sel: src2 in lanes where pgtmp is set, src1 elsewhere
+  %}
+  ins_pipe(pipe_slow);
+%}
+
 // ------------------------------ Vector round ---------------------------------
 
 // vector Math.round
diff --git a/src/hotspot/share/opto/matcher.cpp b/src/hotspot/share/opto/matcher.cpp
index c5b85b4d9ce..af7ec7bc0f0 100644
--- a/src/hotspot/share/opto/matcher.cpp
+++ b/src/hotspot/share/opto/matcher.cpp
@@ -2356,9 +2356,7 @@ void Matcher::find_shared_post_visit(Node* n, uint opcode) {
     case Op_CMoveI:
     case Op_CMoveL:
     case Op_CMoveN:
-    case Op_CMoveP:
-    case Op_CMoveVF:
-    case Op_CMoveVD:  {
+    case Op_CMoveP: {
       // Restructure into a binary tree for Matching.  It's possible that
       // we could move this code up next to the graph reshaping for IfNodes
       // or vice-versa, but I do not want to debug this for Ladybird.
@@ -2370,6 +2368,20 @@ void Matcher::find_shared_post_visit(Node* n, uint opcode) {
       n->del_req(3);
       break;
     }
+    case Op_CMoveVF:
+    case Op_CMoveVD: {
+      // Reshape the three-input vector cmove (cond, src1, src2) into the binary tree used for matching:
+      //   CMoveVF/VD (Binary bool cond) (Binary src1 src2), where cond is the int constant found at in(1).
+      Node* in_cc = n->in(1);
+      assert(in_cc->is_Con(), "The condition input of cmove vector node must be a constant.");
+      Node* bol = new BoolNode(in_cc, (BoolTest::mask)in_cc->get_int()); // Bool built from the constant itself; its test is the BoolTest::mask value the constant carries
+      Node* pair1 = new BinaryNode(bol, in_cc); // (Binary bool cond): the Bool and the raw constant both feed the match rule
+      n->set_req(1, pair1);
+      Node* pair2 = new BinaryNode(n->in(2), n->in(3)); // (Binary src1 src2)
+      n->set_req(2, pair2);
+      n->del_req(3); // src2 now lives inside pair2, so the node is left with two data inputs
+      break;
+    }
     case Op_VectorCmpMasked: {
       Node* pair1 = new BinaryNode(n->in(2), n->in(3));
       n->set_req(2, pair1);
diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp
index 19de4394895..0d5bf864667 100644
--- a/src/hotspot/share/opto/superword.cpp
+++ b/src/hotspot/share/opto/superword.cpp
@@ -2064,12 +2064,7 @@ bool SuperWord::implemented(Node_List* p) {
         opc = Op_RShiftI;
       }
       retValue = VectorNode::implemented(opc, size, velt_basic_type(p0));
-    }
-    if (!retValue) {
-      if (is_cmov_pack(p)) {
-        NOT_PRODUCT(if(is_trace_cmov()) {tty->print_cr("SWPointer::implemented: found cmpd pack"); print_pack(p);})
-        return true;
-      }
+      NOT_PRODUCT(if(retValue && is_trace_cmov() && is_cmov_pack(p)) {tty->print_cr("SWPointer::implemented: found cmpd pack"); print_pack(p);})
     }
   }
   return retValue;
@@ -2695,12 +2690,33 @@ bool SuperWord::output() {
           ShouldNotReachHere();
         }
 
-        int cond = (int)bol->as_Bool()->_test._test;
-        Node* in_cc  = _igvn.intcon(cond);
-        NOT_PRODUCT(if(is_trace_cmov()) {tty->print("SWPointer::output: created intcon in_cc node %d", in_cc->_idx); in_cc->dump();})
-        Node* cc = bol->clone();
-        cc->set_req(1, in_cc);
-        NOT_PRODUCT(if(is_trace_cmov()) {tty->print("SWPointer::output: created bool cc node %d", cc->_idx); cc->dump();})
+        BoolTest boltest = bol->as_Bool()->_test;
+        BoolTest::mask cond = boltest._test;
+        Node* cmp = bol->in(1);
+        // When the src order of cmp node and cmove node are the same:
+        //   cmp: CmpD src1 src2
+        //   bool: Bool cmp mask
+        //   cmove: CMoveD bool src1 src2
+        // =====> vectorized, equivalent to
+        //   cmovev: CMoveVD mask src_vector1 src_vector2
+        //
+        // When the src order of cmp node and cmove node are different:
+        //   cmp: CmpD src2 src1
+        //   bool: Bool cmp mask
+        //   cmove: CMoveD bool src1 src2
+        // =====> equivalent to
+        //   cmp: CmpD src1 src2
+        //   bool: Bool cmp commute(mask)   (lt <-> gt, le <-> ge; eq and ne are unchanged)
+        //   cmove: CMoveD bool src1 src2
+        // (Note: negate(mask) is not equivalent here: it picks the other input when src1 == src2, e.g. 0.0 vs -0.0.)
+        // =====> vectorized, equivalent to
+        //   cmovev: CMoveVD commute(mask) src_vector1 src_vector2
+        if (cmp->in(2) == n->in(CMoveNode::IfFalse) && cond != BoolTest::ne && cond != BoolTest::eq) {
+          assert(cmp->in(1) == n->in(CMoveNode::IfTrue), "cmpnode and cmovenode don't share the same inputs.");
+          cond = boltest.commute();
+        }
+        Node* cc  = _igvn.intcon((int)cond);
+        NOT_PRODUCT(if(is_trace_cmov()) {tty->print("SWPointer::output: created intcon in_cc node %d", cc->_idx); cc->dump();})
 
         Node* src1 = vector_opd(p, 2); //2=CMoveNode::IfFalse
         if (src1 == NULL) {
diff --git a/test/hotspot/jtreg/compiler/c2/irTests/TestVectorConditionalMove.java b/test/hotspot/jtreg/compiler/c2/irTests/TestVectorConditionalMove.java
new file mode 100644
index 00000000000..4eff35bdfa2
--- /dev/null
+++ b/test/hotspot/jtreg/compiler/c2/irTests/TestVectorConditionalMove.java
@@ -0,0 +1,229 @@
+/*
+ * Copyright (c) 2022, Arm Limited. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package compiler.c2.irTests;
+
+import compiler.lib.ir_framework.*;
+import java.util.Random;
+import jdk.test.lib.Asserts;
+import jdk.test.lib.Utils;
+
+/*
+ * @test
+ * @bug 8289422
+ * @key randomness
+ * @summary Auto-vectorization enhancement to support vector conditional move on AArch64
+ * @requires os.arch=="aarch64"
+ * @library /test/lib /
+ * @run driver compiler.c2.irTests.TestVectorConditionalMove
+ */
+
+public class TestVectorConditionalMove {
+    final private static int SIZE = 3000; // length of every input and output array
+    private static final Random RANDOM = Utils.getRandomInstance();
+
+    private static float[] floata = new float[SIZE];
+    private static float[] floatb = new float[SIZE];
+    private static float[] floatc = new float[SIZE];
+    private static double[] doublea = new double[SIZE];
+    private static double[] doubleb = new double[SIZE];
+    private static double[] doublec = new double[SIZE];
+
+    public static void main(String[] args) {
+        TestFramework.runWithFlags("-Xcomp", "-XX:-TieredCompilation", "-XX:+UseCMoveUnconditionally",
+                                   "-XX:+UseVectorCmov", "-XX:CompileCommand=exclude,*.cmove*"); // the exclude pattern matches the cmove* reference helpers below
+    }
+
+    private float cmoveFloatGT(float a, float b) { // scalar reference for testCMoveVFGT
+        return (a > b) ? a : b;
+    }
+
+    private float cmoveFloatGTSwap(float a, float b) { // scalar reference for testCMoveVFGTSwap
+        return (b > a) ? a : b;
+    }
+
+    private float cmoveFloatLT(float a, float b) { // scalar reference for testCMoveVFLT
+        return (a < b) ? a : b;
+    }
+
+    private float cmoveFloatLTSwap(float a, float b) { // scalar reference for testCMoveVFLTSwap
+        return (b < a) ? a : b;
+    }
+
+    private float cmoveFloatEQ(float a, float b) { // scalar reference for testCMoveVFEQ
+        return (a == b) ? a : b;
+    }
+
+    private double cmoveDoubleLE(double a, double b) { // scalar reference for testCMoveVDLE
+        return (a <= b) ? a : b;
+    }
+
+    private double cmoveDoubleLESwap(double a, double b) { // scalar reference for testCMoveVDLESwap
+        return (b <= a) ? a : b;
+    }
+
+    private double cmoveDoubleGE(double a, double b) { // scalar reference for testCMoveVDGE
+        return (a >= b) ? a : b;
+    }
+
+    private double cmoveDoubleGESwap(double a, double b) { // scalar reference for testCMoveVDGESwap
+        return (b >= a) ? a : b;
+    }
+
+    private double cmoveDoubleNE(double a, double b) { // scalar reference for testCMoveVDNE
+        return (a != b) ? a : b;
+    }
+
+    @Test
+    @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVEVF, ">0", IRNode.STORE_VECTOR, ">0"})
+    private static void testCMoveVFGT(float[] a, float[] b, float[] c) {
+        for (int i = 0; i < a.length; i++) {
+            c[i] = (a[i] > b[i]) ? a[i] : b[i];
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVEVF, ">0", IRNode.STORE_VECTOR, ">0"})
+    private static void testCMoveVFGTSwap(float[] a, float[] b, float[] c) {
+        for (int i = 0; i < a.length; i++) {
+            c[i] = (b[i] > a[i]) ? a[i] : b[i]; // compare operands are in the opposite order to the selected values
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVEVF, ">0", IRNode.STORE_VECTOR, ">0"})
+    private static void testCMoveVFLT(float[] a, float[] b, float[] c) {
+        for (int i = 0; i < a.length; i++) {
+            c[i] = (a[i] < b[i]) ? a[i] : b[i];
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVEVF, ">0", IRNode.STORE_VECTOR, ">0"})
+    private static void testCMoveVFLTSwap(float[] a, float[] b, float[] c) {
+        for (int i = 0; i < a.length; i++) {
+            c[i] = (b[i] < a[i]) ? a[i] : b[i]; // compare operands are in the opposite order to the selected values
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVEVF, ">0", IRNode.STORE_VECTOR, ">0"})
+    private static void testCMoveVFEQ(float[] a, float[] b, float[] c) {
+        for (int i = 0; i < a.length; i++) {
+            c[i] = (a[i] == b[i]) ? a[i] : b[i];
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVEVD, ">0", IRNode.STORE_VECTOR, ">0"})
+    private static void testCMoveVDLE(double[] a, double[] b, double[] c) {
+        for (int i = 0; i < a.length; i++) {
+            c[i] = (a[i] <= b[i]) ? a[i] : b[i];
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVEVD, ">0", IRNode.STORE_VECTOR, ">0"})
+    private static void testCMoveVDLESwap(double[] a, double[] b, double[] c) {
+        for (int i = 0; i < a.length; i++) {
+            c[i] = (b[i] <= a[i]) ? a[i] : b[i]; // compare operands are in the opposite order to the selected values
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVEVD, ">0", IRNode.STORE_VECTOR, ">0"})
+    private static void testCMoveVDGE(double[] a, double[] b, double[] c) {
+        for (int i = 0; i < a.length; i++) {
+            c[i] = (a[i] >= b[i]) ? a[i] : b[i];
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVEVD, ">0", IRNode.STORE_VECTOR, ">0"})
+    private static void testCMoveVDGESwap(double[] a, double[] b, double[] c) {
+        for (int i = 0; i < a.length; i++) {
+            c[i] = (b[i] >= a[i]) ? a[i] : b[i]; // compare operands are in the opposite order to the selected values
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVEVD, ">0", IRNode.STORE_VECTOR, ">0"})
+    private static void testCMoveVDNE(double[] a, double[] b, double[] c) {
+        for (int i = 0; i < a.length; i++) {
+            c[i] = (a[i] != b[i]) ? a[i] : b[i];
+        }
+    }
+
+    @Run(test = {"testCMoveVFGT", "testCMoveVFLT","testCMoveVDLE", "testCMoveVDGE", "testCMoveVFEQ", "testCMoveVDNE",
+                 "testCMoveVFGTSwap", "testCMoveVFLTSwap","testCMoveVDLESwap", "testCMoveVDGESwap"})
+    private void testCMove_runner() { // runs every @Test above and checks each lane against its scalar reference helper
+        for (int i = 0; i < SIZE; i++) { // inputs come from nextFloat/nextDouble, i.e. values in [0, 1): no NaN, infinity or negative zero is exercised
+            floata[i] = RANDOM.nextFloat();
+            floatb[i] = RANDOM.nextFloat();
+            doublea[i] = RANDOM.nextDouble();
+            doubleb[i] = RANDOM.nextDouble();
+        }
+
+        testCMoveVFGT(floata, floatb, floatc);
+        testCMoveVDLE(doublea, doubleb, doublec);
+        for (int i = 0; i < SIZE; i++) {
+            Asserts.assertEquals(floatc[i], cmoveFloatGT(floata[i], floatb[i]));
+            Asserts.assertEquals(doublec[i], cmoveDoubleLE(doublea[i], doubleb[i]));
+        }
+
+        testCMoveVFLT(floata, floatb, floatc);
+        testCMoveVDGE(doublea, doubleb, doublec);
+        for (int i = 0; i < SIZE; i++) {
+            Asserts.assertEquals(floatc[i], cmoveFloatLT(floata[i], floatb[i]));
+            Asserts.assertEquals(doublec[i], cmoveDoubleGE(doublea[i], doubleb[i]));
+        }
+
+        for (int i = 0; i < SIZE; i++) {
+            if (i % 3 == 0) { // make every third pair equal so the EQ and NE tests see both outcomes
+                floatb[i] = floata[i];
+                doubleb[i] = doublea[i];
+            }
+        }
+
+        testCMoveVFEQ(floata, floatb, floatc);
+        testCMoveVDNE(doublea, doubleb, doublec);
+        for (int i = 0; i < SIZE; i++) {
+            Asserts.assertEquals(floatc[i], cmoveFloatEQ(floata[i], floatb[i]));
+            Asserts.assertEquals(doublec[i], cmoveDoubleNE(doublea[i], doubleb[i]));
+        }
+
+        testCMoveVFGTSwap(floata, floatb, floatc); // the Swap tests below still see the equal pairs injected above
+        testCMoveVDLESwap(doublea, doubleb, doublec);
+        for (int i = 0; i < SIZE; i++) {
+            Asserts.assertEquals(floatc[i], cmoveFloatGTSwap(floata[i], floatb[i]));
+            Asserts.assertEquals(doublec[i], cmoveDoubleLESwap(doublea[i], doubleb[i]));
+        }
+
+        testCMoveVFLTSwap(floata, floatb, floatc);
+        testCMoveVDGESwap(doublea, doubleb, doublec);
+        for (int i = 0; i < SIZE; i++) {
+            Asserts.assertEquals(floatc[i], cmoveFloatLTSwap(floata[i], floatb[i]));
+            Asserts.assertEquals(doublec[i], cmoveDoubleGESwap(doublea[i], doubleb[i]));
+        }
+    }
+}
diff --git a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java
index a8a408bac3f..c0df49e4330 100644
--- a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java
+++ b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java
@@ -142,6 +142,8 @@ public class IRNode {
     public static final String SAFEPOINT = START + "SafePoint" + MID + END;
 
     public static final String CMOVEI = START + "CMoveI" + MID + END;
+    public static final String CMOVEVF = START + "CMoveVF" + MID + END;
+    public static final String CMOVEVD = START + "CMoveVD" + MID + END;
     public static final String ABS_I = START + "AbsI" + MID + END;
     public static final String ABS_L = START + "AbsL" + MID + END;
     public static final String ABS_F = START + "AbsF" + MID + END;
diff --git a/test/micro/org/openjdk/bench/vm/compiler/TypeVectorOperations.java b/test/micro/org/openjdk/bench/vm/compiler/TypeVectorOperations.java
index 93ae6039709..9d4f6260440 100644
--- a/test/micro/org/openjdk/bench/vm/compiler/TypeVectorOperations.java
+++ b/test/micro/org/openjdk/bench/vm/compiler/TypeVectorOperations.java
@@ -366,6 +366,22 @@ public abstract class TypeVectorOperations {
         }
     }
 
+    @Benchmark
+    @Fork(jvmArgsPrepend = {"-XX:+UseCMoveUnconditionally", "-XX:+UseVectorCmov"})
+    public void cmoveD() { // double conditional-select loop, forked with the two cmove flags above
+        for (int i = 0; i < COUNT; i++) {
+            resD[i] = resD[i] < doubles[i] ? resD[i] : doubles[i]; // keeps resD[i] when it is less than doubles[i], otherwise takes doubles[i]
+        }
+    }
+
+    @Benchmark
+    @Fork(jvmArgsPrepend = {"-XX:+UseCMoveUnconditionally", "-XX:+UseVectorCmov"})
+    public void cmoveF() { // float conditional-select loop, forked with the two cmove flags above
+        for (int i = 0; i < COUNT; i++) {
+            resF[i] = resF[i] < floats[i] ? resF[i] : floats[i]; // keeps resF[i] when it is less than floats[i], otherwise takes floats[i]
+        }
+    }
+
     @Fork(value = 2, jvmArgsPrepend = {
         "-XX:+UseSuperWord"
     })