From d46f0fb31888db75f5b2b78a162fec16dfc5d0d9 Mon Sep 17 00:00:00 2001
From: Emanuel Peter <epeter@openjdk.org>
Date: Wed, 16 Aug 2023 07:15:43 +0000
Subject: [PATCH] 8313720: C2 SuperWord: wrong result with -XX:+UseVectorCmov
 -XX:+UseCMoveUnconditionally

Reviewed-by: chagedorn, thartmann
---
 src/hotspot/share/opto/superword.cpp          |   9 +
 .../c2/irTests/TestVectorConditionalMove.java | 175 +++++++++++++++++-
 2 files changed, 174 insertions(+), 10 deletions(-)

diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp
index 3e78b5580c5..8cd3e46dcf5 100644
--- a/src/hotspot/share/opto/superword.cpp
+++ b/src/hotspot/share/opto/superword.cpp
@@ -1270,6 +1270,7 @@ bool SuperWord::isomorphic(Node* s1, Node* s2) {
   if (s1->Opcode() != s2->Opcode()) return false;
   if (s1->req() != s2->req()) return false;
   if (!same_velt_type(s1, s2)) return false;
+  if (s1->is_Bool() && s1->as_Bool()->_test._test != s2->as_Bool()->_test._test) return false;
   Node* s1_ctrl = s1->in(0);
   Node* s2_ctrl = s2->in(0);
   // If the control nodes are equivalent, no further checks are required to test for isomorphism.
@@ -2656,6 +2657,14 @@ bool SuperWord::output() {
         Node_List* p_bol = my_pack(bol);
         assert(p_bol != nullptr, "CMove must have matching Bool pack");
 
+#ifdef ASSERT
+        for (uint j = 0; j < p_bol->size(); j++) {
+          Node* m = p_bol->at(j);
+          assert(m->as_Bool()->_test._test == bol_test,
+                 "all bool nodes must have same test");
+        }
+#endif
+
         CmpNode* cmp = bol->in(1)->as_Cmp();
         assert(cmp != nullptr, "must have cmp above CMove");
         Node_List* p_cmp = my_pack(cmp);
diff --git a/test/hotspot/jtreg/compiler/c2/irTests/TestVectorConditionalMove.java b/test/hotspot/jtreg/compiler/c2/irTests/TestVectorConditionalMove.java
index f50b091719e..6f6b7f5bd30 100644
--- a/test/hotspot/jtreg/compiler/c2/irTests/TestVectorConditionalMove.java
+++ b/test/hotspot/jtreg/compiler/c2/irTests/TestVectorConditionalMove.java
@@ -31,7 +31,7 @@ import jdk.test.lib.Utils;
 
 /*
  * @test
- * @bug 8289422 8306088
+ * @bug 8289422 8306088 8313720
  * @key randomness
  * @summary Auto-vectorization enhancement to support vector conditional move.
  * @library /test/lib /
@@ -395,6 +395,58 @@ public class TestVectorConditionalMove {
         }
     }
 
+    @Test
+    @IR(counts = {IRNode.LOAD_VECTOR_F, ">0",
+                  IRNode.VECTOR_MASK_CMP_F, ">0",
+                  IRNode.VECTOR_BLEND_F, ">0",
+                  IRNode.STORE_VECTOR, ">0"},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
+    private static void testCMoveFLTforFConstH2(float[] a, float[] b, float[] c) {
+        for (int i = 0; i < a.length; i+=2) {
+            c[i+0] = (a[i+0] < b[i+0]) ? 0.1f : -0.1f;
+            c[i+1] = (a[i+1] < b[i+1]) ? 0.1f : -0.1f;
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.LOAD_VECTOR_F, ">0",
+                  IRNode.VECTOR_MASK_CMP_F, ">0",
+                  IRNode.VECTOR_BLEND_F, ">0",
+                  IRNode.STORE_VECTOR, ">0"},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
+    private static void testCMoveFLEforFConstH2(float[] a, float[] b, float[] c) {
+        for (int i = 0; i < a.length; i+=2) {
+            c[i+0] = (a[i+0] <= b[i+0]) ? 0.1f : -0.1f;
+            c[i+1] = (a[i+1] <= b[i+1]) ? 0.1f : -0.1f;
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.LOAD_VECTOR_F, "=0",
+                  IRNode.VECTOR_MASK_CMP_F, "=0",
+                  IRNode.VECTOR_BLEND_F, "=0",
+                  IRNode.STORE_VECTOR, "=0"},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
+    private static void testCMoveFYYforFConstH2(float[] a, float[] b, float[] c) {
+        for (int i = 0; i < a.length; i+=2) {
+            c[i+0] = (a[i+0] <= b[i+0]) ? 0.1f : -0.1f;
+            c[i+1] = (a[i+1] <  b[i+1]) ? 0.1f : -0.1f;
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.LOAD_VECTOR_F, "=0",
+                  IRNode.VECTOR_MASK_CMP_F, "=0",
+                  IRNode.VECTOR_BLEND_F, "=0",
+                  IRNode.STORE_VECTOR, "=0"},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
+    private static void testCMoveFXXforFConstH2(float[] a, float[] b, float[] c) {
+        for (int i = 0; i < a.length; i+=2) {
+            c[i+0] = (a[i+0] <  b[i+0]) ? 0.1f : -0.1f;
+            c[i+1] = (a[i+1] <= b[i+1]) ? 0.1f : -0.1f;
+        }
+    }
+
     @Test
     @IR(counts = {IRNode.LOAD_VECTOR_D, ">0",
                   IRNode.VECTOR_MASK_CMP_D, ">0",
@@ -467,6 +519,58 @@ public class TestVectorConditionalMove {
         }
     }
 
+    @Test
+    @IR(counts = {IRNode.LOAD_VECTOR_D, ">0",
+                  IRNode.VECTOR_MASK_CMP_D, ">0",
+                  IRNode.VECTOR_BLEND_D, ">0",
+                  IRNode.STORE_VECTOR, ">0"},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
+    private static void testCMoveDLTforDConstH2(double[] a, double[] b, double[] c) {
+        for (int i = 0; i < a.length; i+=2) {
+            c[i+0] = (a[i+0] < b[i+0]) ? 0.1 : -0.1;
+            c[i+1] = (a[i+1] < b[i+1]) ? 0.1 : -0.1;
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.LOAD_VECTOR_D, ">0",
+                  IRNode.VECTOR_MASK_CMP_D, ">0",
+                  IRNode.VECTOR_BLEND_D, ">0",
+                  IRNode.STORE_VECTOR, ">0"},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
+    private static void testCMoveDLEforDConstH2(double[] a, double[] b, double[] c) {
+        for (int i = 0; i < a.length; i+=2) {
+            c[i+0] = (a[i+0] <= b[i+0]) ? 0.1 : -0.1;
+            c[i+1] = (a[i+1] <= b[i+1]) ? 0.1 : -0.1;
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.LOAD_VECTOR_D, "=0",
+                  IRNode.VECTOR_MASK_CMP_D, "=0",
+                  IRNode.VECTOR_BLEND_D, "=0",
+                  IRNode.STORE_VECTOR, "=0"},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
+    private static void testCMoveDYYforDConstH2(double[] a, double[] b, double[] c) {
+        for (int i = 0; i < a.length; i+=2) {
+            c[i+0] = (a[i+0] <= b[i+0]) ? 0.1 : -0.1;
+            c[i+1] = (a[i+1] <  b[i+1]) ? 0.1 : -0.1;
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.LOAD_VECTOR_D, "=0",
+                  IRNode.VECTOR_MASK_CMP_D, "=0",
+                  IRNode.VECTOR_BLEND_D, "=0",
+                  IRNode.STORE_VECTOR, "=0"},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
+    private static void testCMoveDXXforDConstH2(double[] a, double[] b, double[] c) {
+        for (int i = 0; i < a.length; i+=2) {
+            c[i+0] = (a[i+0] <  b[i+0]) ? 0.1 : -0.1;
+            c[i+1] = (a[i+1] <= b[i+1]) ? 0.1 : -0.1;
+        }
+    }
+
     // Extension: Compare 2 ILFD values, and pick from 2 ILFD values
     // Note:
     //   To guarantee that CMove is introduced, I need to perform the loads before the branch. To ensure they
@@ -716,7 +820,11 @@ public class TestVectorConditionalMove {
                  "testCMoveFGTforFConst", "testCMoveFGEforFConst", "testCMoveFLTforFConst",
                  "testCMoveFLEforFConst", "testCMoveFEQforFConst", "testCMoveFNEQforFConst",
                  "testCMoveDGTforDConst", "testCMoveDGEforDConst", "testCMoveDLTforDConst",
-                 "testCMoveDLEforDConst", "testCMoveDEQforDConst", "testCMoveDNEQforDConst"})
+                 "testCMoveDLEforDConst", "testCMoveDEQforDConst", "testCMoveDNEQforDConst",
+                 "testCMoveFLTforFConstH2", "testCMoveFLEforFConstH2",
+                 "testCMoveFYYforFConstH2", "testCMoveFXXforFConstH2",
+                 "testCMoveDLTforDConstH2", "testCMoveDLEforDConstH2",
+                 "testCMoveDYYforDConstH2", "testCMoveDXXforDConstH2"})
     private void testCMove_runner() {
         float[] floata = new float[SIZE];
         float[] floatb = new float[SIZE];
@@ -815,6 +923,39 @@ public class TestVectorConditionalMove {
             Asserts.assertEquals(floatc[i], cmoveFNEQforFConst(floata[i], floatb[i]));
             Asserts.assertEquals(doublec[i], cmoveDNEQforDConst(doublea[i], doubleb[i]));
         }
+
+        // Hand-unrolled (H2) examples:
+        testCMoveFLTforFConstH2(floata, floatb, floatc);
+        testCMoveDLTforDConstH2(doublea, doubleb, doublec);
+        for (int i = 0; i < SIZE; i++) {
+            Asserts.assertEquals(floatc[i], cmoveFLTforFConst(floata[i], floatb[i]));
+            Asserts.assertEquals(doublec[i], cmoveDLTforDConst(doublea[i], doubleb[i]));
+        }
+
+        testCMoveFLEforFConstH2(floata, floatb, floatc);
+        testCMoveDLEforDConstH2(doublea, doubleb, doublec);
+        for (int i = 0; i < SIZE; i++) {
+            Asserts.assertEquals(floatc[i], cmoveFLEforFConst(floata[i], floatb[i]));
+            Asserts.assertEquals(doublec[i], cmoveDLEforDConst(doublea[i], doubleb[i]));
+        }
+
+        testCMoveFYYforFConstH2(floata, floatb, floatc);
+        testCMoveDYYforDConstH2(doublea, doubleb, doublec);
+        for (int i = 0; i < SIZE; i+=2) {
+            Asserts.assertEquals(floatc[i+0], cmoveFLEforFConst(floata[i+0], floatb[i+0]));
+            Asserts.assertEquals(doublec[i+0], cmoveDLEforDConst(doublea[i+0], doubleb[i+0]));
+            Asserts.assertEquals(floatc[i+1], cmoveFLTforFConst(floata[i+1], floatb[i+1]));
+            Asserts.assertEquals(doublec[i+1], cmoveDLTforDConst(doublea[i+1], doubleb[i+1]));
+        }
+
+        testCMoveFXXforFConstH2(floata, floatb, floatc);
+        testCMoveDXXforDConstH2(doublea, doubleb, doublec);
+        for (int i = 0; i < SIZE; i+=2) {
+            Asserts.assertEquals(floatc[i+0], cmoveFLTforFConst(floata[i+0], floatb[i+0]));
+            Asserts.assertEquals(doublec[i+0], cmoveDLTforDConst(doublea[i+0], doubleb[i+0]));
+            Asserts.assertEquals(floatc[i+1], cmoveFLEforFConst(floata[i+1], floatb[i+1]));
+            Asserts.assertEquals(doublec[i+1], cmoveDLEforDConst(doublea[i+1], doubleb[i+1]));
+        }
     }
 
     @Warmup(0)
@@ -981,19 +1122,33 @@ public class TestVectorConditionalMove {
 
     private static void init(float[] a) {
         for (int i = 0; i < SIZE; i++) {
-            a[i] = RANDOM.nextFloat();
-            if (RANDOM.nextInt() % 20 == 0) {
-                a[i] = Float.NaN;
-            }
+            a[i] = switch(RANDOM.nextInt() % 20) {
+                case 0  -> Float.NaN;
+                case 1  -> 0;
+                case 2  -> 1;
+                case 3  -> Float.POSITIVE_INFINITY;
+                case 4  -> Float.NEGATIVE_INFINITY;
+                case 5  -> Float.MAX_VALUE;
+                case 6  -> Float.MIN_VALUE;
+                case 7, 8, 9 -> RANDOM.nextFloat();
+                default -> Float.intBitsToFloat(RANDOM.nextInt());
+            };
         }
     }
 
     private static void init(double[] a) {
         for (int i = 0; i < SIZE; i++) {
-            a[i] = RANDOM.nextDouble();
-            if (RANDOM.nextInt() % 20 == 0) {
-                a[i] = Double.NaN;
-            }
+            a[i] = switch(RANDOM.nextInt() % 20) {
+                case 0  -> Double.NaN;
+                case 1  -> 0;
+                case 2  -> 1;
+                case 3  -> Double.POSITIVE_INFINITY;
+                case 4  -> Double.NEGATIVE_INFINITY;
+                case 5  -> Double.MAX_VALUE;
+                case 6  -> Double.MIN_VALUE;
+                case 7, 8, 9 -> RANDOM.nextDouble();
+                default -> Double.longBitsToDouble(RANDOM.nextLong());
+            };
         }
     }
 }