From 96c396b701e290fc3e1124b1c862b41e02e9c1d9 Mon Sep 17 00:00:00 2001
From: Ningsheng Jian <njian@openjdk.org>
Date: Fri, 5 Nov 2021 07:45:54 +0000
Subject: [PATCH] 8276151: AArch64: Incorrect result for double to int vector
 conversion

Reviewed-by: aph, psandoz
---
 src/hotspot/cpu/aarch64/aarch64_neon.ad       | 17 +++--
 src/hotspot/cpu/aarch64/aarch64_neon_ad.m4    | 46 +++++++++----
 .../vectorapi/VectorCastShape128Test.java     | 69 +++++++++++++++++--
 .../vectorapi/VectorCastShape64Test.java      | 68 ++++++++++++++++--
 4 files changed, 170 insertions(+), 30 deletions(-)

diff --git a/src/hotspot/cpu/aarch64/aarch64_neon.ad b/src/hotspot/cpu/aarch64/aarch64_neon.ad
index 0b59686238a..b3cb6f5f255 100644
--- a/src/hotspot/cpu/aarch64/aarch64_neon.ad
+++ b/src/hotspot/cpu/aarch64/aarch64_neon.ad
@@ -506,12 +506,21 @@ instruct vcvt2Dto2I(vecD dst, vecX src)
 %{
   predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
   match(Set dst (VectorCastD2X src));
-  format %{ "fcvtzs  $dst, T2D, $src\n\t"
-            "xtn     $dst, T2S, $dst, T2D\t# convert 2D to 2I vector"
+  effect(TEMP_DEF dst);
+  format %{ "ins      $dst, D, $src, 0, 1\n\t"
+            "fcvtzdw  rscratch1, $src\n\t"
+            "fcvtzdw  rscratch2, $dst\n\t"
+            "fmovs    $dst, rscratch1\n\t"
+            "mov      $dst, T2S, 1, rscratch2\t#convert 2D to 2I vector"
   %}
   ins_encode %{
-    __ fcvtzs(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg));
-    __ xtn(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($dst$$reg), __ T2D);
+    __ ins(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($src$$reg), 0, 1);
+    // We can't use fcvtzs(vector, integer) instruction here because we need
+    // saturation arithmetic. See JDK-8276151.
+    __ fcvtzdw(rscratch1, as_FloatRegister($src$$reg));
+    __ fcvtzdw(rscratch2, as_FloatRegister($dst$$reg));
+    __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
+    __ mov(as_FloatRegister($dst$$reg), __ T2S, 1, rscratch2);
   %}
   ins_pipe(pipe_slow);
 %}
diff --git a/src/hotspot/cpu/aarch64/aarch64_neon_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_neon_ad.m4
index 4d947b8141c..ded6d0fd6d2 100644
--- a/src/hotspot/cpu/aarch64/aarch64_neon_ad.m4
+++ b/src/hotspot/cpu/aarch64/aarch64_neon_ad.m4
@@ -272,25 +272,43 @@ dnl             $1 $2 $3 $4 $5
 VECTOR_CAST_F2I(2, F, I, D, 2S)
 VECTOR_CAST_F2I(4, F, I, X, 4S)
 VECTOR_CAST_F2I(2, D, L, X, 2D)
-dnl
-define(`VECTOR_CAST_F2I_L', `
-instruct vcvt$1$2to$1$3`'(vec$4 dst, vec$5 src)
+
+instruct vcvt4Fto4S(vecD dst, vecX src)
 %{
-  predicate(n->as_Vector()->length() == $1 && n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3));
-  match(Set dst (VectorCast$2`'2X src));
-  format %{ "fcvtzs  $dst, T$6, $src\n\t"
-            "xtn     $dst, T$7, $dst, T$6\t# convert $1$2 to $1$3 vector"
+  predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+  match(Set dst (VectorCastF2X src));
+  format %{ "fcvtzs  $dst, T4S, $src\n\t"
+            "xtn     $dst, T4H, $dst, T4S\t# convert 4F to 4S vector"
   %}
   ins_encode %{
-    __ fcvtzs(as_FloatRegister($dst$$reg), __ T$6, as_FloatRegister($src$$reg));
-    __ xtn(as_FloatRegister($dst$$reg), __ T$7, as_FloatRegister($dst$$reg), __ T$6);
+    __ fcvtzs(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg));
+    __ xtn(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($dst$$reg), __ T4S);
   %}
   ins_pipe(pipe_slow);
-%}')dnl
-dnl               $1 $2 $3 $4 $5 $6  $7
-VECTOR_CAST_F2I_L(4, F, S, D, X, 4S, 4H)
-VECTOR_CAST_F2I_L(2, D, I, D, X, 2D, 2S)
-dnl
+%}
+
+instruct vcvt2Dto2I(vecD dst, vecX src)
+%{
+  predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
+  match(Set dst (VectorCastD2X src));
+  effect(TEMP_DEF dst);
+  format %{ "ins      $dst, D, $src, 0, 1\n\t"
+            "fcvtzdw  rscratch1, $src\n\t"
+            "fcvtzdw  rscratch2, $dst\n\t"
+            "fmovs    $dst, rscratch1\n\t"
+            "mov      $dst, T2S, 1, rscratch2\t#convert 2D to 2I vector"
+  %}
+  ins_encode %{
+    __ ins(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($src$$reg), 0, 1);
+    // We can't use fcvtzs(vector, integer) instruction here because we need
+    // saturation arithmetic. See JDK-8276151.
+    __ fcvtzdw(rscratch1, as_FloatRegister($src$$reg));
+    __ fcvtzdw(rscratch2, as_FloatRegister($dst$$reg));
+    __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
+    __ mov(as_FloatRegister($dst$$reg), __ T2S, 1, rscratch2);
+  %}
+  ins_pipe(pipe_slow);
+%}
 
 instruct vcvt4Fto4B(vecD dst, vecX src)
 %{
diff --git a/test/hotspot/jtreg/compiler/vectorapi/VectorCastShape128Test.java b/test/hotspot/jtreg/compiler/vectorapi/VectorCastShape128Test.java
index dc4eedea3a1..5feef36e531 100644
--- a/test/hotspot/jtreg/compiler/vectorapi/VectorCastShape128Test.java
+++ b/test/hotspot/jtreg/compiler/vectorapi/VectorCastShape128Test.java
@@ -24,6 +24,7 @@
 package compiler.vectorapi;
 
 import java.util.Random;
+import jdk.test.lib.Utils;
 
 import jdk.incubator.vector.ByteVector;
 import jdk.incubator.vector.DoubleVector;
@@ -39,9 +40,11 @@ import org.testng.annotations.Test;
 /**
  * @test
  * @bug 8268966
+ * @key randomness
  * @summary AArch64: 'bad AD file' in some vector conversion tests
+ * @library /test/lib
  * @modules jdk.incubator.vector
- * @run testng/othervm -XX:-TieredCompilation compiler.vectorapi.VectorCastShape128Test
+ * @run testng/othervm -XX:-TieredCompilation -XX:CompileThreshold=100 compiler.vectorapi.VectorCastShape128Test
  */
 
 
@@ -54,8 +57,8 @@ public class VectorCastShape128Test {
     private static final VectorSpecies<Float> fspec = FloatVector.SPECIES_128;
     private static final VectorSpecies<Double> dspec = DoubleVector.SPECIES_128;
 
-    private static final int NUM_ITER = 50000;
-    private static final int LENGTH = 512;
+    private static final int NUM_ITER = 10000;
+    private static final int LENGTH = 1024;
     private static int[] ia;
     private static int[] ib;
     private static byte[] ba;
@@ -69,6 +72,51 @@ public class VectorCastShape128Test {
     private static float[] fa;
     private static float[] fb;
 
+    public static float [] fspecial = {
+        0.0f,
+        -0.0f,
+        Float.MAX_VALUE,
+        Float.MIN_VALUE,
+        -Float.MAX_VALUE,
+        -Float.MIN_VALUE,
+        Float.NaN,
+        Float.POSITIVE_INFINITY,
+        Float.NEGATIVE_INFINITY,
+        Integer.MAX_VALUE,
+        Integer.MIN_VALUE,
+        Long.MAX_VALUE,
+        Long.MIN_VALUE,
+    };
+
+    public static double [] dspecial = {
+        0.0,
+        -0.0,
+        Double.MAX_VALUE,
+        Double.MIN_VALUE,
+        -Double.MAX_VALUE,
+        -Double.MIN_VALUE,
+        Double.NaN,
+        Double.POSITIVE_INFINITY,
+        Double.NEGATIVE_INFINITY,
+        Integer.MAX_VALUE,
+        Integer.MIN_VALUE,
+        Long.MIN_VALUE,
+        Long.MAX_VALUE,
+    };
+
+    public static int [] ispecial = {
+        0,
+        Integer.MAX_VALUE,
+        Integer.MIN_VALUE,
+    };
+
+    public static long [] lspecial = {
+        0,
+        Long.MAX_VALUE,
+        Long.MIN_VALUE,
+    };
+
+
     private static void initialize() {
         ia = new int[LENGTH];
         ib = new int[LENGTH];
@@ -82,14 +130,23 @@ public class VectorCastShape128Test {
         fb = new float[LENGTH];
         da = new double[LENGTH];
         db = new double[LENGTH];
-        Random r = new Random();
+        Random r = Utils.getRandomInstance();
         for (int i = 0; i < LENGTH; i++) {
             ia[i] = r.nextInt();
             la[i] = r.nextLong();
             sa[i] = (short) r.nextInt();
             ba[i] = (byte) r.nextInt();
-            fa[i] = r.nextFloat();
-            da[i] = r.nextDouble();
+            fa[i] = ia[i] + r.nextFloat();
+            da[i] = la[i] + r.nextDouble();
+        }
+
+        // Copy the special values to the start of each input array four times, so
+        // they still get tested by conversions that ignore some input elements.
+        for (int i = 0; i < 4; i++) {
+            System.arraycopy(ispecial, 0, ia, ispecial.length * i, ispecial.length);
+            System.arraycopy(lspecial, 0, la, lspecial.length * i, lspecial.length);
+            System.arraycopy(fspecial, 0, fa, fspecial.length * i, fspecial.length);
+            System.arraycopy(dspecial, 0, da, dspecial.length * i, dspecial.length);
         }
     }
 
diff --git a/test/hotspot/jtreg/compiler/vectorapi/VectorCastShape64Test.java b/test/hotspot/jtreg/compiler/vectorapi/VectorCastShape64Test.java
index 835573010ba..98f2d64f350 100644
--- a/test/hotspot/jtreg/compiler/vectorapi/VectorCastShape64Test.java
+++ b/test/hotspot/jtreg/compiler/vectorapi/VectorCastShape64Test.java
@@ -24,6 +24,7 @@
 package compiler.vectorapi;
 
 import java.util.Random;
+import jdk.test.lib.Utils;
 
 import jdk.incubator.vector.ByteVector;
 import jdk.incubator.vector.DoubleVector;
@@ -39,9 +40,11 @@ import org.testng.annotations.Test;
 /**
  * @test
  * @bug 8268966
+ * @key randomness
  * @summary AArch64: 'bad AD file' in some vector conversion tests
+ * @library /test/lib
  * @modules jdk.incubator.vector
- * @run testng/othervm -XX:-TieredCompilation compiler.vectorapi.VectorCastShape64Test
+ * @run testng/othervm -XX:-TieredCompilation -XX:CompileThreshold=100 compiler.vectorapi.VectorCastShape64Test
  */
 
 
@@ -54,8 +57,8 @@ public class VectorCastShape64Test {
     private static final VectorSpecies<Float> fspec = FloatVector.SPECIES_64;
     private static final VectorSpecies<Double> dspec = DoubleVector.SPECIES_64;
 
-    private static final int NUM_ITER = 50000;
-    private static final int LENGTH = 512;
+    private static final int NUM_ITER = 10000;
+    private static final int LENGTH = 1024;
     private static int[] ia;
     private static int[] ib;
     private static byte[] ba;
@@ -69,6 +72,50 @@ public class VectorCastShape64Test {
     private static float[] fa;
     private static float[] fb;
 
+    public static float [] fspecial = {
+        0.0f,
+        -0.0f,
+        Float.MAX_VALUE,
+        Float.MIN_VALUE,
+        -Float.MAX_VALUE,
+        -Float.MIN_VALUE,
+        Float.NaN,
+        Float.POSITIVE_INFINITY,
+        Float.NEGATIVE_INFINITY,
+        Integer.MAX_VALUE,
+        Integer.MIN_VALUE,
+        Long.MAX_VALUE,
+        Long.MIN_VALUE,
+    };
+
+    public static double [] dspecial = {
+        0.0,
+        -0.0,
+        Double.MAX_VALUE,
+        Double.MIN_VALUE,
+        -Double.MAX_VALUE,
+        -Double.MIN_VALUE,
+        Double.NaN,
+        Double.POSITIVE_INFINITY,
+        Double.NEGATIVE_INFINITY,
+        Integer.MAX_VALUE,
+        Integer.MIN_VALUE,
+        Long.MIN_VALUE,
+        Long.MAX_VALUE,
+    };
+
+    public static int [] ispecial = {
+        0,
+        Integer.MAX_VALUE,
+        Integer.MIN_VALUE,
+    };
+
+    public static long [] lspecial = {
+        0,
+        Long.MAX_VALUE,
+        Long.MIN_VALUE,
+    };
+
     private static void initialize() {
         ia = new int[LENGTH];
         ib = new int[LENGTH];
@@ -82,14 +129,23 @@ public class VectorCastShape64Test {
         fb = new float[LENGTH];
         da = new double[LENGTH];
         db = new double[LENGTH];
-        Random r = new Random();
+        Random r = Utils.getRandomInstance();
         for (int i = 0; i < LENGTH; i++) {
             ia[i] = r.nextInt();
             la[i] = r.nextLong();
             sa[i] = (short) r.nextInt();
             ba[i] = (byte) r.nextInt();
-            fa[i] = r.nextFloat();
-            da[i] = r.nextDouble();
+            fa[i] = ia[i] + r.nextFloat();
+            da[i] = la[i] + r.nextDouble();
+        }
+
+        // Copy the special values to the start of each input array four times, so
+        // they still get tested by conversions that ignore some input elements.
+        for (int i = 0; i < 4; i++) {
+            System.arraycopy(ispecial, 0, ia, ispecial.length * i, ispecial.length);
+            System.arraycopy(lspecial, 0, la, lspecial.length * i, lspecial.length);
+            System.arraycopy(fspecial, 0, fa, fspecial.length * i, fspecial.length);
+            System.arraycopy(dspecial, 0, da, dspecial.length * i, dspecial.length);
         }
     }