jdk-24/test/hotspot/jtreg/compiler/c2/irTests/TestVectorConditionalMove.java

1155 lines
42 KiB
Java
Raw Normal View History

/*
* Copyright (c) 2022, Arm Limited. All rights reserved.
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package compiler.c2.irTests;
import compiler.lib.ir_framework.*;
import java.util.Random;
import jdk.test.lib.Asserts;
import jdk.test.lib.Utils;
/*
* @test
* @bug 8289422 8306088 8313720
* @key randomness
* @summary Auto-vectorization enhancement to support vector conditional move.
* @library /test/lib /
* @run driver compiler.c2.irTests.TestVectorConditionalMove
*/
public class TestVectorConditionalMove {
final private static int SIZE = 1024;
private static final Random RANDOM = Utils.getRandomInstance();
public static void main(String[] args) {
TestFramework.runWithFlags("-XX:+UseCMoveUnconditionally", "-XX:+UseVectorCmov");
}
// Compare 2 values, and pick one of them
private float cmoveFloatGT(float a, float b) {
return (a > b) ? a : b;
}
private float cmoveFloatGTSwap(float a, float b) {
return (b > a) ? a : b;
}
private float cmoveFloatLT(float a, float b) {
return (a < b) ? a : b;
}
private float cmoveFloatLTSwap(float a, float b) {
return (b < a) ? a : b;
}
private float cmoveFloatEQ(float a, float b) {
return (a == b) ? a : b;
}
private double cmoveDoubleLE(double a, double b) {
return (a <= b) ? a : b;
}
private double cmoveDoubleLESwap(double a, double b) {
return (b <= a) ? a : b;
}
private double cmoveDoubleGE(double a, double b) {
return (a >= b) ? a : b;
}
private double cmoveDoubleGESwap(double a, double b) {
return (b >= a) ? a : b;
}
private double cmoveDoubleNE(double a, double b) {
return (a != b) ? a : b;
}
// Extensions: compare 2 values, and pick from 2 consts
private float cmoveFGTforFConst(float a, float b) {
return (a > b) ? 0.1f : -0.1f;
}
private float cmoveFGEforFConst(float a, float b) {
return (a >= b) ? 0.1f : -0.1f;
}
private float cmoveFLTforFConst(float a, float b) {
return (a < b) ? 0.1f : -0.1f;
}
private float cmoveFLEforFConst(float a, float b) {
return (a <= b) ? 0.1f : -0.1f;
}
private float cmoveFEQforFConst(float a, float b) {
return (a == b) ? 0.1f : -0.1f;
}
private float cmoveFNEQforFConst(float a, float b) {
return (a != b) ? 0.1f : -0.1f;
}
private double cmoveDGTforDConst(double a, double b) {
return (a > b) ? 0.1 : -0.1;
}
private double cmoveDGEforDConst(double a, double b) {
return (a >= b) ? 0.1 : -0.1;
}
private double cmoveDLTforDConst(double a, double b) {
return (a < b) ? 0.1 : -0.1;
}
private double cmoveDLEforDConst(double a, double b) {
return (a <= b) ? 0.1 : -0.1;
}
private double cmoveDEQforDConst(double a, double b) {
return (a == b) ? 0.1 : -0.1;
}
private double cmoveDNEQforDConst(double a, double b) {
return (a != b) ? 0.1 : -0.1;
}
// Extension: Compare 2 ILFD values, and pick from 2 ILFD values
private int cmoveIGTforI(int a, int b, int c, int d) {
return (a > b) ? c : d;
}
private long cmoveIGTforL(int a, int b, long c, long d) {
return (a > b) ? c : d;
}
private float cmoveIGTforF(int a, int b, float c, float d) {
return (a > b) ? c : d;
}
private double cmoveIGTforD(int a, int b, double c, double d) {
return (a > b) ? c : d;
}
private int cmoveLGTforI(long a, long b, int c, int d) {
return (a > b) ? c : d;
}
private long cmoveLGTforL(long a, long b, long c, long d) {
return (a > b) ? c : d;
}
private float cmoveLGTforF(long a, long b, float c, float d) {
return (a > b) ? c : d;
}
private double cmoveLGTforD(long a, long b, double c, double d) {
return (a > b) ? c : d;
}
private int cmoveFGTforI(float a, float b, int c, int d) {
return (a > b) ? c : d;
}
private long cmoveFGTforL(float a, float b, long c, long d) {
return (a > b) ? c : d;
}
private float cmoveFGTforF(float a, float b, float c, float d) {
return (a > b) ? c : d;
}
private double cmoveFGTforD(float a, float b, double c, double d) {
return (a > b) ? c : d;
}
private int cmoveDGTforI(double a, double b, int c, int d) {
return (a > b) ? c : d;
}
private long cmoveDGTforL(double a, double b, long c, long d) {
return (a > b) ? c : d;
}
private float cmoveDGTforF(double a, double b, float c, float d) {
return (a > b) ? c : d;
}
private double cmoveDGTforD(double a, double b, double c, double d) {
return (a > b) ? c : d;
}
// Compare 2 values, and pick one of them
@Test
@IR(counts = {IRNode.LOAD_VECTOR_F, ">0",
IRNode.VECTOR_MASK_CMP_F, ">0",
IRNode.VECTOR_BLEND_F, ">0",
IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveVFGT(float[] a, float[] b, float[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] > b[i]) ? a[i] : b[i];
}
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR_F, ">0",
IRNode.VECTOR_MASK_CMP_F, ">0",
IRNode.VECTOR_BLEND_F, ">0",
IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveVFGTSwap(float[] a, float[] b, float[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (b[i] > a[i]) ? a[i] : b[i];
}
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR_F, ">0",
IRNode.VECTOR_MASK_CMP_F, ">0",
IRNode.VECTOR_BLEND_F, ">0",
IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveVFLT(float[] a, float[] b, float[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] < b[i]) ? a[i] : b[i];
}
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR_F, ">0",
IRNode.VECTOR_MASK_CMP_F, ">0",
IRNode.VECTOR_BLEND_F, ">0",
IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveVFLTSwap(float[] a, float[] b, float[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (b[i] < a[i]) ? a[i] : b[i];
}
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR_F, ">0",
IRNode.VECTOR_MASK_CMP_F, ">0",
IRNode.VECTOR_BLEND_F, ">0",
IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveVFEQ(float[] a, float[] b, float[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] == b[i]) ? a[i] : b[i];
}
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR_D, ">0",
IRNode.VECTOR_MASK_CMP_D, ">0",
IRNode.VECTOR_BLEND_D, ">0",
IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveVDLE(double[] a, double[] b, double[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] <= b[i]) ? a[i] : b[i];
}
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR_D, ">0",
IRNode.VECTOR_MASK_CMP_D, ">0",
IRNode.VECTOR_BLEND_D, ">0",
IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveVDLESwap(double[] a, double[] b, double[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (b[i] <= a[i]) ? a[i] : b[i];
}
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR_D, ">0",
IRNode.VECTOR_MASK_CMP_D, ">0",
IRNode.VECTOR_BLEND_D, ">0",
IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveVDGE(double[] a, double[] b, double[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] >= b[i]) ? a[i] : b[i];
}
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR_D, ">0",
IRNode.VECTOR_MASK_CMP_D, ">0",
IRNode.VECTOR_BLEND_D, ">0",
IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveVDGESwap(double[] a, double[] b, double[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (b[i] >= a[i]) ? a[i] : b[i];
}
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR_D, ">0",
IRNode.VECTOR_MASK_CMP_D, ">0",
IRNode.VECTOR_BLEND_D, ">0",
IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveVDNE(double[] a, double[] b, double[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] != b[i]) ? a[i] : b[i];
}
}
// Extensions: compare 2 values, and pick from 2 consts
@Test
@IR(counts = {IRNode.LOAD_VECTOR_F, ">0",
IRNode.VECTOR_MASK_CMP_F, ">0",
IRNode.VECTOR_BLEND_F, ">0",
IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveFGTforFConst(float[] a, float[] b, float[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] > b[i]) ? 0.1f : -0.1f;
}
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR_F, ">0",
IRNode.VECTOR_MASK_CMP_F, ">0",
IRNode.VECTOR_BLEND_F, ">0",
IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveFGEforFConst(float[] a, float[] b, float[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] >= b[i]) ? 0.1f : -0.1f;
}
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR_F, ">0",
IRNode.VECTOR_MASK_CMP_F, ">0",
IRNode.VECTOR_BLEND_F, ">0",
IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveFLTforFConst(float[] a, float[] b, float[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] < b[i]) ? 0.1f : -0.1f;
}
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR_F, ">0",
IRNode.VECTOR_MASK_CMP_F, ">0",
IRNode.VECTOR_BLEND_F, ">0",
IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveFLEforFConst(float[] a, float[] b, float[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] <= b[i]) ? 0.1f : -0.1f;
}
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR_F, ">0",
IRNode.VECTOR_MASK_CMP_F, ">0",
IRNode.VECTOR_BLEND_F, ">0",
IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveFEQforFConst(float[] a, float[] b, float[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] == b[i]) ? 0.1f : -0.1f;
}
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR_F, ">0",
IRNode.VECTOR_MASK_CMP_F, ">0",
IRNode.VECTOR_BLEND_F, ">0",
IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveFNEQforFConst(float[] a, float[] b, float[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] != b[i]) ? 0.1f : -0.1f;
}
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR_F, ">0",
IRNode.VECTOR_MASK_CMP_F, ">0",
IRNode.VECTOR_BLEND_F, ">0",
IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveFLTforFConstH2(float[] a, float[] b, float[] c) {
for (int i = 0; i < a.length; i+=2) {
c[i+0] = (a[i+0] < b[i+0]) ? 0.1f : -0.1f;
c[i+1] = (a[i+1] < b[i+1]) ? 0.1f : -0.1f;
}
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR_F, ">0",
IRNode.VECTOR_MASK_CMP_F, ">0",
IRNode.VECTOR_BLEND_F, ">0",
IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveFLEforFConstH2(float[] a, float[] b, float[] c) {
for (int i = 0; i < a.length; i+=2) {
c[i+0] = (a[i+0] <= b[i+0]) ? 0.1f : -0.1f;
c[i+1] = (a[i+1] <= b[i+1]) ? 0.1f : -0.1f;
}
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR_F, "=0",
IRNode.VECTOR_MASK_CMP_F, "=0",
IRNode.VECTOR_BLEND_F, "=0",
IRNode.STORE_VECTOR, "=0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveFYYforFConstH2(float[] a, float[] b, float[] c) {
for (int i = 0; i < a.length; i+=2) {
c[i+0] = (a[i+0] <= b[i+0]) ? 0.1f : -0.1f;
c[i+1] = (a[i+1] < b[i+1]) ? 0.1f : -0.1f;
}
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR_F, "=0",
IRNode.VECTOR_MASK_CMP_F, "=0",
IRNode.VECTOR_BLEND_F, "=0",
IRNode.STORE_VECTOR, "=0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveFXXforFConstH2(float[] a, float[] b, float[] c) {
for (int i = 0; i < a.length; i+=2) {
c[i+0] = (a[i+0] < b[i+0]) ? 0.1f : -0.1f;
c[i+1] = (a[i+1] <= b[i+1]) ? 0.1f : -0.1f;
}
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR_D, ">0",
IRNode.VECTOR_MASK_CMP_D, ">0",
IRNode.VECTOR_BLEND_D, ">0",
IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveDGTforDConst(double[] a, double[] b, double[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] > b[i]) ? 0.1 : -0.1;
}
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR_D, ">0",
IRNode.VECTOR_MASK_CMP_D, ">0",
IRNode.VECTOR_BLEND_D, ">0",
IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveDGEforDConst(double[] a, double[] b, double[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] >= b[i]) ? 0.1 : -0.1;
}
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR_D, ">0",
IRNode.VECTOR_MASK_CMP_D, ">0",
IRNode.VECTOR_BLEND_D, ">0",
IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveDLTforDConst(double[] a, double[] b, double[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] < b[i]) ? 0.1 : -0.1;
}
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR_D, ">0",
IRNode.VECTOR_MASK_CMP_D, ">0",
IRNode.VECTOR_BLEND_D, ">0",
IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveDLEforDConst(double[] a, double[] b, double[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] <= b[i]) ? 0.1 : -0.1;
}
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR_D, ">0",
IRNode.VECTOR_MASK_CMP_D, ">0",
IRNode.VECTOR_BLEND_D, ">0",
IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveDEQforDConst(double[] a, double[] b, double[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] == b[i]) ? 0.1 : -0.1;
}
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR_D, ">0",
IRNode.VECTOR_MASK_CMP_D, ">0",
IRNode.VECTOR_BLEND_D, ">0",
IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveDNEQforDConst(double[] a, double[] b, double[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] != b[i]) ? 0.1 : -0.1;
}
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR_D, ">0",
IRNode.VECTOR_MASK_CMP_D, ">0",
IRNode.VECTOR_BLEND_D, ">0",
IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveDLTforDConstH2(double[] a, double[] b, double[] c) {
for (int i = 0; i < a.length; i+=2) {
c[i+0] = (a[i+0] < b[i+0]) ? 0.1 : -0.1;
c[i+1] = (a[i+1] < b[i+1]) ? 0.1 : -0.1;
}
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR_D, ">0",
IRNode.VECTOR_MASK_CMP_D, ">0",
IRNode.VECTOR_BLEND_D, ">0",
IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveDLEforDConstH2(double[] a, double[] b, double[] c) {
for (int i = 0; i < a.length; i+=2) {
c[i+0] = (a[i+0] <= b[i+0]) ? 0.1 : -0.1;
c[i+1] = (a[i+1] <= b[i+1]) ? 0.1 : -0.1;
}
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR_D, "=0",
IRNode.VECTOR_MASK_CMP_D, "=0",
IRNode.VECTOR_BLEND_D, "=0",
IRNode.STORE_VECTOR, "=0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveDYYforDConstH2(double[] a, double[] b, double[] c) {
for (int i = 0; i < a.length; i+=2) {
c[i+0] = (a[i+0] <= b[i+0]) ? 0.1 : -0.1;
c[i+1] = (a[i+1] < b[i+1]) ? 0.1 : -0.1;
}
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR_D, "=0",
IRNode.VECTOR_MASK_CMP_D, "=0",
IRNode.VECTOR_BLEND_D, "=0",
IRNode.STORE_VECTOR, "=0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveDXXforDConstH2(double[] a, double[] b, double[] c) {
for (int i = 0; i < a.length; i+=2) {
c[i+0] = (a[i+0] < b[i+0]) ? 0.1 : -0.1;
c[i+1] = (a[i+1] <= b[i+1]) ? 0.1 : -0.1;
}
}
// Extension: Compare 2 ILFD values, and pick from 2 ILFD values
// Note:
// To guarantee that CMove is introduced, I need to perform the loads before the branch. To ensure they
// do not float down into the branches, I compute a value, and store it to r2 (same as r, except that the
// compilation does not know that).
// So far, vectorization only works for CMoveF/D, with same data-width comparison (F/I for F, D/L for D).
@Test
@IR(failOn = {IRNode.STORE_VECTOR})
private static void testCMoveIGTforI(int[] a, int[] b, int[] c, int[] d, int[] r, int[] r2) {
for (int i = 0; i < a.length; i++) {
int cc = c[i];
int dd = d[i];
r2[i] = cc + dd;
r[i] = (a[i] > b[i]) ? cc : dd;
}
}
@Test
@IR(failOn = {IRNode.STORE_VECTOR})
private static void testCMoveIGTforL(int[] a, int[] b, long[] c, long[] d, long[] r, long[] r2) {
for (int i = 0; i < a.length; i++) {
long cc = c[i];
long dd = d[i];
r2[i] = cc + dd;
r[i] = (a[i] > b[i]) ? cc : dd;
}
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_float)", ">0",
IRNode.LOAD_VECTOR_F, IRNode.VECTOR_SIZE + "min(max_int, max_float)", ">0",
IRNode.VECTOR_MASK_CMP_I, IRNode.VECTOR_SIZE + "min(max_int, max_float)", ">0",
IRNode.VECTOR_BLEND_F, IRNode.VECTOR_SIZE + "min(max_int, max_float)", ">0",
IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveIGTforF(int[] a, int[] b, float[] c, float[] d, float[] r, float[] r2) {
for (int i = 0; i < a.length; i++) {
float cc = c[i];
float dd = d[i];
r2[i] = cc + dd;
r[i] = (a[i] > b[i]) ? cc : dd;
}
}
@Test
@IR(failOn = {IRNode.STORE_VECTOR})
private static void testCMoveIGTforD(int[] a, int[] b, double[] c, double[] d, double[] r, double[] r2) {
for (int i = 0; i < a.length; i++) {
double cc = c[i];
double dd = d[i];
r2[i] = cc + dd;
r[i] = (a[i] > b[i]) ? cc : dd;
}
}
@Test
@IR(failOn = {IRNode.STORE_VECTOR})
private static void testCMoveLGTforI(long[] a, long[] b, int[] c, int[] d, int[] r, int[] r2) {
for (int i = 0; i < a.length; i++) {
int cc = c[i];
int dd = d[i];
r2[i] = cc + dd;
r[i] = (a[i] > b[i]) ? cc : dd;
}
}
@Test
@IR(failOn = {IRNode.STORE_VECTOR})
private static void testCMoveLGTforL(long[] a, long[] b, long[] c, long[] d, long[] r, long[] r2) {
for (int i = 0; i < a.length; i++) {
long cc = c[i];
long dd = d[i];
r2[i] = cc + dd;
r[i] = (a[i] > b[i]) ? cc : dd;
}
}
@Test
@IR(failOn = {IRNode.STORE_VECTOR})
private static void testCMoveLGTforF(long[] a, long[] b, float[] c, float[] d, float[] r, float[] r2) {
for (int i = 0; i < a.length; i++) {
float cc = c[i];
float dd = d[i];
r2[i] = cc + dd;
r[i] = (a[i] > b[i]) ? cc : dd;
}
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_long, max_double)", ">0",
IRNode.LOAD_VECTOR_D, IRNode.VECTOR_SIZE + "min(max_long, max_double)", ">0",
IRNode.VECTOR_MASK_CMP_L, IRNode.VECTOR_SIZE + "min(max_long, max_double)", ">0",
IRNode.VECTOR_BLEND_D, IRNode.VECTOR_SIZE + "min(max_long, max_double)", ">0",
IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
// Requires avx2, else L is restricted to 16 byte, and D has 32. That leads to a vector elements mismatch of 2 to 4.
private static void testCMoveLGTforD(long[] a, long[] b, double[] c, double[] d, double[] r, double[] r2) {
for (int i = 0; i < a.length; i++) {
double cc = c[i];
double dd = d[i];
r2[i] = cc + dd;
r[i] = (a[i] > b[i]) ? cc : dd;
}
}
@Test
@IR(failOn = {IRNode.STORE_VECTOR})
private static void testCMoveFGTforI(float[] a, float[] b, int[] c, int[] d, int[] r, int[] r2) {
for (int i = 0; i < a.length; i++) {
int cc = c[i];
int dd = d[i];
r2[i] = cc + dd;
r[i] = (a[i] > b[i]) ? cc : dd;
}
}
@Test
@IR(failOn = {IRNode.STORE_VECTOR})
private static void testCMoveFGTforL(float[] a, float[] b, long[] c, long[] d, long[] r, long[] r2) {
for (int i = 0; i < a.length; i++) {
long cc = c[i];
long dd = d[i];
r2[i] = cc + dd;
r[i] = (a[i] > b[i]) ? cc : dd;
}
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR_F, ">0",
IRNode.VECTOR_MASK_CMP_F, ">0",
IRNode.VECTOR_BLEND_F, ">0",
IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveFGTforF(float[] a, float[] b, float[] c, float[] d, float[] r, float[] r2) {
for (int i = 0; i < a.length; i++) {
float cc = c[i];
float dd = d[i];
r2[i] = cc + dd;
r[i] = (a[i] > b[i]) ? cc : dd;
}
}
@Test
@IR(failOn = {IRNode.STORE_VECTOR})
private static void testCMoveFGTforD(float[] a, float[] b, double[] c, double[] d, double[] r, double[] r2) {
for (int i = 0; i < a.length; i++) {
double cc = c[i];
double dd = d[i];
r2[i] = cc + dd;
r[i] = (a[i] > b[i]) ? cc : dd;
}
}
@Test
@IR(failOn = {IRNode.STORE_VECTOR})
private static void testCMoveDGTforI(double[] a, double[] b, int[] c, int[] d, int[] r, int[] r2) {
for (int i = 0; i < a.length; i++) {
int cc = c[i];
int dd = d[i];
r2[i] = cc + dd;
r[i] = (a[i] > b[i]) ? cc : dd;
}
}
@Test
@IR(failOn = {IRNode.STORE_VECTOR})
private static void testCMoveDGTforL(double[] a, double[] b, long[] c, long[] d, long[] r, long[] r2) {
for (int i = 0; i < a.length; i++) {
long cc = c[i];
long dd = d[i];
r2[i] = cc + dd;
r[i] = (a[i] > b[i]) ? cc : dd;
}
}
@Test
@IR(failOn = {IRNode.STORE_VECTOR})
private static void testCMoveDGTforF(double[] a, double[] b, float[] c, float[] d, float[] r, float[] r2) {
for (int i = 0; i < a.length; i++) {
float cc = c[i];
float dd = d[i];
r2[i] = cc + dd;
r[i] = (a[i] > b[i]) ? cc : dd;
}
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR_D, ">0",
IRNode.VECTOR_MASK_CMP_D, ">0",
IRNode.VECTOR_BLEND_D, ">0",
IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveDGTforD(double[] a, double[] b, double[] c, double[] d, double[] r, double[] r2) {
for (int i = 0; i < a.length; i++) {
double cc = c[i];
double dd = d[i];
r2[i] = cc + dd;
r[i] = (a[i] > b[i]) ? cc : dd;
}
}
// Use some constants in the comparison
@Test
@IR(counts = {IRNode.LOAD_VECTOR_F, ">0",
IRNode.VECTOR_MASK_CMP_F, ">0",
IRNode.VECTOR_BLEND_F, ">0",
IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveFGTforFCmpCon1(float a, float[] b, float[] c, float[] d, float[] r, float[] r2) {
for (int i = 0; i < b.length; i++) {
float cc = c[i];
float dd = d[i];
r2[i] = cc + dd;
r[i] = (a > b[i]) ? cc : dd;
}
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR_F, ">0",
IRNode.VECTOR_MASK_CMP_F, ">0",
IRNode.VECTOR_BLEND_F, ">0",
IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveFGTforFCmpCon2(float[] a, float b, float[] c, float[] d, float[] r, float[] r2) {
for (int i = 0; i < a.length; i++) {
float cc = c[i];
float dd = d[i];
r2[i] = cc + dd;
r[i] = (a[i] > b) ? cc : dd;
}
}
// A case that is currently not supported and is not expected to vectorize
@Test
@IR(failOn = {IRNode.STORE_VECTOR})
private static void testCMoveVDUnsupported() {
double[] doublec = new double[SIZE];
int seed = 1001;
for (int i = 0; i < doublec.length; i++) {
doublec[i] = (i % 2 == 0) ? seed + i : seed - i;
}
}
@Warmup(0)
@Run(test = {"testCMoveVFGT", "testCMoveVFLT","testCMoveVDLE", "testCMoveVDGE", "testCMoveVFEQ", "testCMoveVDNE",
"testCMoveVFGTSwap", "testCMoveVFLTSwap","testCMoveVDLESwap", "testCMoveVDGESwap",
"testCMoveFGTforFConst", "testCMoveFGEforFConst", "testCMoveFLTforFConst",
"testCMoveFLEforFConst", "testCMoveFEQforFConst", "testCMoveFNEQforFConst",
"testCMoveDGTforDConst", "testCMoveDGEforDConst", "testCMoveDLTforDConst",
"testCMoveDLEforDConst", "testCMoveDEQforDConst", "testCMoveDNEQforDConst",
"testCMoveFLTforFConstH2", "testCMoveFLEforFConstH2",
"testCMoveFYYforFConstH2", "testCMoveFXXforFConstH2",
"testCMoveDLTforDConstH2", "testCMoveDLEforDConstH2",
"testCMoveDYYforDConstH2", "testCMoveDXXforDConstH2"})
private void testCMove_runner() {
float[] floata = new float[SIZE];
float[] floatb = new float[SIZE];
float[] floatc = new float[SIZE];
double[] doublea = new double[SIZE];
double[] doubleb = new double[SIZE];
double[] doublec = new double[SIZE];
init(floata);
init(floatb);
init(doublea);
init(doubleb);
testCMoveVFGT(floata, floatb, floatc);
testCMoveVDLE(doublea, doubleb, doublec);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(floatc[i], cmoveFloatGT(floata[i], floatb[i]));
Asserts.assertEquals(doublec[i], cmoveDoubleLE(doublea[i], doubleb[i]));
}
testCMoveVFLT(floata, floatb, floatc);
testCMoveVDGE(doublea, doubleb, doublec);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(floatc[i], cmoveFloatLT(floata[i], floatb[i]));
Asserts.assertEquals(doublec[i], cmoveDoubleGE(doublea[i], doubleb[i]));
}
// Ensure we frequently have equals
for (int i = 0; i < SIZE; i++) {
if (i % 3 == 0) {
floatb[i] = floata[i];
doubleb[i] = doublea[i];
}
}
testCMoveVFEQ(floata, floatb, floatc);
testCMoveVDNE(doublea, doubleb, doublec);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(floatc[i], cmoveFloatEQ(floata[i], floatb[i]));
Asserts.assertEquals(doublec[i], cmoveDoubleNE(doublea[i], doubleb[i]));
}
testCMoveVFGTSwap(floata, floatb, floatc);
testCMoveVDLESwap(doublea, doubleb, doublec);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(floatc[i], cmoveFloatGTSwap(floata[i], floatb[i]));
Asserts.assertEquals(doublec[i], cmoveDoubleLESwap(doublea[i], doubleb[i]));
}
testCMoveVFLTSwap(floata, floatb, floatc);
testCMoveVDGESwap(doublea, doubleb, doublec);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(floatc[i], cmoveFloatLTSwap(floata[i], floatb[i]));
Asserts.assertEquals(doublec[i], cmoveDoubleGESwap(doublea[i], doubleb[i]));
}
// Extensions: compare 2 values, and pick from 2 consts
testCMoveFGTforFConst(floata, floatb, floatc);
testCMoveDGTforDConst(doublea, doubleb, doublec);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(floatc[i], cmoveFGTforFConst(floata[i], floatb[i]));
Asserts.assertEquals(doublec[i], cmoveDGTforDConst(doublea[i], doubleb[i]));
}
testCMoveFGEforFConst(floata, floatb, floatc);
testCMoveDGEforDConst(doublea, doubleb, doublec);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(floatc[i], cmoveFGEforFConst(floata[i], floatb[i]));
Asserts.assertEquals(doublec[i], cmoveDGEforDConst(doublea[i], doubleb[i]));
}
testCMoveFLTforFConst(floata, floatb, floatc);
testCMoveDLTforDConst(doublea, doubleb, doublec);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(floatc[i], cmoveFLTforFConst(floata[i], floatb[i]));
Asserts.assertEquals(doublec[i], cmoveDLTforDConst(doublea[i], doubleb[i]));
}
testCMoveFLEforFConst(floata, floatb, floatc);
testCMoveDLEforDConst(doublea, doubleb, doublec);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(floatc[i], cmoveFLEforFConst(floata[i], floatb[i]));
Asserts.assertEquals(doublec[i], cmoveDLEforDConst(doublea[i], doubleb[i]));
}
testCMoveFEQforFConst(floata, floatb, floatc);
testCMoveDEQforDConst(doublea, doubleb, doublec);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(floatc[i], cmoveFEQforFConst(floata[i], floatb[i]));
Asserts.assertEquals(doublec[i], cmoveDEQforDConst(doublea[i], doubleb[i]));
}
testCMoveFNEQforFConst(floata, floatb, floatc);
testCMoveDNEQforDConst(doublea, doubleb, doublec);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(floatc[i], cmoveFNEQforFConst(floata[i], floatb[i]));
Asserts.assertEquals(doublec[i], cmoveDNEQforDConst(doublea[i], doubleb[i]));
}
// Hand-unrolled (H2) examples:
testCMoveFLTforFConstH2(floata, floatb, floatc);
testCMoveDLTforDConstH2(doublea, doubleb, doublec);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(floatc[i], cmoveFLTforFConst(floata[i], floatb[i]));
Asserts.assertEquals(doublec[i], cmoveDLTforDConst(doublea[i], doubleb[i]));
}
testCMoveFLEforFConstH2(floata, floatb, floatc);
testCMoveDLEforDConstH2(doublea, doubleb, doublec);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(floatc[i], cmoveFLEforFConst(floata[i], floatb[i]));
Asserts.assertEquals(doublec[i], cmoveDLEforDConst(doublea[i], doubleb[i]));
}
testCMoveFYYforFConstH2(floata, floatb, floatc);
testCMoveDYYforDConstH2(doublea, doubleb, doublec);
for (int i = 0; i < SIZE; i+=2) {
Asserts.assertEquals(floatc[i+0], cmoveFLEforFConst(floata[i+0], floatb[i+0]));
Asserts.assertEquals(doublec[i+0], cmoveDLEforDConst(doublea[i+0], doubleb[i+0]));
Asserts.assertEquals(floatc[i+1], cmoveFLTforFConst(floata[i+1], floatb[i+1]));
Asserts.assertEquals(doublec[i+1], cmoveDLTforDConst(doublea[i+1], doubleb[i+1]));
}
testCMoveFXXforFConstH2(floata, floatb, floatc);
testCMoveDXXforDConstH2(doublea, doubleb, doublec);
for (int i = 0; i < SIZE; i+=2) {
Asserts.assertEquals(floatc[i+0], cmoveFLTforFConst(floata[i+0], floatb[i+0]));
Asserts.assertEquals(doublec[i+0], cmoveDLTforDConst(doublea[i+0], doubleb[i+0]));
Asserts.assertEquals(floatc[i+1], cmoveFLEforFConst(floata[i+1], floatb[i+1]));
Asserts.assertEquals(doublec[i+1], cmoveDLEforDConst(doublea[i+1], doubleb[i+1]));
}
}
@Warmup(0)
@Run(test = {"testCMoveIGTforI",
"testCMoveIGTforL",
"testCMoveIGTforF",
"testCMoveIGTforD",
"testCMoveLGTforI",
"testCMoveLGTforL",
"testCMoveLGTforF",
"testCMoveLGTforD",
"testCMoveFGTforI",
"testCMoveFGTforL",
"testCMoveFGTforF",
"testCMoveFGTforD",
"testCMoveDGTforI",
"testCMoveDGTforL",
"testCMoveDGTforF",
"testCMoveDGTforD",
"testCMoveFGTforFCmpCon1",
"testCMoveFGTforFCmpCon2"})
private void testCMove_runner_two() {
int[] aI = new int[SIZE];
int[] bI = new int[SIZE];
int[] cI = new int[SIZE];
int[] dI = new int[SIZE];
int[] rI = new int[SIZE];
long[] aL = new long[SIZE];
long[] bL = new long[SIZE];
long[] cL = new long[SIZE];
long[] dL = new long[SIZE];
long[] rL = new long[SIZE];
float[] aF = new float[SIZE];
float[] bF = new float[SIZE];
float[] cF = new float[SIZE];
float[] dF = new float[SIZE];
float[] rF = new float[SIZE];
double[] aD = new double[SIZE];
double[] bD = new double[SIZE];
double[] cD = new double[SIZE];
double[] dD = new double[SIZE];
double[] rD = new double[SIZE];
init(aI);
init(bI);
init(cI);
init(dI);
init(aL);
init(bL);
init(cL);
init(dL);
init(aF);
init(bF);
init(cF);
init(dF);
init(aD);
init(bD);
init(cD);
init(dD);
testCMoveIGTforI(aI, bI, cI, dI, rI, rI);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(rI[i], cmoveIGTforI(aI[i], bI[i], cI[i], dI[i]));
}
testCMoveIGTforL(aI, bI, cL, dL, rL, rL);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(rL[i], cmoveIGTforL(aI[i], bI[i], cL[i], dL[i]));
}
testCMoveIGTforF(aI, bI, cF, dF, rF, rF);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(rF[i], cmoveIGTforF(aI[i], bI[i], cF[i], dF[i]));
}
testCMoveIGTforD(aI, bI, cD, dD, rD, rD);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(rD[i], cmoveIGTforD(aI[i], bI[i], cD[i], dD[i]));
}
testCMoveLGTforI(aL, bL, cI, dI, rI, rI);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(rI[i], cmoveLGTforI(aL[i], bL[i], cI[i], dI[i]));
}
testCMoveLGTforL(aL, bL, cL, dL, rL, rL);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(rL[i], cmoveLGTforL(aL[i], bL[i], cL[i], dL[i]));
}
testCMoveLGTforF(aL, bL, cF, dF, rF, rF);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(rF[i], cmoveLGTforF(aL[i], bL[i], cF[i], dF[i]));
}
testCMoveLGTforD(aL, bL, cD, dD, rD, rD);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(rD[i], cmoveLGTforD(aL[i], bL[i], cD[i], dD[i]));
}
testCMoveFGTforI(aF, bF, cI, dI, rI, rI);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(rI[i], cmoveFGTforI(aF[i], bF[i], cI[i], dI[i]));
}
testCMoveFGTforL(aF, bF, cL, dL, rL, rL);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(rL[i], cmoveFGTforL(aF[i], bF[i], cL[i], dL[i]));
}
testCMoveFGTforF(aF, bF, cF, dF, rF, rF);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(rF[i], cmoveFGTforF(aF[i], bF[i], cF[i], dF[i]));
}
testCMoveFGTforD(aF, bF, cD, dD, rD, rD);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(rD[i], cmoveFGTforD(aF[i], bF[i], cD[i], dD[i]));
}
testCMoveDGTforI(aD, bD, cI, dI, rI, rI);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(rI[i], cmoveDGTforI(aD[i], bD[i], cI[i], dI[i]));
}
testCMoveDGTforL(aD, bD, cL, dL, rL, rL);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(rL[i], cmoveDGTforL(aD[i], bD[i], cL[i], dL[i]));
}
testCMoveDGTforF(aD, bD, cF, dF, rF, rF);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(rF[i], cmoveDGTforF(aD[i], bD[i], cF[i], dF[i]));
}
testCMoveDGTforD(aD, bD, cD, dD, rD, rD);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(rD[i], cmoveDGTforD(aD[i], bD[i], cD[i], dD[i]));
}
// Use some constants/invariants in the comparison
testCMoveFGTforFCmpCon1(aF[0], bF, cF, dF, rF, rF);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(rF[i], cmoveFGTforF(aF[0], bF[i], cF[i], dF[i]));
}
testCMoveFGTforFCmpCon2(aF, bF[0], cF, dF, rF, rF);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(rF[i], cmoveFGTforF(aF[i], bF[0], cF[i], dF[i]));
}
}
private static void init(int[] a) {
for (int i = 0; i < SIZE; i++) {
a[i] = RANDOM.nextInt();
}
}
private static void init(long[] a) {
for (int i = 0; i < SIZE; i++) {
a[i] = RANDOM.nextLong();
}
}
private static void init(float[] a) {
for (int i = 0; i < SIZE; i++) {
a[i] = switch(RANDOM.nextInt() % 20) {
case 0 -> Float.NaN;
case 1 -> 0;
case 2 -> 1;
case 3 -> Float.POSITIVE_INFINITY;
case 4 -> Float.NEGATIVE_INFINITY;
case 5 -> Float.MAX_VALUE;
case 6 -> Float.MIN_VALUE;
case 7, 8, 9 -> RANDOM.nextFloat();
default -> Float.intBitsToFloat(RANDOM.nextInt());
};
}
}
private static void init(double[] a) {
for (int i = 0; i < SIZE; i++) {
a[i] = switch(RANDOM.nextInt() % 20) {
case 0 -> Double.NaN;
case 1 -> 0;
case 2 -> 1;
case 3 -> Double.POSITIVE_INFINITY;
case 4 -> Double.NEGATIVE_INFINITY;
case 5 -> Double.MAX_VALUE;
case 6 -> Double.MIN_VALUE;
case 7, 8, 9 -> RANDOM.nextDouble();
default -> Double.longBitsToDouble(RANDOM.nextLong());
};
}
}
}