/*
 * Copyright (c) 2022, Arm Limited. All rights reserved.
 * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package compiler.c2.irTests;

import compiler.lib.ir_framework.*;
import java.util.Random;
import jdk.test.lib.Asserts;
import jdk.test.lib.Utils;

/*
 * @test
 * @bug 8289422 8306088
 * @key randomness
 * @summary Auto-vectorization enhancement to support vector conditional move.
 * @library /test/lib /
 * @run driver compiler.c2.irTests.TestVectorConditionalMove
 */

/**
 * IR-framework test for auto-vectorization of conditional moves.
 *
 * Layout of this class:
 *  - scalar reference helpers (cmove*): compute the expected value for one element;
 *  - @Test kernels (testCMove*): array loops whose compiled IR is checked by the @IR rules;
 *  - @Run runners: fill the inputs, call every kernel, and compare each result element
 *    against the matching scalar reference helper;
 *  - init(...) helpers: fill arrays with random data (float/double also get NaNs).
 *
 * The bodies of the @Test kernels are deliberately kept in their exact shape: the IR rules
 * depend on the statement order (see the note above testCMoveIGTforI).
 */
public class TestVectorConditionalMove {
    // Length of every array used by the runners.
    private static final int SIZE = 1024;
    // Shared random source obtained from the test library; all init(...) helpers draw from it in call order.
    private static final Random RANDOM = Utils.getRandomInstance();

    // Entry point: starts the IR framework with the two CMove-related VM flags.
    public static void main(String[] args) {
        TestFramework.runWithFlags("-XX:+UseCMoveUnconditionally", "-XX:+UseVectorCmov");
    }

    // ---------------------------------------------------------------------------------------
    // Scalar reference helpers. Each one mirrors the per-element expression of the kernel with
    // the corresponding name and is used by the runners to verify the kernel's output.
    // ---------------------------------------------------------------------------------------

    // Compare 2 values, and pick one of them
    private float cmoveFloatGT(float a, float b) {
        return (a > b) ? a : b;
    }

    // "Swap" variants compare (b, a) but still select a when the condition holds.
    private float cmoveFloatGTSwap(float a, float b) {
        return (b > a) ? a : b;
    }

    private float cmoveFloatLT(float a, float b) {
        return (a < b) ? a : b;
    }

    private float cmoveFloatLTSwap(float a, float b) {
        return (b < a) ? a : b;
    }

    private float cmoveFloatEQ(float a, float b) {
        return (a == b) ? a : b;
    }

    private double cmoveDoubleLE(double a, double b) {
        return (a <= b) ? a : b;
    }

    private double cmoveDoubleLESwap(double a, double b) {
        return (b <= a) ? a : b;
    }

    private double cmoveDoubleGE(double a, double b) {
        return (a >= b) ? a : b;
    }

    private double cmoveDoubleGESwap(double a, double b) {
        return (b >= a) ? a : b;
    }

    private double cmoveDoubleNE(double a, double b) {
        return (a != b) ? a : b;
    }

    // Extensions: compare 2 values, and pick from 2 consts
    private float cmoveFGTforFConst(float a, float b) {
        return (a > b) ? 0.1f : -0.1f;
    }

    private float cmoveFGEforFConst(float a, float b) {
        return (a >= b) ? 0.1f : -0.1f;
    }

    private float cmoveFLTforFConst(float a, float b) {
        return (a < b) ? 0.1f : -0.1f;
    }

    private float cmoveFLEforFConst(float a, float b) {
        return (a <= b) ? 0.1f : -0.1f;
    }

    private float cmoveFEQforFConst(float a, float b) {
        return (a == b) ? 0.1f : -0.1f;
    }

    private float cmoveFNEQforFConst(float a, float b) {
        return (a != b) ? 0.1f : -0.1f;
    }

    private double cmoveDGTforDConst(double a, double b) {
        return (a > b) ? 0.1 : -0.1;
    }

    private double cmoveDGEforDConst(double a, double b) {
        return (a >= b) ? 0.1 : -0.1;
    }

    private double cmoveDLTforDConst(double a, double b) {
        return (a < b) ? 0.1 : -0.1;
    }

    private double cmoveDLEforDConst(double a, double b) {
        return (a <= b) ? 0.1 : -0.1;
    }

    private double cmoveDEQforDConst(double a, double b) {
        return (a == b) ? 0.1 : -0.1;
    }

    private double cmoveDNEQforDConst(double a, double b) {
        return (a != b) ? 0.1 : -0.1;
    }

    // Extension: Compare 2 ILFD values, and pick from 2 ILFD values
    // Naming: cmove<X>GTfor<Y> compares two values of type X with '>' and selects between
    // two values of type Y (I = int, L = long, F = float, D = double).
    private int cmoveIGTforI(int a, int b, int c, int d) {
        return (a > b) ? c : d;
    }

    private long cmoveIGTforL(int a, int b, long c, long d) {
        return (a > b) ? c : d;
    }

    private float cmoveIGTforF(int a, int b, float c, float d) {
        return (a > b) ? c : d;
    }

    private double cmoveIGTforD(int a, int b, double c, double d) {
        return (a > b) ? c : d;
    }

    private int cmoveLGTforI(long a, long b, int c, int d) {
        return (a > b) ? c : d;
    }

    private long cmoveLGTforL(long a, long b, long c, long d) {
        return (a > b) ? c : d;
    }

    private float cmoveLGTforF(long a, long b, float c, float d) {
        return (a > b) ? c : d;
    }

    private double cmoveLGTforD(long a, long b, double c, double d) {
        return (a > b) ? c : d;
    }

    private int cmoveFGTforI(float a, float b, int c, int d) {
        return (a > b) ? c : d;
    }

    private long cmoveFGTforL(float a, float b, long c, long d) {
        return (a > b) ? c : d;
    }

    private float cmoveFGTforF(float a, float b, float c, float d) {
        return (a > b) ? c : d;
    }

    private double cmoveFGTforD(float a, float b, double c, double d) {
        return (a > b) ? c : d;
    }

    private int cmoveDGTforI(double a, double b, int c, int d) {
        return (a > b) ? c : d;
    }

    private long cmoveDGTforL(double a, double b, long c, long d) {
        return (a > b) ? c : d;
    }

    private float cmoveDGTforF(double a, double b, float c, float d) {
        return (a > b) ? c : d;
    }

    private double cmoveDGTforD(double a, double b, double c, double d) {
        return (a > b) ? c : d;
    }

    // ---------------------------------------------------------------------------------------
    // Kernels: compare 2 values, and pick one of them.
    // Each IR rule requires at least one LoadVector, VectorMaskCmp, VectorBlend and StoreVector
    // node, and is only applied when the CPU reports the avx or asimd feature.
    // ---------------------------------------------------------------------------------------

    // Compare 2 values, and pick one of them
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    private static void testCMoveVFGT(float[] a, float[] b, float[] c) {
        for (int i = 0; i < a.length; i++) {
            c[i] = (a[i] > b[i]) ? a[i] : b[i];
        }
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    private static void testCMoveVFGTSwap(float[] a, float[] b, float[] c) {
        for (int i = 0; i < a.length; i++) {
            c[i] = (b[i] > a[i]) ? a[i] : b[i];
        }
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    private static void testCMoveVFLT(float[] a, float[] b, float[] c) {
        for (int i = 0; i < a.length; i++) {
            c[i] = (a[i] < b[i]) ? a[i] : b[i];
        }
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    private static void testCMoveVFLTSwap(float[] a, float[] b, float[] c) {
        for (int i = 0; i < a.length; i++) {
            c[i] = (b[i] < a[i]) ? a[i] : b[i];
        }
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    private static void testCMoveVFEQ(float[] a, float[] b, float[] c) {
        for (int i = 0; i < a.length; i++) {
            c[i] = (a[i] == b[i]) ? a[i] : b[i];
        }
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    private static void testCMoveVDLE(double[] a, double[] b, double[] c) {
        for (int i = 0; i < a.length; i++) {
            c[i] = (a[i] <= b[i]) ? a[i] : b[i];
        }
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    private static void testCMoveVDLESwap(double[] a, double[] b, double[] c) {
        for (int i = 0; i < a.length; i++) {
            c[i] = (b[i] <= a[i]) ? a[i] : b[i];
        }
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    private static void testCMoveVDGE(double[] a, double[] b, double[] c) {
        for (int i = 0; i < a.length; i++) {
            c[i] = (a[i] >= b[i]) ? a[i] : b[i];
        }
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    private static void testCMoveVDGESwap(double[] a, double[] b, double[] c) {
        for (int i = 0; i < a.length; i++) {
            c[i] = (b[i] >= a[i]) ? a[i] : b[i];
        }
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    private static void testCMoveVDNE(double[] a, double[] b, double[] c) {
        for (int i = 0; i < a.length; i++) {
            c[i] = (a[i] != b[i]) ? a[i] : b[i];
        }
    }

    // Extensions: compare 2 values, and pick from 2 consts
    // (the selected values are the literals 0.1 / -0.1 instead of the compared array elements).
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    private static void testCMoveFGTforFConst(float[] a, float[] b, float[] c) {
        for (int i = 0; i < a.length; i++) {
            c[i] = (a[i] > b[i]) ? 0.1f : -0.1f;
        }
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    private static void testCMoveFGEforFConst(float[] a, float[] b, float[] c) {
        for (int i = 0; i < a.length; i++) {
            c[i] = (a[i] >= b[i]) ? 0.1f : -0.1f;
        }
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    private static void testCMoveFLTforFConst(float[] a, float[] b, float[] c) {
        for (int i = 0; i < a.length; i++) {
            c[i] = (a[i] < b[i]) ? 0.1f : -0.1f;
        }
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    private static void testCMoveFLEforFConst(float[] a, float[] b, float[] c) {
        for (int i = 0; i < a.length; i++) {
            c[i] = (a[i] <= b[i]) ? 0.1f : -0.1f;
        }
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    private static void testCMoveFEQforFConst(float[] a, float[] b, float[] c) {
        for (int i = 0; i < a.length; i++) {
            c[i] = (a[i] == b[i]) ? 0.1f : -0.1f;
        }
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    private static void testCMoveFNEQforFConst(float[] a, float[] b, float[] c) {
        for (int i = 0; i < a.length; i++) {
            c[i] = (a[i] != b[i]) ? 0.1f : -0.1f;
        }
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    private static void testCMoveDGTforDConst(double[] a, double[] b, double[] c) {
        for (int i = 0; i < a.length; i++) {
            c[i] = (a[i] > b[i]) ? 0.1 : -0.1;
        }
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    private static void testCMoveDGEforDConst(double[] a, double[] b, double[] c) {
        for (int i = 0; i < a.length; i++) {
            c[i] = (a[i] >= b[i]) ? 0.1 : -0.1;
        }
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    private static void testCMoveDLTforDConst(double[] a, double[] b, double[] c) {
        for (int i = 0; i < a.length; i++) {
            c[i] = (a[i] < b[i]) ? 0.1 : -0.1;
        }
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    private static void testCMoveDLEforDConst(double[] a, double[] b, double[] c) {
        for (int i = 0; i < a.length; i++) {
            c[i] = (a[i] <= b[i]) ? 0.1 : -0.1;
        }
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    private static void testCMoveDEQforDConst(double[] a, double[] b, double[] c) {
        for (int i = 0; i < a.length; i++) {
            c[i] = (a[i] == b[i]) ? 0.1 : -0.1;
        }
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    private static void testCMoveDNEQforDConst(double[] a, double[] b, double[] c) {
        for (int i = 0; i < a.length; i++) {
            c[i] = (a[i] != b[i]) ? 0.1 : -0.1;
        }
    }

    // Extension: Compare 2 ILFD values, and pick from 2 ILFD values
    // Note:
    //   To guarantee that CMove is introduced, I need to perform the loads before the branch. To ensure they
    //   do not float down into the branches, I compute a value, and store it to r2 (same as r, except that the
    //   compilation does not know that).
    //   So far, vectorization only works for CMoveF/D, with same data-width comparison (F/I for F, D/L for D).
    //
    // Consequently the kernels below fall into two groups:
    //   - "counts" rules (vector nodes expected): IGTforF, LGTforD, FGTforF, DGTforD;
    //   - "failOn" rules (no VectorMaskCmp / VectorBlend allowed): every other combination.

    // int compare, int select: result is not F/D -> must not vectorize the cmove.
    @Test
    @IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND})
    private static void testCMoveIGTforI(int[] a, int[] b, int[] c, int[] d, int[] r, int[] r2) {
        for (int i = 0; i < a.length; i++) {
            int cc = c[i];
            int dd = d[i];
            r2[i] = cc + dd;
            r[i] = (a[i] > b[i]) ? cc : dd;
        }
    }

    // int compare, long select: result is not F/D -> must not vectorize the cmove.
    @Test
    @IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND})
    private static void testCMoveIGTforL(int[] a, int[] b, long[] c, long[] d, long[] r, long[] r2) {
        for (int i = 0; i < a.length; i++) {
            long cc = c[i];
            long dd = d[i];
            r2[i] = cc + dd;
            r[i] = (a[i] > b[i]) ? cc : dd;
        }
    }

    // int compare, float select: same data width -> expected to vectorize.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    private static void testCMoveIGTforF(int[] a, int[] b, float[] c, float[] d, float[] r, float[] r2) {
        for (int i = 0; i < a.length; i++) {
            float cc = c[i];
            float dd = d[i];
            r2[i] = cc + dd;
            r[i] = (a[i] > b[i]) ? cc : dd;
        }
    }

    // int compare, double select: data widths differ -> must not vectorize the cmove.
    @Test
    @IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND})
    private static void testCMoveIGTforD(int[] a, int[] b, double[] c, double[] d, double[] r, double[] r2) {
        for (int i = 0; i < a.length; i++) {
            double cc = c[i];
            double dd = d[i];
            r2[i] = cc + dd;
            r[i] = (a[i] > b[i]) ? cc : dd;
        }
    }

    // long compare, int select: result is not F/D -> must not vectorize the cmove.
    @Test
    @IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND})
    private static void testCMoveLGTforI(long[] a, long[] b, int[] c, int[] d, int[] r, int[] r2) {
        for (int i = 0; i < a.length; i++) {
            int cc = c[i];
            int dd = d[i];
            r2[i] = cc + dd;
            r[i] = (a[i] > b[i]) ? cc : dd;
        }
    }

    // long compare, long select: result is not F/D -> must not vectorize the cmove.
    @Test
    @IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND})
    private static void testCMoveLGTforL(long[] a, long[] b, long[] c, long[] d, long[] r, long[] r2) {
        for (int i = 0; i < a.length; i++) {
            long cc = c[i];
            long dd = d[i];
            r2[i] = cc + dd;
            r[i] = (a[i] > b[i]) ? cc : dd;
        }
    }

    // long compare, float select: data widths differ -> must not vectorize the cmove.
    @Test
    @IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND})
    private static void testCMoveLGTforF(long[] a, long[] b, float[] c, float[] d, float[] r, float[] r2) {
        for (int i = 0; i < a.length; i++) {
            float cc = c[i];
            float dd = d[i];
            r2[i] = cc + dd;
            r[i] = (a[i] > b[i]) ? cc : dd;
        }
    }

    // long compare, double select: same data width -> expected to vectorize (avx2 or asimd only).
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
    // Requires avx2, else L is restricted to 16 byte, and D has 32. That leads to a vector elements mismatch of 2 to 4.
    private static void testCMoveLGTforD(long[] a, long[] b, double[] c, double[] d, double[] r, double[] r2) {
        for (int i = 0; i < a.length; i++) {
            double cc = c[i];
            double dd = d[i];
            r2[i] = cc + dd;
            r[i] = (a[i] > b[i]) ? cc : dd;
        }
    }

    // float compare, int select: result is not F/D -> must not vectorize the cmove.
    @Test
    @IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND})
    private static void testCMoveFGTforI(float[] a, float[] b, int[] c, int[] d, int[] r, int[] r2) {
        for (int i = 0; i < a.length; i++) {
            int cc = c[i];
            int dd = d[i];
            r2[i] = cc + dd;
            r[i] = (a[i] > b[i]) ? cc : dd;
        }
    }

    // float compare, long select: result is not F/D -> must not vectorize the cmove.
    @Test
    @IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND})
    private static void testCMoveFGTforL(float[] a, float[] b, long[] c, long[] d, long[] r, long[] r2) {
        for (int i = 0; i < a.length; i++) {
            long cc = c[i];
            long dd = d[i];
            r2[i] = cc + dd;
            r[i] = (a[i] > b[i]) ? cc : dd;
        }
    }

    // float compare, float select: same data width -> expected to vectorize.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    private static void testCMoveFGTforF(float[] a, float[] b, float[] c, float[] d, float[] r, float[] r2) {
        for (int i = 0; i < a.length; i++) {
            float cc = c[i];
            float dd = d[i];
            r2[i] = cc + dd;
            r[i] = (a[i] > b[i]) ? cc : dd;
        }
    }

    // float compare, double select: data widths differ -> must not vectorize the cmove.
    @Test
    @IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND})
    private static void testCMoveFGTforD(float[] a, float[] b, double[] c, double[] d, double[] r, double[] r2) {
        for (int i = 0; i < a.length; i++) {
            double cc = c[i];
            double dd = d[i];
            r2[i] = cc + dd;
            r[i] = (a[i] > b[i]) ? cc : dd;
        }
    }

    // double compare, int select: result is not F/D -> must not vectorize the cmove.
    @Test
    @IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND})
    private static void testCMoveDGTforI(double[] a, double[] b, int[] c, int[] d, int[] r, int[] r2) {
        for (int i = 0; i < a.length; i++) {
            int cc = c[i];
            int dd = d[i];
            r2[i] = cc + dd;
            r[i] = (a[i] > b[i]) ? cc : dd;
        }
    }

    // double compare, long select: result is not F/D -> must not vectorize the cmove.
    @Test
    @IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND})
    private static void testCMoveDGTforL(double[] a, double[] b, long[] c, long[] d, long[] r, long[] r2) {
        for (int i = 0; i < a.length; i++) {
            long cc = c[i];
            long dd = d[i];
            r2[i] = cc + dd;
            r[i] = (a[i] > b[i]) ? cc : dd;
        }
    }

    // double compare, float select: data widths differ -> must not vectorize the cmove.
    @Test
    @IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND})
    private static void testCMoveDGTforF(double[] a, double[] b, float[] c, float[] d, float[] r, float[] r2) {
        for (int i = 0; i < a.length; i++) {
            float cc = c[i];
            float dd = d[i];
            r2[i] = cc + dd;
            r[i] = (a[i] > b[i]) ? cc : dd;
        }
    }

    // double compare, double select: same data width -> expected to vectorize.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    private static void testCMoveDGTforD(double[] a, double[] b, double[] c, double[] d, double[] r, double[] r2) {
        for (int i = 0; i < a.length; i++) {
            double cc = c[i];
            double dd = d[i];
            r2[i] = cc + dd;
            r[i] = (a[i] > b[i]) ? cc : dd;
        }
    }

    // Use some constants in the comparison
    // (one side of the comparison is a loop-invariant scalar argument instead of an array element).
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    private static void testCMoveFGTforFCmpCon1(float a, float[] b, float[] c, float[] d, float[] r, float[] r2) {
        for (int i = 0; i < b.length; i++) {
            float cc = c[i];
            float dd = d[i];
            r2[i] = cc + dd;
            r[i] = (a > b[i]) ? cc : dd;
        }
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    private static void testCMoveFGTforFCmpCon2(float[] a, float b, float[] c, float[] d, float[] r, float[] r2) {
        for (int i = 0; i < a.length; i++) {
            float cc = c[i];
            float dd = d[i];
            r2[i] = cc + dd;
            r[i] = (a[i] > b) ? cc : dd;
        }
    }

    // A case that is currently not supported and is not expected to vectorize
    // (int condition on the loop index selecting between two int expressions stored into a double array).
    // This test takes no arguments and is not listed in any @Run method.
    @Test
    @IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND})
    private static void testCMoveVDUnsupported() {
        double[] doublec = new double[SIZE];
        int seed = 1001;
        for (int i = 0; i < doublec.length; i++) {
            doublec[i] = (i % 2 == 0) ? seed + i : seed - i;
        }
    }

    // Runner for the "pick one of the compared values" and "pick from 2 consts" kernels.
    // Every kernel is called on the same float/double inputs and each output element is
    // compared with the scalar reference helper of the same name.
    @Warmup(0)
    @Run(test = {"testCMoveVFGT", "testCMoveVFLT","testCMoveVDLE", "testCMoveVDGE", "testCMoveVFEQ", "testCMoveVDNE",
                 "testCMoveVFGTSwap", "testCMoveVFLTSwap","testCMoveVDLESwap", "testCMoveVDGESwap",
                 "testCMoveFGTforFConst", "testCMoveFGEforFConst", "testCMoveFLTforFConst",
                 "testCMoveFLEforFConst", "testCMoveFEQforFConst", "testCMoveFNEQforFConst",
                 "testCMoveDGTforDConst", "testCMoveDGEforDConst", "testCMoveDLTforDConst",
                 "testCMoveDLEforDConst", "testCMoveDEQforDConst", "testCMoveDNEQforDConst"})
    private void testCMove_runner() {
        float[] floata = new float[SIZE];
        float[] floatb = new float[SIZE];
        float[] floatc = new float[SIZE];
        double[] doublea = new double[SIZE];
        double[] doubleb = new double[SIZE];
        double[] doublec = new double[SIZE];

        // Only the inputs are randomized; floatc/doublec are overwritten by every kernel call.
        init(floata);
        init(floatb);
        init(doublea);
        init(doubleb);

        testCMoveVFGT(floata, floatb, floatc);
        testCMoveVDLE(doublea, doubleb, doublec);
        for (int i = 0; i < SIZE; i++) {
            Asserts.assertEquals(floatc[i], cmoveFloatGT(floata[i], floatb[i]));
            Asserts.assertEquals(doublec[i], cmoveDoubleLE(doublea[i], doubleb[i]));
        }

        testCMoveVFLT(floata, floatb, floatc);
        testCMoveVDGE(doublea, doubleb, doublec);
        for (int i = 0; i < SIZE; i++) {
            Asserts.assertEquals(floatc[i], cmoveFloatLT(floata[i], floatb[i]));
            Asserts.assertEquals(doublec[i], cmoveDoubleGE(doublea[i], doubleb[i]));
        }

        // Ensure we frequently have equals
        for (int i = 0; i < SIZE; i++) {
            if (i % 3 == 0) {
                floatb[i] = floata[i];
                doubleb[i] = doublea[i];
            }
        }

        testCMoveVFEQ(floata, floatb, floatc);
        testCMoveVDNE(doublea, doubleb, doublec);
        for (int i = 0; i < SIZE; i++) {
            Asserts.assertEquals(floatc[i], cmoveFloatEQ(floata[i], floatb[i]));
            Asserts.assertEquals(doublec[i], cmoveDoubleNE(doublea[i], doubleb[i]));
        }

        testCMoveVFGTSwap(floata, floatb, floatc);
        testCMoveVDLESwap(doublea, doubleb, doublec);
        for (int i = 0; i < SIZE; i++) {
            Asserts.assertEquals(floatc[i], cmoveFloatGTSwap(floata[i], floatb[i]));
            Asserts.assertEquals(doublec[i], cmoveDoubleLESwap(doublea[i], doubleb[i]));
        }

        testCMoveVFLTSwap(floata, floatb, floatc);
        testCMoveVDGESwap(doublea, doubleb, doublec);
        for (int i = 0; i < SIZE; i++) {
            Asserts.assertEquals(floatc[i], cmoveFloatLTSwap(floata[i], floatb[i]));
            Asserts.assertEquals(doublec[i], cmoveDoubleGESwap(doublea[i], doubleb[i]));
        }

        // Extensions: compare 2 values, and pick from 2 consts
        testCMoveFGTforFConst(floata, floatb, floatc);
        testCMoveDGTforDConst(doublea, doubleb, doublec);
        for (int i = 0; i < SIZE; i++) {
            Asserts.assertEquals(floatc[i], cmoveFGTforFConst(floata[i], floatb[i]));
            Asserts.assertEquals(doublec[i], cmoveDGTforDConst(doublea[i], doubleb[i]));
        }

        testCMoveFGEforFConst(floata, floatb, floatc);
        testCMoveDGEforDConst(doublea, doubleb, doublec);
        for (int i = 0; i < SIZE; i++) {
            Asserts.assertEquals(floatc[i], cmoveFGEforFConst(floata[i], floatb[i]));
            Asserts.assertEquals(doublec[i], cmoveDGEforDConst(doublea[i], doubleb[i]));
        }

        testCMoveFLTforFConst(floata, floatb, floatc);
        testCMoveDLTforDConst(doublea, doubleb, doublec);
        for (int i = 0; i < SIZE; i++) {
            Asserts.assertEquals(floatc[i], cmoveFLTforFConst(floata[i], floatb[i]));
            Asserts.assertEquals(doublec[i], cmoveDLTforDConst(doublea[i], doubleb[i]));
        }

        testCMoveFLEforFConst(floata, floatb, floatc);
        testCMoveDLEforDConst(doublea, doubleb, doublec);
        for (int i = 0; i < SIZE; i++) {
            Asserts.assertEquals(floatc[i], cmoveFLEforFConst(floata[i], floatb[i]));
            Asserts.assertEquals(doublec[i], cmoveDLEforDConst(doublea[i], doubleb[i]));
        }

        testCMoveFEQforFConst(floata, floatb, floatc);
        testCMoveDEQforDConst(doublea, doubleb, doublec);
        for (int i = 0; i < SIZE; i++) {
            Asserts.assertEquals(floatc[i], cmoveFEQforFConst(floata[i], floatb[i]));
            Asserts.assertEquals(doublec[i], cmoveDEQforDConst(doublea[i], doubleb[i]));
        }

        testCMoveFNEQforFConst(floata, floatb, floatc);
        testCMoveDNEQforDConst(doublea, doubleb, doublec);
        for (int i = 0; i < SIZE; i++) {
            Asserts.assertEquals(floatc[i], cmoveFNEQforFConst(floata[i], floatb[i]));
            Asserts.assertEquals(doublec[i], cmoveDNEQforDConst(doublea[i], doubleb[i]));
        }
    }

    // Runner for the "compare X, select Y" kernels and the two invariant-operand kernels.
    // The same result array is passed for both r and r2: the kernel first stores cc + dd
    // through r2 and then overwrites that element through r, so only the cmove result remains.
    @Warmup(0)
    @Run(test = {"testCMoveIGTforI",
                 "testCMoveIGTforL",
                 "testCMoveIGTforF",
                 "testCMoveIGTforD",
                 "testCMoveLGTforI",
                 "testCMoveLGTforL",
                 "testCMoveLGTforF",
                 "testCMoveLGTforD",
                 "testCMoveFGTforI",
                 "testCMoveFGTforL",
                 "testCMoveFGTforF",
                 "testCMoveFGTforD",
                 "testCMoveDGTforI",
                 "testCMoveDGTforL",
                 "testCMoveDGTforF",
                 "testCMoveDGTforD",
                 "testCMoveFGTforFCmpCon1",
                 "testCMoveFGTforFCmpCon2"})
    private void testCMove_runner_two() {
        // a*/b* are compared, c*/d* are the values to select from, r* receives the result.
        int[] aI = new int[SIZE];
        int[] bI = new int[SIZE];
        int[] cI = new int[SIZE];
        int[] dI = new int[SIZE];
        int[] rI = new int[SIZE];
        long[] aL = new long[SIZE];
        long[] bL = new long[SIZE];
        long[] cL = new long[SIZE];
        long[] dL = new long[SIZE];
        long[] rL = new long[SIZE];
        float[] aF = new float[SIZE];
        float[] bF = new float[SIZE];
        float[] cF = new float[SIZE];
        float[] dF = new float[SIZE];
        float[] rF = new float[SIZE];
        double[] aD = new double[SIZE];
        double[] bD = new double[SIZE];
        double[] cD = new double[SIZE];
        double[] dD = new double[SIZE];
        double[] rD = new double[SIZE];

        init(aI);
        init(bI);
        init(cI);
        init(dI);
        init(aL);
        init(bL);
        init(cL);
        init(dL);
        init(aF);
        init(bF);
        init(cF);
        init(dF);
        init(aD);
        init(bD);
        init(cD);
        init(dD);

        testCMoveIGTforI(aI, bI, cI, dI, rI, rI);
        for (int i = 0; i < SIZE; i++) {
            Asserts.assertEquals(rI[i], cmoveIGTforI(aI[i], bI[i], cI[i], dI[i]));
        }

        testCMoveIGTforL(aI, bI, cL, dL, rL, rL);
        for (int i = 0; i < SIZE; i++) {
            Asserts.assertEquals(rL[i], cmoveIGTforL(aI[i], bI[i], cL[i], dL[i]));
        }

        testCMoveIGTforF(aI, bI, cF, dF, rF, rF);
        for (int i = 0; i < SIZE; i++) {
            Asserts.assertEquals(rF[i], cmoveIGTforF(aI[i], bI[i], cF[i], dF[i]));
        }

        testCMoveIGTforD(aI, bI, cD, dD, rD, rD);
        for (int i = 0; i < SIZE; i++) {
            Asserts.assertEquals(rD[i], cmoveIGTforD(aI[i], bI[i], cD[i], dD[i]));
        }

        testCMoveLGTforI(aL, bL, cI, dI, rI, rI);
        for (int i = 0; i < SIZE; i++) {
            Asserts.assertEquals(rI[i], cmoveLGTforI(aL[i], bL[i], cI[i], dI[i]));
        }

        testCMoveLGTforL(aL, bL, cL, dL, rL, rL);
        for (int i = 0; i < SIZE; i++) {
            Asserts.assertEquals(rL[i], cmoveLGTforL(aL[i], bL[i], cL[i], dL[i]));
        }

        testCMoveLGTforF(aL, bL, cF, dF, rF, rF);
        for (int i = 0; i < SIZE; i++) {
            Asserts.assertEquals(rF[i], cmoveLGTforF(aL[i], bL[i], cF[i], dF[i]));
        }

        testCMoveLGTforD(aL, bL, cD, dD, rD, rD);
        for (int i = 0; i < SIZE; i++) {
            Asserts.assertEquals(rD[i], cmoveLGTforD(aL[i], bL[i], cD[i], dD[i]));
        }

        testCMoveFGTforI(aF, bF, cI, dI, rI, rI);
        for (int i = 0; i < SIZE; i++) {
            Asserts.assertEquals(rI[i], cmoveFGTforI(aF[i], bF[i], cI[i], dI[i]));
        }

        testCMoveFGTforL(aF, bF, cL, dL, rL, rL);
        for (int i = 0; i < SIZE; i++) {
            Asserts.assertEquals(rL[i], cmoveFGTforL(aF[i], bF[i], cL[i], dL[i]));
        }

        testCMoveFGTforF(aF, bF, cF, dF, rF, rF);
        for (int i = 0; i < SIZE; i++) {
            Asserts.assertEquals(rF[i], cmoveFGTforF(aF[i], bF[i], cF[i], dF[i]));
        }

        testCMoveFGTforD(aF, bF, cD, dD, rD, rD);
        for (int i = 0; i < SIZE; i++) {
            Asserts.assertEquals(rD[i], cmoveFGTforD(aF[i], bF[i], cD[i], dD[i]));
        }

        testCMoveDGTforI(aD, bD, cI, dI, rI, rI);
        for (int i = 0; i < SIZE; i++) {
            Asserts.assertEquals(rI[i], cmoveDGTforI(aD[i], bD[i], cI[i], dI[i]));
        }

        testCMoveDGTforL(aD, bD, cL, dL, rL, rL);
        for (int i = 0; i < SIZE; i++) {
            Asserts.assertEquals(rL[i], cmoveDGTforL(aD[i], bD[i], cL[i], dL[i]));
        }

        testCMoveDGTforF(aD, bD, cF, dF, rF, rF);
        for (int i = 0; i < SIZE; i++) {
            Asserts.assertEquals(rF[i], cmoveDGTforF(aD[i], bD[i], cF[i], dF[i]));
        }

        testCMoveDGTforD(aD, bD, cD, dD, rD, rD);
        for (int i = 0; i < SIZE; i++) {
            Asserts.assertEquals(rD[i], cmoveDGTforD(aD[i], bD[i], cD[i], dD[i]));
        }

        // Use some constants/invariants in the comparison
        testCMoveFGTforFCmpCon1(aF[0], bF, cF, dF, rF, rF);
        for (int i = 0; i < SIZE; i++) {
            Asserts.assertEquals(rF[i], cmoveFGTforF(aF[0], bF[i], cF[i], dF[i]));
        }

        testCMoveFGTforFCmpCon2(aF, bF[0], cF, dF, rF, rF);
        for (int i = 0; i < SIZE; i++) {
            Asserts.assertEquals(rF[i], cmoveFGTforF(aF[i], bF[0], cF[i], dF[i]));
        }
    }

    // ---------------------------------------------------------------------------------------
    // Input initialization. The loops are bounded by the array's own length so the helpers
    // fill exactly the array they are given, whatever its size.
    // ---------------------------------------------------------------------------------------

    // Fills every element of a with a random int.
    private static void init(int[] a) {
        for (int i = 0; i < a.length; i++) {
            a[i] = RANDOM.nextInt();
        }
    }

    // Fills every element of a with a random long.
    private static void init(long[] a) {
        for (int i = 0; i < a.length; i++) {
            a[i] = RANDOM.nextLong();
        }
    }

    // Fills every element of a with a random float; roughly one element in 20 is replaced
    // by NaN so that the comparisons are also exercised with unordered operands.
    private static void init(float[] a) {
        for (int i = 0; i < a.length; i++) {
            a[i] = RANDOM.nextFloat();
            if (RANDOM.nextInt() % 20 == 0) {
                a[i] = Float.NaN;
            }
        }
    }

    // Fills every element of a with a random double; roughly one element in 20 is replaced
    // by NaN so that the comparisons are also exercised with unordered operands.
    private static void init(double[] a) {
        for (int i = 0; i < a.length; i++) {
            a[i] = RANDOM.nextDouble();
            if (RANDOM.nextInt() % 20 == 0) {
                a[i] = Double.NaN;
            }
        }
    }
}