/* * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. */ package compiler.loopopts.superword; import compiler.lib.ir_framework.*; import jdk.test.lib.Utils; import jdk.test.whitebox.WhiteBox; import jdk.internal.misc.Unsafe; import java.lang.reflect.Array; import java.util.Map; import java.util.HashMap; import java.util.Random; import java.nio.ByteOrder; /* * @test id=NoAlignVector * @bug 8310190 * @summary Test AlignVector with various loop init, stride, scale, invar, etc. * @modules java.base/jdk.internal.misc * @library /test/lib / * @requires vm.compiler2.enabled * @run driver compiler.loopopts.superword.TestAlignVector NoAlignVector */ /* * @test id=AlignVector * @bug 8310190 * @summary Test AlignVector with various loop init, stride, scale, invar, etc. * @modules java.base/jdk.internal.misc * @library /test/lib / * @requires vm.compiler2.enabled * @run driver compiler.loopopts.superword.TestAlignVector AlignVector */ /* * @test id=VerifyAlignVector * @bug 8310190 * @summary Test AlignVector with various loop init, stride, scale, invar, etc. * @modules java.base/jdk.internal.misc * @library /test/lib / * @requires vm.compiler2.enabled * @run driver compiler.loopopts.superword.TestAlignVector VerifyAlignVector */ public class TestAlignVector { static int RANGE = 1024*8; static int RANGE_FINAL = 1024*8; private static final Unsafe UNSAFE = Unsafe.getUnsafe(); private static final Random RANDOM = Utils.getRandomInstance(); // Inputs byte[] aB; byte[] bB; byte mB = (byte)31; short[] aS; short[] bS; short mS = (short)0xF0F0; int[] aI; int[] bI; int mI = 0xF0F0F0F0; long[] aL; long[] bL; long mL = 0xF0F0F0F0F0F0F0F0L; // List of tests Map tests = new HashMap(); // List of gold, the results from the first run before compilation Map golds = new HashMap(); interface TestFunction { Object[] run(); } public static void main(String[] args) { TestFramework framework = new TestFramework(TestAlignVector.class); framework.addFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED", "-XX:LoopUnrollLimit=250"); switch (args[0]) { case "NoAlignVector" -> { framework.addFlags("-XX:-AlignVector"); } case "AlignVector" -> { framework.addFlags("-XX:+AlignVector"); } case "VerifyAlignVector" -> { framework.addFlags("-XX:+AlignVector", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+VerifyAlignVector"); } default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); } } framework.start(); } public TestAlignVector() { // Generate input once aB = generateB(); bB = generateB(); aS = generateS(); bS = generateS(); aI = generateI(); bI = generateI(); aL = generateL(); bL = generateL(); // Add all tests to list tests.put("test0", () -> { return test0(aB.clone(), bB.clone(), mB); }); tests.put("test1", () -> { return test1(aB.clone(), bB.clone(), mB); }); tests.put("test2", () -> { return test2(aB.clone(), bB.clone(), mB); }); tests.put("test3", () -> { return test3(aB.clone(), bB.clone(), mB); }); tests.put("test4", () -> { return test4(aB.clone(), bB.clone(), mB); }); tests.put("test5", () -> { return test5(aB.clone(), bB.clone(), mB, 0); }); tests.put("test6", () -> { return test6(aB.clone(), bB.clone(), mB); }); tests.put("test7", () -> { return test7(aS.clone(), bS.clone(), mS); }); tests.put("test8", () -> { return test8(aB.clone(), bB.clone(), mB, 0); }); tests.put("test8", () -> { return test8(aB.clone(), bB.clone(), mB, 1); }); tests.put("test9", () -> { return test9(aB.clone(), bB.clone(), mB); }); tests.put("test10a", () -> { return test10a(aB.clone(), bB.clone(), mB); }); tests.put("test10b", () -> { return test10b(aB.clone(), bB.clone(), mB); }); tests.put("test10c", () -> { return test10c(aS.clone(), bS.clone(), mS); }); tests.put("test10d", () -> { return test10d(aS.clone(), bS.clone(), mS); }); tests.put("test11aB", () -> { return test11aB(aB.clone(), bB.clone(), mB); }); tests.put("test11aS", () -> { return test11aS(aS.clone(), bS.clone(), mS); }); tests.put("test11aI", () -> { return test11aI(aI.clone(), bI.clone(), mI); }); tests.put("test11aL", () -> { return test11aL(aL.clone(), bL.clone(), mL); }); tests.put("test11bB", () -> { return test11bB(aB.clone(), bB.clone(), mB); }); tests.put("test11bS", () -> { return test11bS(aS.clone(), bS.clone(), mS); }); tests.put("test11bI", () -> { return test11bI(aI.clone(), bI.clone(), mI); }); tests.put("test11bL", () -> { return test11bL(aL.clone(), bL.clone(), mL); }); tests.put("test11cB", () -> { return test11cB(aB.clone(), bB.clone(), mB); }); tests.put("test11cS", () -> { return test11cS(aS.clone(), bS.clone(), mS); }); tests.put("test11cI", () -> { return test11cI(aI.clone(), bI.clone(), mI); }); tests.put("test11cL", () -> { return test11cL(aL.clone(), bL.clone(), mL); }); tests.put("test11dB", () -> { return test11dB(aB.clone(), bB.clone(), mB, 0); }); tests.put("test11dS", () -> { return test11dS(aS.clone(), bS.clone(), mS, 0); }); tests.put("test11dI", () -> { return test11dI(aI.clone(), bI.clone(), mI, 0); }); tests.put("test11dL", () -> { return test11dL(aL.clone(), bL.clone(), mL, 0); }); tests.put("test12", () -> { return test12(aB.clone(), bB.clone(), mB); }); tests.put("test13aIL", () -> { return test13aIL(aI.clone(), aL.clone()); }); tests.put("test13aIB", () -> { return test13aIB(aI.clone(), aB.clone()); }); tests.put("test13aIS", () -> { return test13aIS(aI.clone(), aS.clone()); }); tests.put("test13aBSIL", () -> { return test13aBSIL(aB.clone(), aS.clone(), aI.clone(), aL.clone()); }); tests.put("test13bIL", () -> { return test13bIL(aI.clone(), aL.clone()); }); tests.put("test13bIB", () -> { return test13bIB(aI.clone(), aB.clone()); }); tests.put("test13bIS", () -> { return test13bIS(aI.clone(), aS.clone()); }); tests.put("test13bBSIL", () -> { return test13bBSIL(aB.clone(), aS.clone(), aI.clone(), aL.clone()); }); tests.put("test14aB", () -> { return test14aB(aB.clone()); }); tests.put("test14bB", () -> { return test14bB(aB.clone()); }); tests.put("test14cB", () -> { return test14cB(aB.clone()); }); tests.put("test15aB", () -> { return test15aB(aB.clone()); }); tests.put("test15bB", () -> { return test15bB(aB.clone()); }); tests.put("test15cB", () -> { return test15cB(aB.clone()); }); tests.put("test16a", () -> { return test16a(aB.clone(), aS.clone()); }); tests.put("test16b", () -> { return test16b(aB.clone()); }); tests.put("test17a", () -> { return test17a(aL.clone()); }); tests.put("test17b", () -> { return test17b(aL.clone()); }); tests.put("test17c", () -> { return test17c(aL.clone()); }); tests.put("test17d", () -> { return test17d(aL.clone()); }); tests.put("test18a", () -> { return test18a(aB.clone(), aI.clone()); }); tests.put("test18b", () -> { return test18b(aB.clone(), aI.clone()); }); tests.put("test19", () -> { return test19(aI.clone(), bI.clone()); }); tests.put("test20", () -> { return test20(aB.clone()); }); // Compute gold value for all test methods before compilation for (Map.Entry entry : tests.entrySet()) { String name = entry.getKey(); TestFunction test = entry.getValue(); Object[] gold = test.run(); golds.put(name, gold); } } @Warmup(100) @Run(test = {"test0", "test1", "test2", "test3", "test4", "test5", "test6", "test7", "test8", "test9", "test10a", "test10b", "test10c", "test10d", "test11aB", "test11aS", "test11aI", "test11aL", "test11bB", "test11bS", "test11bI", "test11bL", "test11cB", "test11cS", "test11cI", "test11cL", "test11dB", "test11dS", "test11dI", "test11dL", "test12", "test13aIL", "test13aIB", "test13aIS", "test13aBSIL", "test13bIL", "test13bIB", "test13bIS", "test13bBSIL", "test14aB", "test14bB", "test14cB", "test15aB", "test15bB", "test15cB", "test16a", "test16b", "test17a", "test17b", "test17c", "test17d", "test18a", "test18b", "test19", "test20"}) public void runTests() { for (Map.Entry entry : tests.entrySet()) { String name = entry.getKey(); TestFunction test = entry.getValue(); // Recall gold value from before compilation Object[] gold = golds.get(name); // Compute new result Object[] result = test.run(); // Compare gold and new result verify(name, gold, result); } } static byte[] generateB() { byte[] a = new byte[RANGE]; for (int i = 0; i < a.length; i++) { a[i] = (byte)RANDOM.nextInt(); } return a; } static short[] generateS() { short[] a = new short[RANGE]; for (int i = 0; i < a.length; i++) { a[i] = (short)RANDOM.nextInt(); } return a; } static int[] generateI() { int[] a = new int[RANGE]; for (int i = 0; i < a.length; i++) { a[i] = RANDOM.nextInt(); } return a; } static long[] generateL() { long[] a = new long[RANGE]; for (int i = 0; i < a.length; i++) { a[i] = RANDOM.nextLong(); } return a; } static void verify(String name, Object[] gold, Object[] result) { if (gold.length != result.length) { throw new RuntimeException("verify " + name + ": not the same number of outputs: gold.length = " + gold.length + ", result.length = " + result.length); } for (int i = 0; i < gold.length; i++) { Object g = gold[i]; Object r = result[i]; if (g.getClass() != r.getClass() || !g.getClass().isArray() || !r.getClass().isArray()) { throw new RuntimeException("verify " + name + ": must both be array of same type:" + " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() + " result[" + i + "].getClass() = " + r.getClass().getSimpleName()); } if (g == r) { throw new RuntimeException("verify " + name + ": should be two separate arrays (with identical content):" + " gold[" + i + "] == result[" + i + "]"); } if (Array.getLength(g) != Array.getLength(r)) { throw new RuntimeException("verify " + name + ": arrays must have same length:" + " gold[" + i + "].length = " + Array.getLength(g) + " result[" + i + "].length = " + Array.getLength(r)); } Class c = g.getClass().getComponentType(); if (c == byte.class) { verifyB(name, i, (byte[])g, (byte[])r); } else if (c == short.class) { verifyS(name, i, (short[])g, (short[])r); } else if (c == int.class) { verifyI(name, i, (int[])g, (int[])r); } else if (c == long.class) { verifyL(name, i, (long[])g, (long[])r); } else { throw new RuntimeException("verify " + name + ": array type not supported for verify:" + " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() + " result[" + i + "].getClass() = " + r.getClass().getSimpleName()); } } } static void verifyB(String name, int i, byte[] g, byte[] r) { for (int j = 0; j < g.length; j++) { if (g[j] != r[j]) { throw new RuntimeException("verify " + name + ": arrays must have same content:" + " gold[" + i + "][" + j + "] = " + g[j] + " result[" + i + "][" + j + "] = " + r[j]); } } } static void verifyS(String name, int i, short[] g, short[] r) { for (int j = 0; j < g.length; j++) { if (g[j] != r[j]) { throw new RuntimeException("verify " + name + ": arrays must have same content:" + " gold[" + i + "][" + j + "] = " + g[j] + " result[" + i + "][" + j + "] = " + r[j]); } } } static void verifyI(String name, int i, int[] g, int[] r) { for (int j = 0; j < g.length; j++) { if (g[j] != r[j]) { throw new RuntimeException("verify " + name + ": arrays must have same content:" + " gold[" + i + "][" + j + "] = " + g[j] + " result[" + i + "][" + j + "] = " + r[j]); } } } static void verifyL(String name, int i, long[] g, long[] r) { for (int j = 0; j < g.length; j++) { if (g[j] != r[j]) { throw new RuntimeException("verify " + name + ": arrays must have same content:" + " gold[" + i + "][" + j + "] = " + g[j] + " result[" + i + "][" + j + "] = " + r[j]); } } } @Test @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0", IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIf = {"MaxVectorSize", ">=8"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) static Object[] test0(byte[] a, byte[] b, byte mask) { for (int i = 0; i < RANGE; i+=8) { // Safe to vectorize with AlignVector b[i+0] = (byte)(a[i+0] & mask); // offset 0, align 0 b[i+1] = (byte)(a[i+1] & mask); b[i+2] = (byte)(a[i+2] & mask); b[i+3] = (byte)(a[i+3] & mask); } return new Object[]{ a, b }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.AND_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) static Object[] test1(byte[] a, byte[] b, byte mask) { for (int i = 0; i < RANGE; i+=8) { // Safe to vectorize with AlignVector b[i+0] = (byte)(a[i+0] & mask); // offset 0, align 0 b[i+1] = (byte)(a[i+1] & mask); b[i+2] = (byte)(a[i+2] & mask); b[i+3] = (byte)(a[i+3] & mask); b[i+4] = (byte)(a[i+4] & mask); b[i+5] = (byte)(a[i+5] & mask); b[i+6] = (byte)(a[i+6] & mask); b[i+7] = (byte)(a[i+7] & mask); } return new Object[]{ a, b }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0", IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", IRNode.AND_VB, "= 0", IRNode.STORE_VECTOR, "= 0"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}, applyIf = {"AlignVector", "true"}) static Object[] test2(byte[] a, byte[] b, byte mask) { for (int i = 0; i < RANGE; i+=8) { // Cannot align with AlignVector: 3 + x * 8 % 8 = 3 b[i+3] = (byte)(a[i+3] & mask); // at alignment 3 b[i+4] = (byte)(a[i+4] & mask); b[i+5] = (byte)(a[i+5] & mask); b[i+6] = (byte)(a[i+6] & mask); } return new Object[]{ a, b }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0", IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", IRNode.AND_VB, "= 0", IRNode.STORE_VECTOR, "= 0"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}, applyIf = {"AlignVector", "true"}) static Object[] test3(byte[] a, byte[] b, byte mask) { for (int i = 0; i < RANGE; i+=8) { // Cannot align with AlignVector: 3 + x * 8 % 8 = 3 // Problematic for AlignVector b[i+0] = (byte)(a[i+0] & mask); // best_memref, align 0 b[i+3] = (byte)(a[i+3] & mask); // pack at offset 3 bytes b[i+4] = (byte)(a[i+4] & mask); b[i+5] = (byte)(a[i+5] & mask); b[i+6] = (byte)(a[i+6] & mask); } return new Object[]{ a, b }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0", IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_8, "> 0", IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0", IRNode.AND_VB, IRNode.VECTOR_SIZE_8, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"}) @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0", IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_8, "= 0",// unaligned IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0", IRNode.AND_VB, IRNode.VECTOR_SIZE_8, "= 0",// unaligned IRNode.STORE_VECTOR, "> 0"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">=16"}) static Object[] test4(byte[] a, byte[] b, byte mask) { for (int i = 0; i < RANGE/16; i++) { // Problematic for AlignVector b[i*16 + 0 ] = (byte)(a[i*16 + 0 ] & mask); // 4 pack, 0 aligned b[i*16 + 1 ] = (byte)(a[i*16 + 1 ] & mask); b[i*16 + 2 ] = (byte)(a[i*16 + 2 ] & mask); b[i*16 + 3 ] = (byte)(a[i*16 + 3 ] & mask); b[i*16 + 5 ] = (byte)(a[i*16 + 5 ] & mask); // 8 pack, 5 aligned b[i*16 + 6 ] = (byte)(a[i*16 + 6 ] & mask); b[i*16 + 7 ] = (byte)(a[i*16 + 7 ] & mask); b[i*16 + 8 ] = (byte)(a[i*16 + 8 ] & mask); b[i*16 + 9 ] = (byte)(a[i*16 + 9 ] & mask); b[i*16 + 10] = (byte)(a[i*16 + 10] & mask); b[i*16 + 11] = (byte)(a[i*16 + 11] & mask); b[i*16 + 12] = (byte)(a[i*16 + 12] & mask); } return new Object[]{ a, b }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0", IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", IRNode.AND_VB, "= 0", IRNode.STORE_VECTOR, "= 0"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}, applyIf = {"AlignVector", "true"}) static Object[] test5(byte[] a, byte[] b, byte mask, int inv) { for (int i = 0; i < RANGE; i+=8) { // Cannot align with AlignVector because of invariant b[i+inv+0] = (byte)(a[i+inv+0] & mask); b[i+inv+3] = (byte)(a[i+inv+3] & mask); b[i+inv+4] = (byte)(a[i+inv+4] & mask); b[i+inv+5] = (byte)(a[i+inv+5] & mask); b[i+inv+6] = (byte)(a[i+inv+6] & mask); } return new Object[]{ a, b }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0", IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", IRNode.AND_VB, "= 0", IRNode.STORE_VECTOR, "= 0"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}, applyIf = {"AlignVector", "true"}) static Object[] test6(byte[] a, byte[] b, byte mask) { for (int i = 0; i < RANGE/8; i+=2) { // Cannot align with AlignVector because offset is odd b[i*4+0] = (byte)(a[i*4+0] & mask); b[i*4+3] = (byte)(a[i*4+3] & mask); b[i*4+4] = (byte)(a[i*4+4] & mask); b[i*4+5] = (byte)(a[i*4+5] & mask); b[i*4+6] = (byte)(a[i*4+6] & mask); } return new Object[]{ a, b }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0", IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0", IRNode.AND_VS, "= 0", IRNode.STORE_VECTOR, "= 0"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}, applyIf = {"AlignVector", "true"}) static Object[] test7(short[] a, short[] b, short mask) { for (int i = 0; i < RANGE/8; i+=2) { // Cannot align with AlignVector because offset is odd b[i*4+0] = (short)(a[i*4+0] & mask); b[i*4+3] = (short)(a[i*4+3] & mask); b[i*4+4] = (short)(a[i*4+4] & mask); b[i*4+5] = (short)(a[i*4+5] & mask); b[i*4+6] = (short)(a[i*4+6] & mask); } return new Object[]{ a, b }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0", IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", IRNode.AND_VB, "= 0", IRNode.STORE_VECTOR, "= 0"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}, applyIf = {"AlignVector", "true"}) static Object[] test8(byte[] a, byte[] b, byte mask, int init) { for (int i = init; i < RANGE; i+=8) { // Cannot align with AlignVector because of invariant (variable init becomes invar) b[i+0] = (byte)(a[i+0] & mask); b[i+3] = (byte)(a[i+3] & mask); b[i+4] = (byte)(a[i+4] & mask); b[i+5] = (byte)(a[i+5] & mask); b[i+6] = (byte)(a[i+6] & mask); } return new Object[]{ a, b }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0", IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIf = {"MaxVectorSize", ">=8"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) static Object[] test9(byte[] a, byte[] b, byte mask) { // known non-zero init value does not affect offset, but has implicit effect on iv for (int i = 13; i < RANGE-8; i+=8) { b[i+0] = (byte)(a[i+0] & mask); b[i+3] = (byte)(a[i+3] & mask); b[i+4] = (byte)(a[i+4] & mask); b[i+5] = (byte)(a[i+5] & mask); b[i+6] = (byte)(a[i+6] & mask); } return new Object[]{ a, b }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0", IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"}) @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", IRNode.AND_VB, "= 0", IRNode.STORE_VECTOR, "= 0"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}, applyIf = {"AlignVector", "true"}) static Object[] test10a(byte[] a, byte[] b, byte mask) { // This is not alignable with pre-loop, because of odd init. for (int i = 3; i < RANGE-8; i+=8) { b[i+0] = (byte)(a[i+0] & mask); b[i+1] = (byte)(a[i+1] & mask); b[i+2] = (byte)(a[i+2] & mask); b[i+3] = (byte)(a[i+3] & mask); } return new Object[]{ a, b }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0", IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"}) @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", IRNode.AND_VB, "= 0", IRNode.STORE_VECTOR, "= 0"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}, applyIf = {"AlignVector", "true"}) static Object[] test10b(byte[] a, byte[] b, byte mask) { // This is not alignable with pre-loop, because of odd init. // Seems not correctly handled. for (int i = 13; i < RANGE-8; i+=8) { b[i+0] = (byte)(a[i+0] & mask); b[i+1] = (byte)(a[i+1] & mask); b[i+2] = (byte)(a[i+2] & mask); b[i+3] = (byte)(a[i+3] & mask); } return new Object[]{ a, b }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0", IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"}) @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0", IRNode.AND_VS, "= 0", IRNode.STORE_VECTOR, "= 0"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}, applyIf = {"AlignVector", "true"}) static Object[] test10c(short[] a, short[] b, short mask) { // This is not alignable with pre-loop, because of odd init. // Seems not correctly handled with MaxVectorSize >= 32. for (int i = 13; i < RANGE-8; i+=8) { b[i+0] = (short)(a[i+0] & mask); b[i+1] = (short)(a[i+1] & mask); b[i+2] = (short)(a[i+2] & mask); b[i+3] = (short)(a[i+3] & mask); } return new Object[]{ a, b }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0", IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIf = {"MaxVectorSize", ">=16"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) static Object[] test10d(short[] a, short[] b, short mask) { for (int i = 13; i < RANGE-16; i+=8) { // init + offset -> aligned b[i+0+3] = (short)(a[i+0+3] & mask); b[i+1+3] = (short)(a[i+1+3] & mask); b[i+2+3] = (short)(a[i+2+3] & mask); b[i+3+3] = (short)(a[i+3+3] & mask); } return new Object[]{ a, b }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.AND_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) static Object[] test11aB(byte[] a, byte[] b, byte mask) { for (int i = 0; i < RANGE; i++) { // always alignable b[i+0] = (byte)(a[i+0] & mask); } return new Object[]{ a, b }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.AND_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) static Object[] test11aS(short[] a, short[] b, short mask) { for (int i = 0; i < RANGE; i++) { // always alignable b[i+0] = (short)(a[i+0] & mask); } return new Object[]{ a, b }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.AND_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) static Object[] test11aI(int[] a, int[] b, int mask) { for (int i = 0; i < RANGE; i++) { // always alignable b[i+0] = (int)(a[i+0] & mask); } return new Object[]{ a, b }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.AND_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) static Object[] test11aL(long[] a, long[] b, long mask) { for (int i = 0; i < RANGE; i++) { // always alignable b[i+0] = (long)(a[i+0] & mask); } return new Object[]{ a, b }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.AND_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) static Object[] test11bB(byte[] a, byte[] b, byte mask) { for (int i = 1; i < RANGE; i++) { // always alignable b[i+0] = (byte)(a[i+0] & mask); } return new Object[]{ a, b }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.AND_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) static Object[] test11bS(short[] a, short[] b, short mask) { for (int i = 1; i < RANGE; i++) { // always alignable b[i+0] = (short)(a[i+0] & mask); } return new Object[]{ a, b }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.AND_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) static Object[] test11bI(int[] a, int[] b, int mask) { for (int i = 1; i < RANGE; i++) { // always alignable b[i+0] = (int)(a[i+0] & mask); } return new Object[]{ a, b }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.AND_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) static Object[] test11bL(long[] a, long[] b, long mask) { for (int i = 1; i < RANGE; i++) { // always alignable b[i+0] = (long)(a[i+0] & mask); } return new Object[]{ a, b }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.AND_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}, applyIf = {"AlignVector", "false"}) @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", IRNode.AND_VB, "= 0", IRNode.STORE_VECTOR, "= 0"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}, applyIf = {"AlignVector", "true"}) static Object[] test11cB(byte[] a, byte[] b, byte mask) { for (int i = 1; i < RANGE-1; i++) { // 1 byte offset -> not alignable with AlignVector b[i+0] = (byte)(a[i+1] & mask); } return new Object[]{ a, b }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.AND_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}, applyIf = {"AlignVector", "false"}) @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0", IRNode.AND_VS, "= 0", IRNode.STORE_VECTOR, "= 0"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}, applyIf = {"AlignVector", "true"}) static Object[] test11cS(short[] a, short[] b, short mask) { for (int i = 1; i < RANGE-1; i++) { // 2 byte offset -> not alignable with AlignVector b[i+0] = (short)(a[i+1] & mask); } return new Object[]{ a, b }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.AND_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}, applyIf = {"AlignVector", "false"}) @IR(counts = {IRNode.LOAD_VECTOR_I, "= 0", IRNode.AND_VI, "= 0", IRNode.STORE_VECTOR, "= 0"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}, applyIf = {"AlignVector", "true"}) static Object[] test11cI(int[] a, int[] b, int mask) { for (int i = 1; i < RANGE-1; i++) { // 4 byte offset -> not alignable with AlignVector b[i+0] = (int)(a[i+1] & mask); } return new Object[]{ a, b }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.AND_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) static Object[] test11cL(long[] a, long[] b, long mask) { for (int i = 1; i < RANGE-1; i++) { // always alignable (8 byte offset) b[i+0] = (long)(a[i+1] & mask); } return new Object[]{ a, b }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.AND_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) static Object[] test11dB(byte[] a, byte[] b, byte mask, int invar) { for (int i = 0; i < RANGE; i++) { b[i+0+invar] = (byte)(a[i+0+invar] & mask); } return new Object[]{ a, b }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.AND_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) static Object[] test11dS(short[] a, short[] b, short mask, int invar) { for (int i = 0; i < RANGE; i++) { b[i+0+invar] = (short)(a[i+0+invar] & mask); } return new Object[]{ a, b }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.AND_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) static Object[] test11dI(int[] a, int[] b, int mask, int invar) { for (int i = 0; i < RANGE; i++) { b[i+0+invar] = (int)(a[i+0+invar] & mask); } return new Object[]{ a, b }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.AND_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) static Object[] test11dL(long[] a, long[] b, long mask, int invar) { for (int i = 0; i < RANGE; i++) { b[i+0+invar] = (long)(a[i+0+invar] & mask); } return new Object[]{ a, b }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", IRNode.AND_VB, "= 0", IRNode.STORE_VECTOR, "= 0"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) static Object[] test12(byte[] a, byte[] b, byte mask) { for (int i = 0; i < RANGE/16; i++) { // Currently does not vectorize at all b[i*6 + 0 ] = (byte)(a[i*6 + 0 ] & mask); b[i*6 + 1 ] = (byte)(a[i*6 + 1 ] & mask); b[i*6 + 2 ] = (byte)(a[i*6 + 2 ] & mask); b[i*6 + 3 ] = (byte)(a[i*6 + 3 ] & mask); } return new Object[]{ a, b }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"avx2", "true"}) // require avx to ensure vectors are larger than what unrolling produces static Object[] test13aIL(int[] a, long[] b) { for (int i = 0; i < RANGE; i++) { a[i]++; b[i]++; } return new Object[]{ a, b }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.LOAD_VECTOR_I, "> 0", IRNode.ADD_VB, "> 0", IRNode.ADD_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) static Object[] test13aIB(int[] a, byte[] b) { for (int i = 0; i < RANGE; i++) { a[i]++; b[i]++; } return new Object[]{ a, b }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.LOAD_VECTOR_S, "> 0", IRNode.ADD_VI, "> 0", IRNode.ADD_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) static Object[] test13aIS(int[] a, short[] b) { for (int i = 0; i < RANGE; i++) { a[i]++; b[i]++; } return new Object[]{ a, b }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.LOAD_VECTOR_S, "> 0", IRNode.LOAD_VECTOR_I, "> 0", IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VB, "> 0", IRNode.ADD_VS, "> 0", IRNode.ADD_VI, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) static Object[] test13aBSIL(byte[] a, short[] b, int[] c, long[] d) { for (int i = 0; i < RANGE; i++) { a[i]++; b[i]++; c[i]++; d[i]++; } return new Object[]{ a, b, c, d }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"avx2", "true"}) // require avx to ensure vectors are larger than what unrolling produces static Object[] test13bIL(int[] a, long[] b) { for (int i = 1; i < RANGE; i++) { a[i]++; b[i]++; } return new Object[]{ a, b }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.LOAD_VECTOR_I, "> 0", IRNode.ADD_VB, "> 0", IRNode.ADD_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) static Object[] test13bIB(int[] a, byte[] b) { for (int i = 1; i < RANGE; i++) { a[i]++; b[i]++; } return new Object[]{ a, b }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.LOAD_VECTOR_S, "> 0", IRNode.ADD_VI, "> 0", IRNode.ADD_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) static Object[] test13bIS(int[] a, short[] b) { for (int i = 1; i < RANGE; i++) { a[i]++; b[i]++; } return new Object[]{ a, b }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.LOAD_VECTOR_S, "> 0", IRNode.LOAD_VECTOR_I, "> 0", IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VB, "> 0", IRNode.ADD_VS, "> 0", IRNode.ADD_VI, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) static Object[] test13bBSIL(byte[] a, short[] b, int[] c, long[] d) { for (int i = 1; i < RANGE; i++) { a[i]++; b[i]++; c[i]++; d[i]++; } return new Object[]{ a, b, c, d }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.ADD_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}, applyIf = {"AlignVector", "false"}) @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", IRNode.ADD_VB, "= 0", IRNode.STORE_VECTOR, "= 0"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}, applyIf = {"AlignVector", "true"}) static Object[] test14aB(byte[] a) { // non-power-of-2 stride for (int i = 0; i < RANGE-20; i+=9) { a[i+0]++; a[i+1]++; a[i+2]++; a[i+3]++; a[i+4]++; a[i+5]++; a[i+6]++; a[i+7]++; a[i+8]++; a[i+9]++; a[i+10]++; a[i+11]++; a[i+12]++; a[i+13]++; a[i+14]++; a[i+15]++; } return new Object[]{ a }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.ADD_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}, applyIf = {"AlignVector", "false"}) @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", IRNode.ADD_VB, "= 0", IRNode.STORE_VECTOR, "= 0"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}, applyIf = {"AlignVector", "true"}) static Object[] test14bB(byte[] a) { // non-power-of-2 stride for (int i = 0; i < RANGE-20; i+=3) { a[i+0]++; a[i+1]++; a[i+2]++; a[i+3]++; a[i+4]++; a[i+5]++; a[i+6]++; a[i+7]++; a[i+8]++; a[i+9]++; a[i+10]++; a[i+11]++; a[i+12]++; a[i+13]++; a[i+14]++; a[i+15]++; } return new Object[]{ a }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.ADD_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}, applyIf = {"AlignVector", "false"}) @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", IRNode.ADD_VB, "= 0", IRNode.STORE_VECTOR, "= 0"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}, applyIf = {"AlignVector", "true"}) static Object[] test14cB(byte[] a) { // non-power-of-2 stride for (int i = 0; i < RANGE-20; i+=5) { a[i+0]++; a[i+1]++; a[i+2]++; a[i+3]++; a[i+4]++; a[i+5]++; a[i+6]++; a[i+7]++; a[i+8]++; a[i+9]++; a[i+10]++; a[i+11]++; a[i+12]++; a[i+13]++; a[i+14]++; a[i+15]++; } return new Object[]{ a }; } @Test // IR rules difficult because of modulo wrapping with offset after peeling. static Object[] test15aB(byte[] a) { // non-power-of-2 scale for (int i = 0; i < RANGE/64-20; i++) { a[53*i+0]++; a[53*i+1]++; a[53*i+2]++; a[53*i+3]++; a[53*i+4]++; a[53*i+5]++; a[53*i+6]++; a[53*i+7]++; a[53*i+8]++; a[53*i+9]++; a[53*i+10]++; a[53*i+11]++; a[53*i+12]++; a[53*i+13]++; a[53*i+14]++; a[53*i+15]++; } return new Object[]{ a }; } @Test // IR rules difficult because of modulo wrapping with offset after peeling. static Object[] test15bB(byte[] a) { // non-power-of-2 scale for (int i = 0; i < RANGE/64-20; i++) { a[25*i+0]++; a[25*i+1]++; a[25*i+2]++; a[25*i+3]++; a[25*i+4]++; a[25*i+5]++; a[25*i+6]++; a[25*i+7]++; a[25*i+8]++; a[25*i+9]++; a[25*i+10]++; a[25*i+11]++; a[25*i+12]++; a[25*i+13]++; a[25*i+14]++; a[25*i+15]++; } return new Object[]{ a }; } @Test // IR rules difficult because of modulo wrapping with offset after peeling. static Object[] test15cB(byte[] a) { // non-power-of-2 scale for (int i = 0; i < RANGE/64-20; i++) { a[19*i+0]++; a[19*i+1]++; a[19*i+2]++; a[19*i+3]++; a[19*i+4]++; a[19*i+5]++; a[19*i+6]++; a[19*i+7]++; a[19*i+8]++; a[19*i+9]++; a[19*i+10]++; a[19*i+11]++; a[19*i+12]++; a[19*i+13]++; a[19*i+14]++; a[19*i+15]++; } return new Object[]{ a }; } @Test static Object[] test16a(byte[] a, short[] b) { // infinite loop issues for (int i = 0; i < RANGE/2-20; i++) { a[2*i+0]++; a[2*i+1]++; a[2*i+2]++; a[2*i+3]++; a[2*i+4]++; a[2*i+5]++; a[2*i+6]++; a[2*i+7]++; a[2*i+8]++; a[2*i+9]++; a[2*i+10]++; a[2*i+11]++; a[2*i+12]++; a[2*i+13]++; a[2*i+14]++; b[2*i+0]++; b[2*i+1]++; b[2*i+2]++; b[2*i+3]++; } return new Object[]{ a, b }; } @Test static Object[] test16b(byte[] a) { // infinite loop issues for (int i = 0; i < RANGE/2-20; i++) { a[2*i+0]++; a[2*i+1]++; a[2*i+2]++; a[2*i+3]++; a[2*i+4]++; a[2*i+5]++; a[2*i+6]++; a[2*i+7]++; a[2*i+8]++; a[2*i+9]++; a[2*i+10]++; a[2*i+11]++; a[2*i+12]++; a[2*i+13]++; a[2*i+14]++; } return new Object[]{ a }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) static Object[] test17a(long[] a) { // Unsafe: vectorizes with profiling (not xcomp) for (int i = 0; i < RANGE; i++) { int adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 * i; long v = UNSAFE.getLongUnaligned(a, adr); UNSAFE.putLongUnaligned(a, adr, v + 1); } return new Object[]{ a }; } @Test // Difficult to write good IR rule. Modulo calculus overflow can create non-power-of-2 packs. static Object[] test17b(long[] a) { // Not alignable for (int i = 0; i < RANGE-1; i++) { int adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 * i + 1; long v = UNSAFE.getLongUnaligned(a, adr); UNSAFE.putLongUnaligned(a, adr, v + 1); } return new Object[]{ a }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE_2, "> 0", IRNode.ADD_VL, IRNode.VECTOR_SIZE_2, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIf = {"MaxVectorSize", ">=32"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) static Object[] test17c(long[] a) { // Unsafe: aligned vectorizes for (int i = 0; i < RANGE-1; i+=4) { int adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 * i; long v0 = UNSAFE.getLongUnaligned(a, adr + 0); long v1 = UNSAFE.getLongUnaligned(a, adr + 8); UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1); UNSAFE.putLongUnaligned(a, adr + 8, v1 + 1); } return new Object[]{ a }; } @Test @IR(counts = {IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE_2, "> 0", IRNode.ADD_VL, IRNode.VECTOR_SIZE_2, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfCPUFeatureOr = {"avx512", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=64"}) // Ensure vector width is large enough to fit 64 byte for longs: // The offsets are: 25, 33, 57, 65 // In modulo 32: 25, 1, 25, 1 -> does not vectorize // In modulo 64: 25, 33, 57, 1 -> at least first pair vectorizes // This problem is because we compute modulo vector width in memory_alignment. @IR(counts = {IRNode.LOAD_VECTOR_L, "= 0", IRNode.ADD_VL, "= 0", IRNode.STORE_VECTOR, "= 0"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}, applyIf = {"AlignVector", "true"}) static Object[] test17d(long[] a) { // Not alignable for (int i = 0; i < RANGE-1; i+=4) { int adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 * i + 1; long v0 = UNSAFE.getLongUnaligned(a, adr + 0); long v1 = UNSAFE.getLongUnaligned(a, adr + 8); UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1); UNSAFE.putLongUnaligned(a, adr + 8, v1 + 1); } return new Object[]{ a }; } @Test static Object[] test18a(byte[] a, int[] b) { // scale = 0 --> no iv for (int i = 0; i < RANGE; i++) { a[0] = 1; b[i] = 2; a[1] = 1; } return new Object[]{ a, b }; } @Test static Object[] test18b(byte[] a, int[] b) { // scale = 0 --> no iv for (int i = 0; i < RANGE; i++) { a[1] = 1; b[i] = 2; a[2] = 1; } return new Object[]{ a, b }; } @Test static Object[] test19(int[] a, int[] b) { for (int i = 5000; i > 0; i--) { a[RANGE_FINAL - i] = b[RANGE_FINAL - i]; } return new Object[]{ a, b }; } @Test static Object[] test20(byte[] a) { // Example where it is easy to pass alignment check, // but used to fail the alignment calculation for (int i = 1; i < RANGE/2-50; i++) { a[2*i+0+30]++; a[2*i+1+30]++; a[2*i+2+30]++; a[2*i+3+30]++; } return new Object[]{ a }; } }