1480 lines
56 KiB
Java
1480 lines
56 KiB
Java
|
/*
|
||
|
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
|
||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||
|
*
|
||
|
* This code is free software; you can redistribute it and/or modify it
|
||
|
* under the terms of the GNU General Public License version 2 only, as
|
||
|
* published by the Free Software Foundation.
|
||
|
*
|
||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||
|
* accompanied this code).
|
||
|
*
|
||
|
* You should have received a copy of the GNU General Public License version
|
||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||
|
*
|
||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||
|
* or visit www.oracle.com if you need additional information or have any
|
||
|
* questions.
|
||
|
*/
|
||
|
|
||
|
package compiler.loopopts.superword;
|
||
|
|
||
|
import compiler.lib.ir_framework.*;
|
||
|
import jdk.test.lib.Utils;
|
||
|
import jdk.test.whitebox.WhiteBox;
|
||
|
import jdk.internal.misc.Unsafe;
|
||
|
import java.lang.reflect.Array;
|
||
|
import java.util.Map;
|
||
|
import java.util.HashMap;
|
||
|
import java.util.Random;
|
||
|
import java.nio.ByteOrder;
|
||
|
|
||
|
/*
|
||
|
* @test id=NoAlignVector
|
||
|
* @bug 8310190
|
||
|
* @summary Test AlignVector with various loop init, stride, scale, invar, etc.
|
||
|
* @modules java.base/jdk.internal.misc
|
||
|
* @library /test/lib /
|
||
|
* @requires vm.compiler2.enabled
|
||
|
* @run driver compiler.loopopts.superword.TestAlignVector NoAlignVector
|
||
|
*/
|
||
|
|
||
|
/*
|
||
|
* @test id=AlignVector
|
||
|
* @bug 8310190
|
||
|
* @summary Test AlignVector with various loop init, stride, scale, invar, etc.
|
||
|
* @modules java.base/jdk.internal.misc
|
||
|
* @library /test/lib /
|
||
|
* @requires vm.compiler2.enabled
|
||
|
* @run driver compiler.loopopts.superword.TestAlignVector AlignVector
|
||
|
*/
|
||
|
|
||
|
/*
|
||
|
* @test id=VerifyAlignVector
|
||
|
* @bug 8310190
|
||
|
* @summary Test AlignVector with various loop init, stride, scale, invar, etc.
|
||
|
* @modules java.base/jdk.internal.misc
|
||
|
* @library /test/lib /
|
||
|
* @requires vm.compiler2.enabled
|
||
|
* @run driver compiler.loopopts.superword.TestAlignVector VerifyAlignVector
|
||
|
*/
|
||
|
|
||
|
public class TestAlignVector {
|
||
|
static int RANGE = 1024*8;
|
||
|
static int RANGE_FINAL = 1024*8;
|
||
|
private static final Unsafe UNSAFE = Unsafe.getUnsafe();
|
||
|
private static final Random RANDOM = Utils.getRandomInstance();
|
||
|
|
||
|
// Inputs
|
||
|
byte[] aB;
|
||
|
byte[] bB;
|
||
|
byte mB = (byte)31;
|
||
|
short[] aS;
|
||
|
short[] bS;
|
||
|
short mS = (short)0xF0F0;
|
||
|
int[] aI;
|
||
|
int[] bI;
|
||
|
int mI = 0xF0F0F0F0;
|
||
|
long[] aL;
|
||
|
long[] bL;
|
||
|
long mL = 0xF0F0F0F0F0F0F0F0L;
|
||
|
|
||
|
// List of tests
|
||
|
Map<String,TestFunction> tests = new HashMap<String,TestFunction>();
|
||
|
|
||
|
// List of gold, the results from the first run before compilation
|
||
|
Map<String,Object[]> golds = new HashMap<String,Object[]>();
|
||
|
|
||
|
interface TestFunction {
|
||
|
Object[] run();
|
||
|
}
|
||
|
|
||
|
public static void main(String[] args) {
|
||
|
TestFramework framework = new TestFramework(TestAlignVector.class);
|
||
|
framework.addFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED",
|
||
|
"-XX:LoopUnrollLimit=250");
|
||
|
|
||
|
switch (args[0]) {
|
||
|
case "NoAlignVector" -> { framework.addFlags("-XX:-AlignVector"); }
|
||
|
case "AlignVector" -> { framework.addFlags("-XX:+AlignVector"); }
|
||
|
case "VerifyAlignVector" -> { framework.addFlags("-XX:+AlignVector", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+VerifyAlignVector"); }
|
||
|
default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); }
|
||
|
}
|
||
|
framework.start();
|
||
|
}
|
||
|
|
||
|
public TestAlignVector() {
|
||
|
// Generate input once
|
||
|
aB = generateB();
|
||
|
bB = generateB();
|
||
|
aS = generateS();
|
||
|
bS = generateS();
|
||
|
aI = generateI();
|
||
|
bI = generateI();
|
||
|
aL = generateL();
|
||
|
bL = generateL();
|
||
|
|
||
|
// Add all tests to list
|
||
|
tests.put("test0", () -> { return test0(aB.clone(), bB.clone(), mB); });
|
||
|
tests.put("test1", () -> { return test1(aB.clone(), bB.clone(), mB); });
|
||
|
tests.put("test2", () -> { return test2(aB.clone(), bB.clone(), mB); });
|
||
|
tests.put("test3", () -> { return test3(aB.clone(), bB.clone(), mB); });
|
||
|
tests.put("test4", () -> { return test4(aB.clone(), bB.clone(), mB); });
|
||
|
tests.put("test5", () -> { return test5(aB.clone(), bB.clone(), mB, 0); });
|
||
|
tests.put("test6", () -> { return test6(aB.clone(), bB.clone(), mB); });
|
||
|
tests.put("test7", () -> { return test7(aS.clone(), bS.clone(), mS); });
|
||
|
tests.put("test8", () -> { return test8(aB.clone(), bB.clone(), mB, 0); });
|
||
|
tests.put("test8", () -> { return test8(aB.clone(), bB.clone(), mB, 1); });
|
||
|
tests.put("test9", () -> { return test9(aB.clone(), bB.clone(), mB); });
|
||
|
|
||
|
tests.put("test10a", () -> { return test10a(aB.clone(), bB.clone(), mB); });
|
||
|
tests.put("test10b", () -> { return test10b(aB.clone(), bB.clone(), mB); });
|
||
|
tests.put("test10c", () -> { return test10c(aS.clone(), bS.clone(), mS); });
|
||
|
tests.put("test10d", () -> { return test10d(aS.clone(), bS.clone(), mS); });
|
||
|
|
||
|
tests.put("test11aB", () -> { return test11aB(aB.clone(), bB.clone(), mB); });
|
||
|
tests.put("test11aS", () -> { return test11aS(aS.clone(), bS.clone(), mS); });
|
||
|
tests.put("test11aI", () -> { return test11aI(aI.clone(), bI.clone(), mI); });
|
||
|
tests.put("test11aL", () -> { return test11aL(aL.clone(), bL.clone(), mL); });
|
||
|
|
||
|
tests.put("test11bB", () -> { return test11bB(aB.clone(), bB.clone(), mB); });
|
||
|
tests.put("test11bS", () -> { return test11bS(aS.clone(), bS.clone(), mS); });
|
||
|
tests.put("test11bI", () -> { return test11bI(aI.clone(), bI.clone(), mI); });
|
||
|
tests.put("test11bL", () -> { return test11bL(aL.clone(), bL.clone(), mL); });
|
||
|
|
||
|
tests.put("test11cB", () -> { return test11cB(aB.clone(), bB.clone(), mB); });
|
||
|
tests.put("test11cS", () -> { return test11cS(aS.clone(), bS.clone(), mS); });
|
||
|
tests.put("test11cI", () -> { return test11cI(aI.clone(), bI.clone(), mI); });
|
||
|
tests.put("test11cL", () -> { return test11cL(aL.clone(), bL.clone(), mL); });
|
||
|
|
||
|
tests.put("test11dB", () -> { return test11dB(aB.clone(), bB.clone(), mB, 0); });
|
||
|
tests.put("test11dS", () -> { return test11dS(aS.clone(), bS.clone(), mS, 0); });
|
||
|
tests.put("test11dI", () -> { return test11dI(aI.clone(), bI.clone(), mI, 0); });
|
||
|
tests.put("test11dL", () -> { return test11dL(aL.clone(), bL.clone(), mL, 0); });
|
||
|
|
||
|
tests.put("test12", () -> { return test12(aB.clone(), bB.clone(), mB); });
|
||
|
|
||
|
tests.put("test13aIL", () -> { return test13aIL(aI.clone(), aL.clone()); });
|
||
|
tests.put("test13aIB", () -> { return test13aIB(aI.clone(), aB.clone()); });
|
||
|
tests.put("test13aIS", () -> { return test13aIS(aI.clone(), aS.clone()); });
|
||
|
tests.put("test13aBSIL", () -> { return test13aBSIL(aB.clone(), aS.clone(), aI.clone(), aL.clone()); });
|
||
|
|
||
|
tests.put("test13bIL", () -> { return test13bIL(aI.clone(), aL.clone()); });
|
||
|
tests.put("test13bIB", () -> { return test13bIB(aI.clone(), aB.clone()); });
|
||
|
tests.put("test13bIS", () -> { return test13bIS(aI.clone(), aS.clone()); });
|
||
|
tests.put("test13bBSIL", () -> { return test13bBSIL(aB.clone(), aS.clone(), aI.clone(), aL.clone()); });
|
||
|
|
||
|
tests.put("test14aB", () -> { return test14aB(aB.clone()); });
|
||
|
tests.put("test14bB", () -> { return test14bB(aB.clone()); });
|
||
|
tests.put("test14cB", () -> { return test14cB(aB.clone()); });
|
||
|
|
||
|
tests.put("test15aB", () -> { return test15aB(aB.clone()); });
|
||
|
tests.put("test15bB", () -> { return test15bB(aB.clone()); });
|
||
|
tests.put("test15cB", () -> { return test15cB(aB.clone()); });
|
||
|
|
||
|
tests.put("test16a", () -> { return test16a(aB.clone(), aS.clone()); });
|
||
|
tests.put("test16b", () -> { return test16b(aB.clone()); });
|
||
|
|
||
|
tests.put("test17a", () -> { return test17a(aL.clone()); });
|
||
|
tests.put("test17b", () -> { return test17b(aL.clone()); });
|
||
|
tests.put("test17c", () -> { return test17c(aL.clone()); });
|
||
|
tests.put("test17d", () -> { return test17d(aL.clone()); });
|
||
|
|
||
|
tests.put("test18a", () -> { return test18a(aB.clone(), aI.clone()); });
|
||
|
tests.put("test18b", () -> { return test18b(aB.clone(), aI.clone()); });
|
||
|
|
||
|
tests.put("test19", () -> { return test19(aI.clone(), bI.clone()); });
|
||
|
tests.put("test20", () -> { return test20(aB.clone()); });
|
||
|
|
||
|
// Compute gold value for all test methods before compilation
|
||
|
for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
|
||
|
String name = entry.getKey();
|
||
|
TestFunction test = entry.getValue();
|
||
|
Object[] gold = test.run();
|
||
|
golds.put(name, gold);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
@Warmup(100)
|
||
|
@Run(test = {"test0",
|
||
|
"test1",
|
||
|
"test2",
|
||
|
"test3",
|
||
|
"test4",
|
||
|
"test5",
|
||
|
"test6",
|
||
|
"test7",
|
||
|
"test8",
|
||
|
"test9",
|
||
|
"test10a",
|
||
|
"test10b",
|
||
|
"test10c",
|
||
|
"test10d",
|
||
|
"test11aB",
|
||
|
"test11aS",
|
||
|
"test11aI",
|
||
|
"test11aL",
|
||
|
"test11bB",
|
||
|
"test11bS",
|
||
|
"test11bI",
|
||
|
"test11bL",
|
||
|
"test11cB",
|
||
|
"test11cS",
|
||
|
"test11cI",
|
||
|
"test11cL",
|
||
|
"test11dB",
|
||
|
"test11dS",
|
||
|
"test11dI",
|
||
|
"test11dL",
|
||
|
"test12",
|
||
|
"test13aIL",
|
||
|
"test13aIB",
|
||
|
"test13aIS",
|
||
|
"test13aBSIL",
|
||
|
"test13bIL",
|
||
|
"test13bIB",
|
||
|
"test13bIS",
|
||
|
"test13bBSIL",
|
||
|
"test14aB",
|
||
|
"test14bB",
|
||
|
"test14cB",
|
||
|
"test15aB",
|
||
|
"test15bB",
|
||
|
"test15cB",
|
||
|
"test16a",
|
||
|
"test16b",
|
||
|
"test17a",
|
||
|
"test17b",
|
||
|
"test17c",
|
||
|
"test17d",
|
||
|
"test18a",
|
||
|
"test18b",
|
||
|
"test19",
|
||
|
"test20"})
|
||
|
public void runTests() {
|
||
|
for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
|
||
|
String name = entry.getKey();
|
||
|
TestFunction test = entry.getValue();
|
||
|
// Recall gold value from before compilation
|
||
|
Object[] gold = golds.get(name);
|
||
|
// Compute new result
|
||
|
Object[] result = test.run();
|
||
|
// Compare gold and new result
|
||
|
verify(name, gold, result);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static byte[] generateB() {
|
||
|
byte[] a = new byte[RANGE];
|
||
|
for (int i = 0; i < a.length; i++) {
|
||
|
a[i] = (byte)RANDOM.nextInt();
|
||
|
}
|
||
|
return a;
|
||
|
}
|
||
|
|
||
|
static short[] generateS() {
|
||
|
short[] a = new short[RANGE];
|
||
|
for (int i = 0; i < a.length; i++) {
|
||
|
a[i] = (short)RANDOM.nextInt();
|
||
|
}
|
||
|
return a;
|
||
|
}
|
||
|
|
||
|
static int[] generateI() {
|
||
|
int[] a = new int[RANGE];
|
||
|
for (int i = 0; i < a.length; i++) {
|
||
|
a[i] = RANDOM.nextInt();
|
||
|
}
|
||
|
return a;
|
||
|
}
|
||
|
|
||
|
static long[] generateL() {
|
||
|
long[] a = new long[RANGE];
|
||
|
for (int i = 0; i < a.length; i++) {
|
||
|
a[i] = RANDOM.nextLong();
|
||
|
}
|
||
|
return a;
|
||
|
}
|
||
|
|
||
|
static void verify(String name, Object[] gold, Object[] result) {
|
||
|
if (gold.length != result.length) {
|
||
|
throw new RuntimeException("verify " + name + ": not the same number of outputs: gold.length = " +
|
||
|
gold.length + ", result.length = " + result.length);
|
||
|
}
|
||
|
for (int i = 0; i < gold.length; i++) {
|
||
|
Object g = gold[i];
|
||
|
Object r = result[i];
|
||
|
if (g.getClass() != r.getClass() || !g.getClass().isArray() || !r.getClass().isArray()) {
|
||
|
throw new RuntimeException("verify " + name + ": must both be array of same type:" +
|
||
|
" gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
|
||
|
" result[" + i + "].getClass() = " + r.getClass().getSimpleName());
|
||
|
}
|
||
|
if (g == r) {
|
||
|
throw new RuntimeException("verify " + name + ": should be two separate arrays (with identical content):" +
|
||
|
" gold[" + i + "] == result[" + i + "]");
|
||
|
}
|
||
|
if (Array.getLength(g) != Array.getLength(r)) {
|
||
|
throw new RuntimeException("verify " + name + ": arrays must have same length:" +
|
||
|
" gold[" + i + "].length = " + Array.getLength(g) +
|
||
|
" result[" + i + "].length = " + Array.getLength(r));
|
||
|
}
|
||
|
Class c = g.getClass().getComponentType();
|
||
|
if (c == byte.class) {
|
||
|
verifyB(name, i, (byte[])g, (byte[])r);
|
||
|
} else if (c == short.class) {
|
||
|
verifyS(name, i, (short[])g, (short[])r);
|
||
|
} else if (c == int.class) {
|
||
|
verifyI(name, i, (int[])g, (int[])r);
|
||
|
} else if (c == long.class) {
|
||
|
verifyL(name, i, (long[])g, (long[])r);
|
||
|
} else {
|
||
|
throw new RuntimeException("verify " + name + ": array type not supported for verify:" +
|
||
|
" gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
|
||
|
" result[" + i + "].getClass() = " + r.getClass().getSimpleName());
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static void verifyB(String name, int i, byte[] g, byte[] r) {
|
||
|
for (int j = 0; j < g.length; j++) {
|
||
|
if (g[j] != r[j]) {
|
||
|
throw new RuntimeException("verify " + name + ": arrays must have same content:" +
|
||
|
" gold[" + i + "][" + j + "] = " + g[j] +
|
||
|
" result[" + i + "][" + j + "] = " + r[j]);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static void verifyS(String name, int i, short[] g, short[] r) {
|
||
|
for (int j = 0; j < g.length; j++) {
|
||
|
if (g[j] != r[j]) {
|
||
|
throw new RuntimeException("verify " + name + ": arrays must have same content:" +
|
||
|
" gold[" + i + "][" + j + "] = " + g[j] +
|
||
|
" result[" + i + "][" + j + "] = " + r[j]);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static void verifyI(String name, int i, int[] g, int[] r) {
|
||
|
for (int j = 0; j < g.length; j++) {
|
||
|
if (g[j] != r[j]) {
|
||
|
throw new RuntimeException("verify " + name + ": arrays must have same content:" +
|
||
|
" gold[" + i + "][" + j + "] = " + g[j] +
|
||
|
" result[" + i + "][" + j + "] = " + r[j]);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static void verifyL(String name, int i, long[] g, long[] r) {
|
||
|
for (int j = 0; j < g.length; j++) {
|
||
|
if (g[j] != r[j]) {
|
||
|
throw new RuntimeException("verify " + name + ": arrays must have same content:" +
|
||
|
" gold[" + i + "][" + j + "] = " + g[j] +
|
||
|
" result[" + i + "][" + j + "] = " + r[j]);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
|
||
|
IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIf = {"MaxVectorSize", ">=8"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||
|
static Object[] test0(byte[] a, byte[] b, byte mask) {
|
||
|
for (int i = 0; i < RANGE; i+=8) {
|
||
|
// Safe to vectorize with AlignVector
|
||
|
b[i+0] = (byte)(a[i+0] & mask); // offset 0, align 0
|
||
|
b[i+1] = (byte)(a[i+1] & mask);
|
||
|
b[i+2] = (byte)(a[i+2] & mask);
|
||
|
b[i+3] = (byte)(a[i+3] & mask);
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
|
||
|
IRNode.AND_VB, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
|
||
|
static Object[] test1(byte[] a, byte[] b, byte mask) {
|
||
|
for (int i = 0; i < RANGE; i+=8) {
|
||
|
// Safe to vectorize with AlignVector
|
||
|
b[i+0] = (byte)(a[i+0] & mask); // offset 0, align 0
|
||
|
b[i+1] = (byte)(a[i+1] & mask);
|
||
|
b[i+2] = (byte)(a[i+2] & mask);
|
||
|
b[i+3] = (byte)(a[i+3] & mask);
|
||
|
b[i+4] = (byte)(a[i+4] & mask);
|
||
|
b[i+5] = (byte)(a[i+5] & mask);
|
||
|
b[i+6] = (byte)(a[i+6] & mask);
|
||
|
b[i+7] = (byte)(a[i+7] & mask);
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
|
||
|
IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
|
||
|
IRNode.AND_VB, "= 0",
|
||
|
IRNode.STORE_VECTOR, "= 0"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIf = {"AlignVector", "true"})
|
||
|
static Object[] test2(byte[] a, byte[] b, byte mask) {
|
||
|
for (int i = 0; i < RANGE; i+=8) {
|
||
|
// Cannot align with AlignVector: 3 + x * 8 % 8 = 3
|
||
|
b[i+3] = (byte)(a[i+3] & mask); // at alignment 3
|
||
|
b[i+4] = (byte)(a[i+4] & mask);
|
||
|
b[i+5] = (byte)(a[i+5] & mask);
|
||
|
b[i+6] = (byte)(a[i+6] & mask);
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
|
||
|
IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
|
||
|
IRNode.AND_VB, "= 0",
|
||
|
IRNode.STORE_VECTOR, "= 0"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIf = {"AlignVector", "true"})
|
||
|
static Object[] test3(byte[] a, byte[] b, byte mask) {
|
||
|
for (int i = 0; i < RANGE; i+=8) {
|
||
|
// Cannot align with AlignVector: 3 + x * 8 % 8 = 3
|
||
|
|
||
|
// Problematic for AlignVector
|
||
|
b[i+0] = (byte)(a[i+0] & mask); // best_memref, align 0
|
||
|
|
||
|
b[i+3] = (byte)(a[i+3] & mask); // pack at offset 3 bytes
|
||
|
b[i+4] = (byte)(a[i+4] & mask);
|
||
|
b[i+5] = (byte)(a[i+5] & mask);
|
||
|
b[i+6] = (byte)(a[i+6] & mask);
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
|
||
|
IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_8, "> 0",
|
||
|
IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
|
||
|
IRNode.AND_VB, IRNode.VECTOR_SIZE_8, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"})
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
|
||
|
IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_8, "= 0",// unaligned
|
||
|
IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
|
||
|
IRNode.AND_VB, IRNode.VECTOR_SIZE_8, "= 0",// unaligned
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">=16"})
|
||
|
static Object[] test4(byte[] a, byte[] b, byte mask) {
|
||
|
for (int i = 0; i < RANGE/16; i++) {
|
||
|
// Problematic for AlignVector
|
||
|
b[i*16 + 0 ] = (byte)(a[i*16 + 0 ] & mask); // 4 pack, 0 aligned
|
||
|
b[i*16 + 1 ] = (byte)(a[i*16 + 1 ] & mask);
|
||
|
b[i*16 + 2 ] = (byte)(a[i*16 + 2 ] & mask);
|
||
|
b[i*16 + 3 ] = (byte)(a[i*16 + 3 ] & mask);
|
||
|
|
||
|
b[i*16 + 5 ] = (byte)(a[i*16 + 5 ] & mask); // 8 pack, 5 aligned
|
||
|
b[i*16 + 6 ] = (byte)(a[i*16 + 6 ] & mask);
|
||
|
b[i*16 + 7 ] = (byte)(a[i*16 + 7 ] & mask);
|
||
|
b[i*16 + 8 ] = (byte)(a[i*16 + 8 ] & mask);
|
||
|
b[i*16 + 9 ] = (byte)(a[i*16 + 9 ] & mask);
|
||
|
b[i*16 + 10] = (byte)(a[i*16 + 10] & mask);
|
||
|
b[i*16 + 11] = (byte)(a[i*16 + 11] & mask);
|
||
|
b[i*16 + 12] = (byte)(a[i*16 + 12] & mask);
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
|
||
|
IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
|
||
|
IRNode.AND_VB, "= 0",
|
||
|
IRNode.STORE_VECTOR, "= 0"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIf = {"AlignVector", "true"})
|
||
|
static Object[] test5(byte[] a, byte[] b, byte mask, int inv) {
|
||
|
for (int i = 0; i < RANGE; i+=8) {
|
||
|
// Cannot align with AlignVector because of invariant
|
||
|
b[i+inv+0] = (byte)(a[i+inv+0] & mask);
|
||
|
|
||
|
b[i+inv+3] = (byte)(a[i+inv+3] & mask);
|
||
|
b[i+inv+4] = (byte)(a[i+inv+4] & mask);
|
||
|
b[i+inv+5] = (byte)(a[i+inv+5] & mask);
|
||
|
b[i+inv+6] = (byte)(a[i+inv+6] & mask);
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
|
||
|
IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
|
||
|
IRNode.AND_VB, "= 0",
|
||
|
IRNode.STORE_VECTOR, "= 0"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIf = {"AlignVector", "true"})
|
||
|
static Object[] test6(byte[] a, byte[] b, byte mask) {
|
||
|
for (int i = 0; i < RANGE/8; i+=2) {
|
||
|
// Cannot align with AlignVector because offset is odd
|
||
|
b[i*4+0] = (byte)(a[i*4+0] & mask);
|
||
|
|
||
|
b[i*4+3] = (byte)(a[i*4+3] & mask);
|
||
|
b[i*4+4] = (byte)(a[i*4+4] & mask);
|
||
|
b[i*4+5] = (byte)(a[i*4+5] & mask);
|
||
|
b[i*4+6] = (byte)(a[i*4+6] & mask);
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
|
||
|
IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
|
||
|
IRNode.AND_VS, "= 0",
|
||
|
IRNode.STORE_VECTOR, "= 0"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIf = {"AlignVector", "true"})
|
||
|
static Object[] test7(short[] a, short[] b, short mask) {
|
||
|
for (int i = 0; i < RANGE/8; i+=2) {
|
||
|
// Cannot align with AlignVector because offset is odd
|
||
|
b[i*4+0] = (short)(a[i*4+0] & mask);
|
||
|
|
||
|
b[i*4+3] = (short)(a[i*4+3] & mask);
|
||
|
b[i*4+4] = (short)(a[i*4+4] & mask);
|
||
|
b[i*4+5] = (short)(a[i*4+5] & mask);
|
||
|
b[i*4+6] = (short)(a[i*4+6] & mask);
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
|
||
|
IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
|
||
|
IRNode.AND_VB, "= 0",
|
||
|
IRNode.STORE_VECTOR, "= 0"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIf = {"AlignVector", "true"})
|
||
|
static Object[] test8(byte[] a, byte[] b, byte mask, int init) {
|
||
|
for (int i = init; i < RANGE; i+=8) {
|
||
|
// Cannot align with AlignVector because of invariant (variable init becomes invar)
|
||
|
b[i+0] = (byte)(a[i+0] & mask);
|
||
|
|
||
|
b[i+3] = (byte)(a[i+3] & mask);
|
||
|
b[i+4] = (byte)(a[i+4] & mask);
|
||
|
b[i+5] = (byte)(a[i+5] & mask);
|
||
|
b[i+6] = (byte)(a[i+6] & mask);
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
|
||
|
IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIf = {"MaxVectorSize", ">=8"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||
|
static Object[] test9(byte[] a, byte[] b, byte mask) {
|
||
|
// known non-zero init value does not affect offset, but has implicit effect on iv
|
||
|
for (int i = 13; i < RANGE-8; i+=8) {
|
||
|
b[i+0] = (byte)(a[i+0] & mask);
|
||
|
|
||
|
b[i+3] = (byte)(a[i+3] & mask);
|
||
|
b[i+4] = (byte)(a[i+4] & mask);
|
||
|
b[i+5] = (byte)(a[i+5] & mask);
|
||
|
b[i+6] = (byte)(a[i+6] & mask);
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
|
||
|
IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"})
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
|
||
|
IRNode.AND_VB, "= 0",
|
||
|
IRNode.STORE_VECTOR, "= 0"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIf = {"AlignVector", "true"})
|
||
|
static Object[] test10a(byte[] a, byte[] b, byte mask) {
|
||
|
// This is not alignable with pre-loop, because of odd init.
|
||
|
for (int i = 3; i < RANGE-8; i+=8) {
|
||
|
b[i+0] = (byte)(a[i+0] & mask);
|
||
|
b[i+1] = (byte)(a[i+1] & mask);
|
||
|
b[i+2] = (byte)(a[i+2] & mask);
|
||
|
b[i+3] = (byte)(a[i+3] & mask);
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
|
||
|
IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"})
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
|
||
|
IRNode.AND_VB, "= 0",
|
||
|
IRNode.STORE_VECTOR, "= 0"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIf = {"AlignVector", "true"})
|
||
|
static Object[] test10b(byte[] a, byte[] b, byte mask) {
|
||
|
// This is not alignable with pre-loop, because of odd init.
|
||
|
// Seems not correctly handled.
|
||
|
for (int i = 13; i < RANGE-8; i+=8) {
|
||
|
b[i+0] = (byte)(a[i+0] & mask);
|
||
|
b[i+1] = (byte)(a[i+1] & mask);
|
||
|
b[i+2] = (byte)(a[i+2] & mask);
|
||
|
b[i+3] = (byte)(a[i+3] & mask);
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
|
||
|
IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"})
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
|
||
|
IRNode.AND_VS, "= 0",
|
||
|
IRNode.STORE_VECTOR, "= 0"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIf = {"AlignVector", "true"})
|
||
|
static Object[] test10c(short[] a, short[] b, short mask) {
|
||
|
// This is not alignable with pre-loop, because of odd init.
|
||
|
// Seems not correctly handled with MaxVectorSize >= 32.
|
||
|
for (int i = 13; i < RANGE-8; i+=8) {
|
||
|
b[i+0] = (short)(a[i+0] & mask);
|
||
|
b[i+1] = (short)(a[i+1] & mask);
|
||
|
b[i+2] = (short)(a[i+2] & mask);
|
||
|
b[i+3] = (short)(a[i+3] & mask);
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
|
||
|
IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIf = {"MaxVectorSize", ">=16"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
|
||
|
static Object[] test10d(short[] a, short[] b, short mask) {
|
||
|
for (int i = 13; i < RANGE-16; i+=8) {
|
||
|
// init + offset -> aligned
|
||
|
b[i+0+3] = (short)(a[i+0+3] & mask);
|
||
|
b[i+1+3] = (short)(a[i+1+3] & mask);
|
||
|
b[i+2+3] = (short)(a[i+2+3] & mask);
|
||
|
b[i+3+3] = (short)(a[i+3+3] & mask);
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
|
||
|
IRNode.AND_VB, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||
|
static Object[] test11aB(byte[] a, byte[] b, byte mask) {
|
||
|
for (int i = 0; i < RANGE; i++) {
|
||
|
// always alignable
|
||
|
b[i+0] = (byte)(a[i+0] & mask);
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
|
||
|
IRNode.AND_VS, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||
|
static Object[] test11aS(short[] a, short[] b, short mask) {
|
||
|
for (int i = 0; i < RANGE; i++) {
|
||
|
// always alignable
|
||
|
b[i+0] = (short)(a[i+0] & mask);
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
|
||
|
IRNode.AND_VI, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||
|
static Object[] test11aI(int[] a, int[] b, int mask) {
|
||
|
for (int i = 0; i < RANGE; i++) {
|
||
|
// always alignable
|
||
|
b[i+0] = (int)(a[i+0] & mask);
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
|
||
|
IRNode.AND_VL, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||
|
static Object[] test11aL(long[] a, long[] b, long mask) {
|
||
|
for (int i = 0; i < RANGE; i++) {
|
||
|
// always alignable
|
||
|
b[i+0] = (long)(a[i+0] & mask);
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
|
||
|
IRNode.AND_VB, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||
|
static Object[] test11bB(byte[] a, byte[] b, byte mask) {
|
||
|
for (int i = 1; i < RANGE; i++) {
|
||
|
// always alignable
|
||
|
b[i+0] = (byte)(a[i+0] & mask);
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
|
||
|
IRNode.AND_VS, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||
|
static Object[] test11bS(short[] a, short[] b, short mask) {
|
||
|
for (int i = 1; i < RANGE; i++) {
|
||
|
// always alignable
|
||
|
b[i+0] = (short)(a[i+0] & mask);
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
|
||
|
IRNode.AND_VI, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||
|
static Object[] test11bI(int[] a, int[] b, int mask) {
|
||
|
for (int i = 1; i < RANGE; i++) {
|
||
|
// always alignable
|
||
|
b[i+0] = (int)(a[i+0] & mask);
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
|
||
|
IRNode.AND_VL, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||
|
static Object[] test11bL(long[] a, long[] b, long mask) {
|
||
|
for (int i = 1; i < RANGE; i++) {
|
||
|
// always alignable
|
||
|
b[i+0] = (long)(a[i+0] & mask);
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
|
||
|
IRNode.AND_VB, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIf = {"AlignVector", "false"})
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
|
||
|
IRNode.AND_VB, "= 0",
|
||
|
IRNode.STORE_VECTOR, "= 0"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIf = {"AlignVector", "true"})
|
||
|
static Object[] test11cB(byte[] a, byte[] b, byte mask) {
|
||
|
for (int i = 1; i < RANGE-1; i++) {
|
||
|
// 1 byte offset -> not alignable with AlignVector
|
||
|
b[i+0] = (byte)(a[i+1] & mask);
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
|
||
|
IRNode.AND_VS, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIf = {"AlignVector", "false"})
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
|
||
|
IRNode.AND_VS, "= 0",
|
||
|
IRNode.STORE_VECTOR, "= 0"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIf = {"AlignVector", "true"})
|
||
|
static Object[] test11cS(short[] a, short[] b, short mask) {
|
||
|
for (int i = 1; i < RANGE-1; i++) {
|
||
|
// 2 byte offset -> not alignable with AlignVector
|
||
|
b[i+0] = (short)(a[i+1] & mask);
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
|
||
|
IRNode.AND_VI, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIf = {"AlignVector", "false"})
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_I, "= 0",
|
||
|
IRNode.AND_VI, "= 0",
|
||
|
IRNode.STORE_VECTOR, "= 0"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIf = {"AlignVector", "true"})
|
||
|
static Object[] test11cI(int[] a, int[] b, int mask) {
|
||
|
for (int i = 1; i < RANGE-1; i++) {
|
||
|
// 4 byte offset -> not alignable with AlignVector
|
||
|
b[i+0] = (int)(a[i+1] & mask);
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
|
||
|
IRNode.AND_VL, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||
|
static Object[] test11cL(long[] a, long[] b, long mask) {
|
||
|
for (int i = 1; i < RANGE-1; i++) {
|
||
|
// always alignable (8 byte offset)
|
||
|
b[i+0] = (long)(a[i+1] & mask);
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
|
||
|
IRNode.AND_VB, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||
|
static Object[] test11dB(byte[] a, byte[] b, byte mask, int invar) {
|
||
|
for (int i = 0; i < RANGE; i++) {
|
||
|
b[i+0+invar] = (byte)(a[i+0+invar] & mask);
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
|
||
|
IRNode.AND_VS, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||
|
static Object[] test11dS(short[] a, short[] b, short mask, int invar) {
|
||
|
for (int i = 0; i < RANGE; i++) {
|
||
|
b[i+0+invar] = (short)(a[i+0+invar] & mask);
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
|
||
|
IRNode.AND_VI, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||
|
static Object[] test11dI(int[] a, int[] b, int mask, int invar) {
|
||
|
for (int i = 0; i < RANGE; i++) {
|
||
|
b[i+0+invar] = (int)(a[i+0+invar] & mask);
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
|
||
|
IRNode.AND_VL, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||
|
static Object[] test11dL(long[] a, long[] b, long mask, int invar) {
|
||
|
for (int i = 0; i < RANGE; i++) {
|
||
|
b[i+0+invar] = (long)(a[i+0+invar] & mask);
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
|
||
|
IRNode.AND_VB, "= 0",
|
||
|
IRNode.STORE_VECTOR, "= 0"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||
|
static Object[] test12(byte[] a, byte[] b, byte mask) {
|
||
|
for (int i = 0; i < RANGE/16; i++) {
|
||
|
// Currently does not vectorize at all
|
||
|
b[i*6 + 0 ] = (byte)(a[i*6 + 0 ] & mask);
|
||
|
b[i*6 + 1 ] = (byte)(a[i*6 + 1 ] & mask);
|
||
|
b[i*6 + 2 ] = (byte)(a[i*6 + 2 ] & mask);
|
||
|
b[i*6 + 3 ] = (byte)(a[i*6 + 3 ] & mask);
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
|
||
|
IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
|
||
|
IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
|
||
|
IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfCPUFeatureOr = {"avx2", "true"})
|
||
|
// require avx to ensure vectors are larger than what unrolling produces
|
||
|
static Object[] test13aIL(int[] a, long[] b) {
|
||
|
for (int i = 0; i < RANGE; i++) {
|
||
|
a[i]++;
|
||
|
b[i]++;
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
|
||
|
IRNode.LOAD_VECTOR_I, "> 0",
|
||
|
IRNode.ADD_VB, "> 0",
|
||
|
IRNode.ADD_VI, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
|
||
|
static Object[] test13aIB(int[] a, byte[] b) {
|
||
|
for (int i = 0; i < RANGE; i++) {
|
||
|
a[i]++;
|
||
|
b[i]++;
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
|
||
|
IRNode.LOAD_VECTOR_S, "> 0",
|
||
|
IRNode.ADD_VI, "> 0",
|
||
|
IRNode.ADD_VS, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
|
||
|
static Object[] test13aIS(int[] a, short[] b) {
|
||
|
for (int i = 0; i < RANGE; i++) {
|
||
|
a[i]++;
|
||
|
b[i]++;
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
|
||
|
IRNode.LOAD_VECTOR_S, "> 0",
|
||
|
IRNode.LOAD_VECTOR_I, "> 0",
|
||
|
IRNode.LOAD_VECTOR_L, "> 0",
|
||
|
IRNode.ADD_VB, "> 0",
|
||
|
IRNode.ADD_VS, "> 0",
|
||
|
IRNode.ADD_VI, "> 0",
|
||
|
IRNode.ADD_VL, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
|
||
|
static Object[] test13aBSIL(byte[] a, short[] b, int[] c, long[] d) {
|
||
|
for (int i = 0; i < RANGE; i++) {
|
||
|
a[i]++;
|
||
|
b[i]++;
|
||
|
c[i]++;
|
||
|
d[i]++;
|
||
|
}
|
||
|
return new Object[]{ a, b, c, d };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
|
||
|
IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
|
||
|
IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
|
||
|
IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfCPUFeatureOr = {"avx2", "true"})
|
||
|
// require avx to ensure vectors are larger than what unrolling produces
|
||
|
static Object[] test13bIL(int[] a, long[] b) {
|
||
|
for (int i = 1; i < RANGE; i++) {
|
||
|
a[i]++;
|
||
|
b[i]++;
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
|
||
|
IRNode.LOAD_VECTOR_I, "> 0",
|
||
|
IRNode.ADD_VB, "> 0",
|
||
|
IRNode.ADD_VI, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
|
||
|
static Object[] test13bIB(int[] a, byte[] b) {
|
||
|
for (int i = 1; i < RANGE; i++) {
|
||
|
a[i]++;
|
||
|
b[i]++;
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
|
||
|
IRNode.LOAD_VECTOR_S, "> 0",
|
||
|
IRNode.ADD_VI, "> 0",
|
||
|
IRNode.ADD_VS, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
|
||
|
static Object[] test13bIS(int[] a, short[] b) {
|
||
|
for (int i = 1; i < RANGE; i++) {
|
||
|
a[i]++;
|
||
|
b[i]++;
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
|
||
|
IRNode.LOAD_VECTOR_S, "> 0",
|
||
|
IRNode.LOAD_VECTOR_I, "> 0",
|
||
|
IRNode.LOAD_VECTOR_L, "> 0",
|
||
|
IRNode.ADD_VB, "> 0",
|
||
|
IRNode.ADD_VS, "> 0",
|
||
|
IRNode.ADD_VI, "> 0",
|
||
|
IRNode.ADD_VL, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
|
||
|
static Object[] test13bBSIL(byte[] a, short[] b, int[] c, long[] d) {
|
||
|
for (int i = 1; i < RANGE; i++) {
|
||
|
a[i]++;
|
||
|
b[i]++;
|
||
|
c[i]++;
|
||
|
d[i]++;
|
||
|
}
|
||
|
return new Object[]{ a, b, c, d };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
|
||
|
IRNode.ADD_VB, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIf = {"AlignVector", "false"})
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
|
||
|
IRNode.ADD_VB, "= 0",
|
||
|
IRNode.STORE_VECTOR, "= 0"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIf = {"AlignVector", "true"})
|
||
|
static Object[] test14aB(byte[] a) {
|
||
|
// non-power-of-2 stride
|
||
|
for (int i = 0; i < RANGE-20; i+=9) {
|
||
|
a[i+0]++;
|
||
|
a[i+1]++;
|
||
|
a[i+2]++;
|
||
|
a[i+3]++;
|
||
|
a[i+4]++;
|
||
|
a[i+5]++;
|
||
|
a[i+6]++;
|
||
|
a[i+7]++;
|
||
|
a[i+8]++;
|
||
|
a[i+9]++;
|
||
|
a[i+10]++;
|
||
|
a[i+11]++;
|
||
|
a[i+12]++;
|
||
|
a[i+13]++;
|
||
|
a[i+14]++;
|
||
|
a[i+15]++;
|
||
|
}
|
||
|
return new Object[]{ a };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
|
||
|
IRNode.ADD_VB, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIf = {"AlignVector", "false"})
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
|
||
|
IRNode.ADD_VB, "= 0",
|
||
|
IRNode.STORE_VECTOR, "= 0"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIf = {"AlignVector", "true"})
|
||
|
static Object[] test14bB(byte[] a) {
|
||
|
// non-power-of-2 stride
|
||
|
for (int i = 0; i < RANGE-20; i+=3) {
|
||
|
a[i+0]++;
|
||
|
a[i+1]++;
|
||
|
a[i+2]++;
|
||
|
a[i+3]++;
|
||
|
a[i+4]++;
|
||
|
a[i+5]++;
|
||
|
a[i+6]++;
|
||
|
a[i+7]++;
|
||
|
a[i+8]++;
|
||
|
a[i+9]++;
|
||
|
a[i+10]++;
|
||
|
a[i+11]++;
|
||
|
a[i+12]++;
|
||
|
a[i+13]++;
|
||
|
a[i+14]++;
|
||
|
a[i+15]++;
|
||
|
}
|
||
|
return new Object[]{ a };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
|
||
|
IRNode.ADD_VB, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIf = {"AlignVector", "false"})
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
|
||
|
IRNode.ADD_VB, "= 0",
|
||
|
IRNode.STORE_VECTOR, "= 0"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIf = {"AlignVector", "true"})
|
||
|
static Object[] test14cB(byte[] a) {
|
||
|
// non-power-of-2 stride
|
||
|
for (int i = 0; i < RANGE-20; i+=5) {
|
||
|
a[i+0]++;
|
||
|
a[i+1]++;
|
||
|
a[i+2]++;
|
||
|
a[i+3]++;
|
||
|
a[i+4]++;
|
||
|
a[i+5]++;
|
||
|
a[i+6]++;
|
||
|
a[i+7]++;
|
||
|
a[i+8]++;
|
||
|
a[i+9]++;
|
||
|
a[i+10]++;
|
||
|
a[i+11]++;
|
||
|
a[i+12]++;
|
||
|
a[i+13]++;
|
||
|
a[i+14]++;
|
||
|
a[i+15]++;
|
||
|
}
|
||
|
return new Object[]{ a };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
// IR rules difficult because of modulo wrapping with offset after peeling.
|
||
|
static Object[] test15aB(byte[] a) {
|
||
|
// non-power-of-2 scale
|
||
|
for (int i = 0; i < RANGE/64-20; i++) {
|
||
|
a[53*i+0]++;
|
||
|
a[53*i+1]++;
|
||
|
a[53*i+2]++;
|
||
|
a[53*i+3]++;
|
||
|
a[53*i+4]++;
|
||
|
a[53*i+5]++;
|
||
|
a[53*i+6]++;
|
||
|
a[53*i+7]++;
|
||
|
a[53*i+8]++;
|
||
|
a[53*i+9]++;
|
||
|
a[53*i+10]++;
|
||
|
a[53*i+11]++;
|
||
|
a[53*i+12]++;
|
||
|
a[53*i+13]++;
|
||
|
a[53*i+14]++;
|
||
|
a[53*i+15]++;
|
||
|
}
|
||
|
return new Object[]{ a };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
// IR rules difficult because of modulo wrapping with offset after peeling.
|
||
|
static Object[] test15bB(byte[] a) {
|
||
|
// non-power-of-2 scale
|
||
|
for (int i = 0; i < RANGE/64-20; i++) {
|
||
|
a[25*i+0]++;
|
||
|
a[25*i+1]++;
|
||
|
a[25*i+2]++;
|
||
|
a[25*i+3]++;
|
||
|
a[25*i+4]++;
|
||
|
a[25*i+5]++;
|
||
|
a[25*i+6]++;
|
||
|
a[25*i+7]++;
|
||
|
a[25*i+8]++;
|
||
|
a[25*i+9]++;
|
||
|
a[25*i+10]++;
|
||
|
a[25*i+11]++;
|
||
|
a[25*i+12]++;
|
||
|
a[25*i+13]++;
|
||
|
a[25*i+14]++;
|
||
|
a[25*i+15]++;
|
||
|
}
|
||
|
return new Object[]{ a };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
// IR rules difficult because of modulo wrapping with offset after peeling.
|
||
|
static Object[] test15cB(byte[] a) {
|
||
|
// non-power-of-2 scale
|
||
|
for (int i = 0; i < RANGE/64-20; i++) {
|
||
|
a[19*i+0]++;
|
||
|
a[19*i+1]++;
|
||
|
a[19*i+2]++;
|
||
|
a[19*i+3]++;
|
||
|
a[19*i+4]++;
|
||
|
a[19*i+5]++;
|
||
|
a[19*i+6]++;
|
||
|
a[19*i+7]++;
|
||
|
a[19*i+8]++;
|
||
|
a[19*i+9]++;
|
||
|
a[19*i+10]++;
|
||
|
a[19*i+11]++;
|
||
|
a[19*i+12]++;
|
||
|
a[19*i+13]++;
|
||
|
a[19*i+14]++;
|
||
|
a[19*i+15]++;
|
||
|
}
|
||
|
return new Object[]{ a };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
static Object[] test16a(byte[] a, short[] b) {
|
||
|
// infinite loop issues
|
||
|
for (int i = 0; i < RANGE/2-20; i++) {
|
||
|
a[2*i+0]++;
|
||
|
a[2*i+1]++;
|
||
|
a[2*i+2]++;
|
||
|
a[2*i+3]++;
|
||
|
a[2*i+4]++;
|
||
|
a[2*i+5]++;
|
||
|
a[2*i+6]++;
|
||
|
a[2*i+7]++;
|
||
|
a[2*i+8]++;
|
||
|
a[2*i+9]++;
|
||
|
a[2*i+10]++;
|
||
|
a[2*i+11]++;
|
||
|
a[2*i+12]++;
|
||
|
a[2*i+13]++;
|
||
|
a[2*i+14]++;
|
||
|
|
||
|
b[2*i+0]++;
|
||
|
b[2*i+1]++;
|
||
|
b[2*i+2]++;
|
||
|
b[2*i+3]++;
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
static Object[] test16b(byte[] a) {
|
||
|
// infinite loop issues
|
||
|
for (int i = 0; i < RANGE/2-20; i++) {
|
||
|
a[2*i+0]++;
|
||
|
a[2*i+1]++;
|
||
|
a[2*i+2]++;
|
||
|
a[2*i+3]++;
|
||
|
a[2*i+4]++;
|
||
|
a[2*i+5]++;
|
||
|
a[2*i+6]++;
|
||
|
a[2*i+7]++;
|
||
|
a[2*i+8]++;
|
||
|
a[2*i+9]++;
|
||
|
a[2*i+10]++;
|
||
|
a[2*i+11]++;
|
||
|
a[2*i+12]++;
|
||
|
a[2*i+13]++;
|
||
|
a[2*i+14]++;
|
||
|
}
|
||
|
return new Object[]{ a };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
|
||
|
IRNode.ADD_VL, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||
|
static Object[] test17a(long[] a) {
|
||
|
// Unsafe: vectorizes with profiling (not xcomp)
|
||
|
for (int i = 0; i < RANGE; i++) {
|
||
|
int adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 * i;
|
||
|
long v = UNSAFE.getLongUnaligned(a, adr);
|
||
|
UNSAFE.putLongUnaligned(a, adr, v + 1);
|
||
|
}
|
||
|
return new Object[]{ a };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
// Difficult to write good IR rule. Modulo calculus overflow can create non-power-of-2 packs.
|
||
|
static Object[] test17b(long[] a) {
|
||
|
// Not alignable
|
||
|
for (int i = 0; i < RANGE-1; i++) {
|
||
|
int adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 * i + 1;
|
||
|
long v = UNSAFE.getLongUnaligned(a, adr);
|
||
|
UNSAFE.putLongUnaligned(a, adr, v + 1);
|
||
|
}
|
||
|
return new Object[]{ a };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE_2, "> 0",
|
||
|
IRNode.ADD_VL, IRNode.VECTOR_SIZE_2, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIf = {"MaxVectorSize", ">=32"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||
|
static Object[] test17c(long[] a) {
|
||
|
// Unsafe: aligned vectorizes
|
||
|
for (int i = 0; i < RANGE-1; i+=4) {
|
||
|
int adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 * i;
|
||
|
long v0 = UNSAFE.getLongUnaligned(a, adr + 0);
|
||
|
long v1 = UNSAFE.getLongUnaligned(a, adr + 8);
|
||
|
UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1);
|
||
|
UNSAFE.putLongUnaligned(a, adr + 8, v1 + 1);
|
||
|
}
|
||
|
return new Object[]{ a };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE_2, "> 0",
|
||
|
IRNode.ADD_VL, IRNode.VECTOR_SIZE_2, "> 0",
|
||
|
IRNode.STORE_VECTOR, "> 0"},
|
||
|
applyIfCPUFeatureOr = {"avx512", "true", "asimd", "true"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=64"})
|
||
|
// Ensure vector width is large enough to fit 64 byte for longs:
|
||
|
// The offsets are: 25, 33, 57, 65
|
||
|
// In modulo 32: 25, 1, 25, 1 -> does not vectorize
|
||
|
// In modulo 64: 25, 33, 57, 1 -> at least first pair vectorizes
|
||
|
// This problem is because we compute modulo vector width in memory_alignment.
|
||
|
@IR(counts = {IRNode.LOAD_VECTOR_L, "= 0",
|
||
|
IRNode.ADD_VL, "= 0",
|
||
|
IRNode.STORE_VECTOR, "= 0"},
|
||
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
|
||
|
applyIfPlatform = {"64-bit", "true"},
|
||
|
applyIf = {"AlignVector", "true"})
|
||
|
static Object[] test17d(long[] a) {
|
||
|
// Not alignable
|
||
|
for (int i = 0; i < RANGE-1; i+=4) {
|
||
|
int adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 * i + 1;
|
||
|
long v0 = UNSAFE.getLongUnaligned(a, adr + 0);
|
||
|
long v1 = UNSAFE.getLongUnaligned(a, adr + 8);
|
||
|
UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1);
|
||
|
UNSAFE.putLongUnaligned(a, adr + 8, v1 + 1);
|
||
|
}
|
||
|
return new Object[]{ a };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
static Object[] test18a(byte[] a, int[] b) {
|
||
|
// scale = 0 --> no iv
|
||
|
for (int i = 0; i < RANGE; i++) {
|
||
|
a[0] = 1;
|
||
|
b[i] = 2;
|
||
|
a[1] = 1;
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
static Object[] test18b(byte[] a, int[] b) {
|
||
|
// scale = 0 --> no iv
|
||
|
for (int i = 0; i < RANGE; i++) {
|
||
|
a[1] = 1;
|
||
|
b[i] = 2;
|
||
|
a[2] = 1;
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
static Object[] test19(int[] a, int[] b) {
|
||
|
for (int i = 5000; i > 0; i--) {
|
||
|
a[RANGE_FINAL - i] = b[RANGE_FINAL - i];
|
||
|
}
|
||
|
return new Object[]{ a, b };
|
||
|
}
|
||
|
|
||
|
@Test
|
||
|
static Object[] test20(byte[] a) {
|
||
|
// Example where it is easy to pass alignment check,
|
||
|
// but used to fail the alignment calculation
|
||
|
for (int i = 1; i < RANGE/2-50; i++) {
|
||
|
a[2*i+0+30]++;
|
||
|
a[2*i+1+30]++;
|
||
|
a[2*i+2+30]++;
|
||
|
a[2*i+3+30]++;
|
||
|
}
|
||
|
return new Object[]{ a };
|
||
|
}
|
||
|
}
|