jdk-24/test/hotspot/jtreg/compiler/loopopts/superword/TestDependencyOffsets.java
2024-11-20 14:23:57 +00:00

971 lines
42 KiB
Java

/*
* Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* @test id=vanilla-A
* @bug 8298935 8308606 8310308 8312570 8310190
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
* @library /test/lib /
* @compile ../../lib/ir_framework/TestFramework.java
* @run driver compiler.loopopts.superword.TestDependencyOffsets vanilla-A
*/
/*
* @test id=vanilla-U
* @bug 8298935 8308606 8310308 8312570 8310190
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
* @library /test/lib /
* @compile ../../lib/ir_framework/TestFramework.java
* @run driver compiler.loopopts.superword.TestDependencyOffsets vanilla-U
*/
/*
* @test id=sse4-v016-A
* @bug 8298935 8308606 8310308 8312570 8310190
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
* @requires vm.compiler2.enabled
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
* @requires vm.cpu.features ~= ".*sse4.*"
* @library /test/lib /
* @compile ../../lib/ir_framework/TestFramework.java
* @run driver compiler.loopopts.superword.TestDependencyOffsets sse4-v016-A
*/
/*
* @test id=sse4-v016-U
* @bug 8298935 8308606 8310308 8312570 8310190
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
* @requires vm.compiler2.enabled
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
* @requires vm.cpu.features ~= ".*sse4.*"
* @library /test/lib /
* @compile ../../lib/ir_framework/TestFramework.java
* @run driver compiler.loopopts.superword.TestDependencyOffsets sse4-v016-U
*/
/*
* @test id=sse4-v008-A
* @bug 8298935 8308606 8310308 8312570 8310190
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
* @requires vm.compiler2.enabled
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
* @requires vm.cpu.features ~= ".*sse4.*"
* @library /test/lib /
* @compile ../../lib/ir_framework/TestFramework.java
* @run driver compiler.loopopts.superword.TestDependencyOffsets sse4-v008-A
*/
/*
* @test id=sse4-v008-U
* @bug 8298935 8308606 8310308 8312570 8310190
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
* @requires vm.compiler2.enabled
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
* @requires vm.cpu.features ~= ".*sse4.*"
* @library /test/lib /
* @compile ../../lib/ir_framework/TestFramework.java
* @run driver compiler.loopopts.superword.TestDependencyOffsets sse4-v008-U
*/
/*
* @test id=sse4-v004-A
* @bug 8298935 8308606 8310308 8312570 8310190
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
* @requires vm.compiler2.enabled
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
* @requires vm.cpu.features ~= ".*sse4.*"
* @library /test/lib /
* @compile ../../lib/ir_framework/TestFramework.java
* @run driver compiler.loopopts.superword.TestDependencyOffsets sse4-v004-A
*/
/*
* @test id=sse4-v004-U
* @bug 8298935 8308606 8310308 8312570 8310190
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
* @requires vm.compiler2.enabled
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
* @requires vm.cpu.features ~= ".*sse4.*"
* @library /test/lib /
* @compile ../../lib/ir_framework/TestFramework.java
* @run driver compiler.loopopts.superword.TestDependencyOffsets sse4-v004-U
*/
/*
* @test id=avx1-v032-A
* @bug 8298935 8308606 8310308 8312570 8310190
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
* @requires vm.compiler2.enabled
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
* @requires vm.cpu.features ~= ".*avx.*"
* @library /test/lib /
* @compile ../../lib/ir_framework/TestFramework.java
* @run driver compiler.loopopts.superword.TestDependencyOffsets avx1-v032-A
*/
/*
* @test id=avx1-v032-U
* @bug 8298935 8308606 8310308 8312570 8310190
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
* @requires vm.compiler2.enabled
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
* @requires vm.cpu.features ~= ".*avx.*"
* @library /test/lib /
* @compile ../../lib/ir_framework/TestFramework.java
* @run driver compiler.loopopts.superword.TestDependencyOffsets avx1-v032-U
*/
/*
* @test id=avx1-v016-A
* @bug 8298935 8308606 8310308 8312570 8310190
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
* @requires vm.compiler2.enabled
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
* @requires vm.cpu.features ~= ".*avx.*"
* @library /test/lib /
* @compile ../../lib/ir_framework/TestFramework.java
* @run driver compiler.loopopts.superword.TestDependencyOffsets avx1-v016-A
*/
/*
* @test id=avx1-v016-U
* @bug 8298935 8308606 8310308 8312570 8310190
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
* @requires vm.compiler2.enabled
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
* @requires vm.cpu.features ~= ".*avx.*"
* @library /test/lib /
* @compile ../../lib/ir_framework/TestFramework.java
* @run driver compiler.loopopts.superword.TestDependencyOffsets avx1-v016-U
*/
/*
* @test id=avx2-v032-A
* @bug 8298935 8308606 8310308 8312570 8310190
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
* @requires vm.compiler2.enabled
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
* @requires vm.cpu.features ~= ".*avx2.*"
* @library /test/lib /
* @compile ../../lib/ir_framework/TestFramework.java
* @run driver compiler.loopopts.superword.TestDependencyOffsets avx2-v032-A
*/
/*
* @test id=avx2-v032-U
* @bug 8298935 8308606 8310308 8312570 8310190
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
* @requires vm.compiler2.enabled
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
* @requires vm.cpu.features ~= ".*avx2.*"
* @library /test/lib /
* @compile ../../lib/ir_framework/TestFramework.java
* @run driver compiler.loopopts.superword.TestDependencyOffsets avx2-v032-U
*/
/*
* @test id=avx2-v016-A
* @bug 8298935 8308606 8310308 8312570 8310190
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
* @requires vm.compiler2.enabled
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
* @requires vm.cpu.features ~= ".*avx2.*"
* @library /test/lib /
* @compile ../../lib/ir_framework/TestFramework.java
* @run driver compiler.loopopts.superword.TestDependencyOffsets avx2-v016-A
*/
/*
* @test id=avx2-v016-U
* @bug 8298935 8308606 8310308 8312570 8310190
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
* @requires vm.compiler2.enabled
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
* @requires vm.cpu.features ~= ".*avx2.*"
* @library /test/lib /
* @compile ../../lib/ir_framework/TestFramework.java
* @run driver compiler.loopopts.superword.TestDependencyOffsets avx2-v016-U
*/
/*
* @test id=avx512-v064-A
* @bug 8298935 8308606 8310308 8312570 8310190
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
* @requires vm.compiler2.enabled
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
* @requires vm.cpu.features ~= ".*avx512.*"
* @library /test/lib /
* @compile ../../lib/ir_framework/TestFramework.java
* @run driver compiler.loopopts.superword.TestDependencyOffsets avx512-v064-A
*/
/*
* @test id=avx512-v064-U
* @bug 8298935 8308606 8310308 8312570 8310190
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
* @requires vm.compiler2.enabled
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
* @requires vm.cpu.features ~= ".*avx512.*"
* @library /test/lib /
* @compile ../../lib/ir_framework/TestFramework.java
* @run driver compiler.loopopts.superword.TestDependencyOffsets avx512-v064-U
*/
/*
* @test id=avx512-v032-A
* @bug 8298935 8308606 8310308 8312570 8310190
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
* @requires vm.compiler2.enabled
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
* @requires vm.cpu.features ~= ".*avx512.*"
* @library /test/lib /
* @compile ../../lib/ir_framework/TestFramework.java
* @run driver compiler.loopopts.superword.TestDependencyOffsets avx512-v032-A
*/
/*
* @test id=avx512-v032-U
* @bug 8298935 8308606 8310308 8312570 8310190
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
* @requires vm.compiler2.enabled
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
* @requires vm.cpu.features ~= ".*avx512.*"
* @library /test/lib /
* @compile ../../lib/ir_framework/TestFramework.java
* @run driver compiler.loopopts.superword.TestDependencyOffsets avx512-v032-U
*/
/*
* @test id=avx512bw-v064-A
* @bug 8298935 8308606 8310308 8312570 8310190
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
* @requires vm.compiler2.enabled
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
* @requires vm.cpu.features ~= ".*avx512bw.*"
* @library /test/lib /
* @compile ../../lib/ir_framework/TestFramework.java
* @run driver compiler.loopopts.superword.TestDependencyOffsets avx512bw-v064-A
*/
/*
* @test id=avx512bw-v064-U
* @bug 8298935 8308606 8310308 8312570 8310190
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
* @requires vm.compiler2.enabled
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
* @requires vm.cpu.features ~= ".*avx512bw.*"
* @library /test/lib /
* @compile ../../lib/ir_framework/TestFramework.java
* @run driver compiler.loopopts.superword.TestDependencyOffsets avx512bw-v064-U
*/
/*
* @test id=avx512bw-v032-A
* @bug 8298935 8308606 8310308 8312570 8310190
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
* @requires vm.compiler2.enabled
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
* @requires vm.cpu.features ~= ".*avx512bw.*"
* @library /test/lib /
* @compile ../../lib/ir_framework/TestFramework.java
* @run driver compiler.loopopts.superword.TestDependencyOffsets avx512bw-v032-A
*/
/*
* @test id=avx512bw-v032-U
* @bug 8298935 8308606 8310308 8312570 8310190
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
* @requires vm.compiler2.enabled
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
* @requires vm.cpu.features ~= ".*avx512bw.*"
* @library /test/lib /
* @compile ../../lib/ir_framework/TestFramework.java
* @run driver compiler.loopopts.superword.TestDependencyOffsets avx512bw-v032-U
*/
/*
* @test id=vec-v064-A
* @bug 8298935 8308606 8310308 8312570 8310190
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
* @requires vm.compiler2.enabled
* @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64")
* @library /test/lib /
* @compile ../../lib/ir_framework/TestFramework.java
* @run driver compiler.loopopts.superword.TestDependencyOffsets vec-v064-A
*/
/*
* @test id=vec-v064-U
* @bug 8298935 8308606 8310308 8312570 8310190
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
* @requires vm.compiler2.enabled
* @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64")
* @library /test/lib /
* @compile ../../lib/ir_framework/TestFramework.java
* @run driver compiler.loopopts.superword.TestDependencyOffsets vec-v064-U
*/
/*
* @test id=vec-v032-A
* @bug 8298935 8308606 8310308 8312570 8310190
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
* @requires vm.compiler2.enabled
* @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64")
* @library /test/lib /
* @compile ../../lib/ir_framework/TestFramework.java
* @run driver compiler.loopopts.superword.TestDependencyOffsets vec-v032-A
*/
/*
* @test id=vec-v032-U
* @bug 8298935 8308606 8310308 8312570 8310190
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
* @requires vm.compiler2.enabled
* @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64")
* @library /test/lib /
* @compile ../../lib/ir_framework/TestFramework.java
* @run driver compiler.loopopts.superword.TestDependencyOffsets vec-v032-U
*/
/*
* @test id=vec-v016-A
* @bug 8298935 8308606 8310308 8312570 8310190
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
* @requires vm.compiler2.enabled
* @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64")
* @library /test/lib /
* @compile ../../lib/ir_framework/TestFramework.java
* @run driver compiler.loopopts.superword.TestDependencyOffsets vec-v016-A
*/
/*
* @test id=vec-v016-U
* @bug 8298935 8308606 8310308 8312570 8310190
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
* @requires vm.compiler2.enabled
* @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64")
* @library /test/lib /
* @compile ../../lib/ir_framework/TestFramework.java
* @run driver compiler.loopopts.superword.TestDependencyOffsets vec-v016-U
*/
/*
* @test id=vec-v008-A
* @bug 8298935 8308606 8310308 8312570 8310190
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
* @requires vm.compiler2.enabled
* @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64")
* @library /test/lib /
* @compile ../../lib/ir_framework/TestFramework.java
* @run driver compiler.loopopts.superword.TestDependencyOffsets vec-v008-A
*/
/*
* @test id=vec-v008-U
* @bug 8298935 8308606 8310308 8312570 8310190
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
* @requires vm.compiler2.enabled
* @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64")
* @library /test/lib /
* @compile ../../lib/ir_framework/TestFramework.java
* @run driver compiler.loopopts.superword.TestDependencyOffsets vec-v008-U
*/
/*
* @test id=vec-v004-A
* @bug 8298935 8308606 8310308 8312570 8310190
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
* @requires vm.compiler2.enabled
* @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64")
* @library /test/lib /
* @compile ../../lib/ir_framework/TestFramework.java
* @run driver compiler.loopopts.superword.TestDependencyOffsets vec-v004-A
*/
/*
* @test id=vec-v004-U
* @bug 8298935 8308606 8310308 8312570 8310190
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
* @requires vm.compiler2.enabled
* @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64")
* @library /test/lib /
* @compile ../../lib/ir_framework/TestFramework.java
* @run driver compiler.loopopts.superword.TestDependencyOffsets vec-v004-U
*/
package compiler.loopopts.superword;
import compiler.lib.ir_framework.*;
import compiler.lib.compile_framework.*;
import jdk.test.lib.Utils;
import java.util.Arrays;
import java.util.stream.Collectors;
import java.util.ArrayList;
import java.util.List;
import java.util.HashSet;
import java.util.Set;
import java.util.HashMap;
import java.util.Random;
/*
* We want to test SuperWord / AutoVectorization with different constant offsets (positive and negative):
* for (int i = ...) { a[i + offset] = b[i] * 11; }
*
* To test aliasing, we have 3 modes: single-array, aliasing and non-aliasing.
* We test for various primitive types (int, long, short, char, byte, float, double).
* We run all tests under various settings of MaxVectorSize and +-AlignVector.
* Finally, we verify the results and check that vectors of the expected length were created (IR rules).
*/
public class TestDependencyOffsets {
// Random source shared by the generator: it picks SIZE, the sampled offsets in getOffsets()
// and the aliasing mode in TestDefinition.generate().
// NOTE(review): presumably seeded/reported by the test library so failures are reproducible - confirm.
private static final Random RANDOM = Utils.getRandomInstance();
// Array length baked into the generated InnerTest source: 5000 plus a random value in [0, 1000).
private static final int SIZE = 5_000 + RANDOM.nextInt(1000);
/*
 * Produces the Java source of the generated InnerTest class.
 *
 * The template is filled with: the array SIZE, the class path reported by the compile
 * framework, the quoted VM flags, one init and one verify method per element type, and
 * finally all generated tests.
 */
private static String generate(CompileFramework comp, String[] flags) {
    final String classPath = comp.getEscapedClassPathOfCompiledClasses();
    // Every flag becomes a quoted Java string literal argument of framework.addFlags(...).
    final String quotedFlags = Arrays.stream(flags)
                                     .map(flag -> '"' + flag + '"')
                                     .collect(Collectors.joining(", "));
    final String initMethods =
        String.join("\n", Arrays.stream(TYPES).map(Type::generateInit).toList());
    final String verifyMethods =
        String.join("\n", Arrays.stream(TYPES).map(Type::generateVerify).toList());
    final String tests =
        String.join("\n", getTests().stream().map(TestDefinition::generate).toList());
    return """
            import compiler.lib.ir_framework.*;
            public class InnerTest {
            private static int SIZE = %s;
            public static void main(String args[]) {
            TestFramework framework = new TestFramework(InnerTest.class);
            framework.addFlags("-classpath", "%s");
            framework.addFlags(%s);
            framework.setDefaultWarmup(0);
            framework.start();
            }
            // ------------------------- Init ---------------------------
            %s
            // ------------------------- Verify -------------------------
            %s
            // ------------------------- Tests --------------------------
            %s
            }
            """.formatted(SIZE, classPath, quotedFlags, initMethods, verifyMethods, tests);
}
/*
 * Driver entry point. Expects exactly one argument: the jtreg test id (e.g. "avx2-v032-A"),
 * which selects the VM flags passed on to the generated InnerTest. The inner test source is
 * then generated, compiled and run, and the time spent in each phase is printed in
 * milliseconds.
 *
 * Throws RuntimeException if the argument count is not one or the id is not recognized.
 */
public static void main(String[] args) {
    if (args.length != 1) {
        throw new RuntimeException("Test requires exactly one argument!");
    }
    // Suffix "-A" runs with +AlignVector, suffix "-U" with -AlignVector.
    String[] flags = switch (args[0]) {
        case "vanilla-A" -> new String[] {"-XX:+AlignVector"};
        case "vanilla-U" -> new String[] {"-XX:-AlignVector"};
        case "sse4-v016-A" -> new String[] {"-XX:UseSSE=4", "-XX:MaxVectorSize=16", "-XX:+AlignVector"};
        case "sse4-v016-U" -> new String[] {"-XX:UseSSE=4", "-XX:MaxVectorSize=16", "-XX:-AlignVector"};
        case "sse4-v008-A" -> new String[] {"-XX:UseSSE=4", "-XX:MaxVectorSize=8", "-XX:+AlignVector"};
        case "sse4-v008-U" -> new String[] {"-XX:UseSSE=4", "-XX:MaxVectorSize=8", "-XX:-AlignVector"};
        case "sse4-v004-A" -> new String[] {"-XX:UseSSE=4", "-XX:MaxVectorSize=4", "-XX:+AlignVector"};
        case "sse4-v004-U" -> new String[] {"-XX:UseSSE=4", "-XX:MaxVectorSize=4", "-XX:-AlignVector"};
        case "avx1-v032-A" -> new String[] {"-XX:UseAVX=1", "-XX:MaxVectorSize=32", "-XX:+AlignVector"};
        case "avx1-v032-U" -> new String[] {"-XX:UseAVX=1", "-XX:MaxVectorSize=32", "-XX:-AlignVector"};
        case "avx1-v016-A" -> new String[] {"-XX:UseAVX=1", "-XX:MaxVectorSize=16", "-XX:+AlignVector"};
        case "avx1-v016-U" -> new String[] {"-XX:UseAVX=1", "-XX:MaxVectorSize=16", "-XX:-AlignVector"};
        case "avx2-v032-A" -> new String[] {"-XX:UseAVX=2", "-XX:MaxVectorSize=32", "-XX:+AlignVector"};
        case "avx2-v032-U" -> new String[] {"-XX:UseAVX=2", "-XX:MaxVectorSize=32", "-XX:-AlignVector"};
        case "avx2-v016-A" -> new String[] {"-XX:UseAVX=2", "-XX:MaxVectorSize=16", "-XX:+AlignVector"};
        case "avx2-v016-U" -> new String[] {"-XX:UseAVX=2", "-XX:MaxVectorSize=16", "-XX:-AlignVector"};
        case "avx512-v064-A" -> new String[] {"-XX:UseAVX=3", "-XX:+UseKNLSetting", "-XX:MaxVectorSize=64", "-XX:+AlignVector"};
        case "avx512-v064-U" -> new String[] {"-XX:UseAVX=3", "-XX:+UseKNLSetting", "-XX:MaxVectorSize=64", "-XX:-AlignVector"};
        case "avx512-v032-A" -> new String[] {"-XX:UseAVX=3", "-XX:+UseKNLSetting", "-XX:MaxVectorSize=32", "-XX:+AlignVector"};
        case "avx512-v032-U" -> new String[] {"-XX:UseAVX=3", "-XX:+UseKNLSetting", "-XX:MaxVectorSize=32", "-XX:-AlignVector"};
        case "avx512bw-v064-A" -> new String[] {"-XX:UseAVX=3", "-XX:MaxVectorSize=64", "-XX:+AlignVector"};
        case "avx512bw-v064-U" -> new String[] {"-XX:UseAVX=3", "-XX:MaxVectorSize=64", "-XX:-AlignVector"};
        case "avx512bw-v032-A" -> new String[] {"-XX:UseAVX=3", "-XX:MaxVectorSize=32", "-XX:+AlignVector"};
        case "avx512bw-v032-U" -> new String[] {"-XX:UseAVX=3", "-XX:MaxVectorSize=32", "-XX:-AlignVector"};
        case "vec-v064-A" -> new String[] {"-XX:MaxVectorSize=64", "-XX:+AlignVector"};
        case "vec-v064-U" -> new String[] {"-XX:MaxVectorSize=64", "-XX:-AlignVector"};
        case "vec-v032-A" -> new String[] {"-XX:MaxVectorSize=32", "-XX:+AlignVector"};
        case "vec-v032-U" -> new String[] {"-XX:MaxVectorSize=32", "-XX:-AlignVector"};
        case "vec-v016-A" -> new String[] {"-XX:MaxVectorSize=16", "-XX:+AlignVector"};
        case "vec-v016-U" -> new String[] {"-XX:MaxVectorSize=16", "-XX:-AlignVector"};
        case "vec-v008-A" -> new String[] {"-XX:MaxVectorSize=8", "-XX:+AlignVector"};
        case "vec-v008-U" -> new String[] {"-XX:MaxVectorSize=8", "-XX:-AlignVector"};
        case "vec-v004-A" -> new String[] {"-XX:MaxVectorSize=4", "-XX:+AlignVector"};
        case "vec-v004-U" -> new String[] {"-XX:MaxVectorSize=4", "-XX:-AlignVector"};
        default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); }
    };
    CompileFramework comp = new CompileFramework();
    // Elapsed time is measured with the monotonic System.nanoTime(): the wall clock behind
    // System.currentTimeMillis() may be adjusted while the test runs and distort the numbers.
    final long nanosPerMilli = 1_000_000L;
    long time0 = System.nanoTime();
    comp.addJavaSourceCode("InnerTest", generate(comp, flags));
    long time1 = System.nanoTime();
    comp.compile();
    long time2 = System.nanoTime();
    // A single null argument is handed to the invoked method, i.e. InnerTest.main(null).
    comp.invoke("InnerTest", "main", new Object[] {null});
    long time3 = System.nanoTime();
    // Phase durations, reported in milliseconds.
    System.out.println("Generate: " + (time1 - time0) / nanosPerMilli);
    System.out.println("Compile: " + (time2 - time1) / nanosPerMilli);
    System.out.println("Run: " + (time3 - time2) / nanosPerMilli);
}
/*
 * One primitive element type under test.
 *
 *   name     - Java type name, e.g. "int"
 *   size     - element size in bytes
 *   value    - constant used as right-hand operand in the generated loop body
 *   operator - binary operator applied between the loaded element and the constant
 *   irNode   - name of the IRNode constant expected for the vectorized operation
 *
 * Nested records are implicitly static, so no explicit modifier is needed.
 */
record Type(String name, int size, String value, String operator, String irNode) {
    /*
     * First character of the type name in upper case, used as suffix of the
     * IRNode.LOAD_VECTOR_* constant. The conversion is locale-independent: with the
     * default-locale variant, "int" would become a dotted capital I under a Turkish
     * locale and the generated IR rule would not compile.
     */
    String letter() {
        return name.substring(0, 1).toUpperCase(java.util.Locale.ROOT);
    }

    /*
     * Template for init method generation: fills a[i] with 2 * i and b[i] with 3 * i,
     * cast to this type.
     */
    String generateInit() {
        return """
                static void init(%s[] a, %s[] b) {
                for (int i = 0; i < SIZE; i++) {
                a[i] = (%s)(2 * i);
                b[i] = (%s)(3 * i);
                }
                }
                """.formatted(name, name, name, name);
    }

    /*
     * Template for verify method generation: compares the test arrays element-wise with
     * the gold arrays and throws a RuntimeException naming the first mismatch. All four
     * values in the message are separated by ", ".
     */
    String generateVerify() {
        return """
                static void verify(String context, %s[] aTest, %s[] bTest, %s[] aGold, %s[] bGold) {
                for (int i = 0; i < SIZE; i++) {
                if (aTest[i] != aGold[i] || bTest[i] != bGold[i]) {
                throw new RuntimeException("Wrong result in " + context + " at i=" + i + ": " +
                "aTest=" + aTest[i] + ", aGold=" + aGold[i] +
                ", bTest=" + bTest[i] + ", bGold=" + bGold[i]);
                }
                }
                }
                """.formatted(name, name, name, name);
    }
}
// All element types a test is generated for. Columns: type name, element size in bytes,
// constant operand, operator, and the IRNode constant expected for the vectorized operation.
static final Type[] TYPES = new Type[] {
new Type("int", 4, "-11", "*", "MUL_VI"),
new Type("long", 8, "-11", "+", "ADD_VL"), // aarch64 NEON does not support MulVL
new Type("short", 2, "-11", "*", "MUL_VS"),
new Type("char", 2, "-11", "*", "MUL_VS"), // char behaves like short
new Type("byte", 1, "11", "*", "MUL_VB"),
new Type("float", 4, "1.001f", "*", "MUL_VF"),
new Type("double", 8, "1.001", "*", "MUL_VD"),
};
/*
 * Every CPU can define its own Matcher::min_vector_size. This happens to be different for
 * our targeted platforms: x86 / sse4.1 and aarch64 / asimd.
 *
 * A CPUMinVectorWidth pairs a CPU-feature condition with the minimal vector width (in bytes;
 * it is divided by the element size to get an element count) that applies under it.
 */
static record CPUMinVectorWidth (String applyIfCPUFeature, int minVectorWidth) {}
// Each constant is the trailing fragment of a generated @IR annotation: it carries the
// CPU-feature condition and closes the annotation with ")".
static final String SSE4_ASIMD = " applyIfCPUFeatureOr = {\"sse4.1\", \"true\", \"asimd\", \"true\"})\n";
static final String SSE4 = " applyIfCPUFeature = {\"sse4.1\", \"true\"})\n";
static final String ASIMD = " applyIfCPUFeature = {\"asimd\", \"true\"})\n";
/*
 * Returns, for the given element type name, the CPU-feature conditions together with the
 * minimal vector width that applies under each of them. Types whose minimum differs between
 * sse4.1 and asimd get two entries, all others a single shared one.
 *
 * Throws RuntimeException for an unknown type name.
 */
static CPUMinVectorWidth[] getCPUMinVectorWidth(String typeName) {
    switch (typeName) {
        case "byte":
            return new CPUMinVectorWidth[] {
                new CPUMinVectorWidth(SSE4_ASIMD, 4)
            };
        case "char":
        case "short":
            // 2-byte types: the only case where the two platforms disagree.
            return new CPUMinVectorWidth[] {
                new CPUMinVectorWidth(SSE4, 4),
                new CPUMinVectorWidth(ASIMD, 8)
            };
        case "int":
        case "float":
            return new CPUMinVectorWidth[] {
                new CPUMinVectorWidth(SSE4_ASIMD, 8)
            };
        case "long":
        case "double":
            return new CPUMinVectorWidth[] {
                new CPUMinVectorWidth(SSE4_ASIMD, 16)
            };
        default:
            throw new RuntimeException("type not supported: " + typeName);
    }
}
/*
 * Returns the offsets to generate tests for: a fixed, hand-picked selection plus ten random
 * magnitudes on an exponential scale, each magnitude in both its positive and negative form.
 * Duplicates are collapsed because the values are gathered in a set.
 */
static List<Integer> getOffsets() {
    // Hand-picked values: zero, powers of two, their neighbours and a few odd ones.
    int[] always = new int[] {
        0,
        -1, 1,
        -2, 2,       // 2^1
        -3, 3,
        -4, 4,       // 2^2
        -7, 7,
        -8, 8,       // 2^3
        -14, 14,
        -16, 16,     // 2^4
        -18, 18,
        -20, 20,
        -31, 31,
        -32, 32,     // 2^5
        -63, 63,
        -64, 64,     // 2^6
        -65, 65,
        -128, 128,   // 2^7
        -129, 129,
        -192, 192,   // 3 * 64
    };
    Set<Integer> offsets = Arrays.stream(always).boxed().collect(Collectors.toSet());
    // One random magnitude from each of the ranges [4, 8), [8, 16), ..., [2048, 4096).
    for (int base = 4; base <= (4 << 9); base <<= 1) {
        int magnitude = base + RANDOM.nextInt(base);
        offsets.add(magnitude);
        offsets.add(-magnitude);
    }
    return new ArrayList<Integer>(offsets);
}
// Expected vectorization outcome of a generated test; determines which kind of IR rule
// (if any) IRRule emits for it.
enum ExpectVectorization {
ALWAYS, // -> positive "count" IR rule
UNKNOWN, // -> disable IR rule
NEVER // -> negative "failOn" IR rule
};
static record TestDefinition (int id, Type type, int offset) {
/*
 * Template for test generation: emits the static gold/test arrays, the static initializer
 * that computes the gold result, the @IR rules, the @Test method and the @Run method
 * (initialization, execution and verification).
 *
 * One of three aliasing modes is picked at random per test definition.
 */
String generate() {
    // Loop bounds are chosen such that i + offset always stays inside [0, SIZE).
    final int start = offset < 0 ? -offset : 0;
    final String end = offset < 0 ? "SIZE" : "SIZE - " + offset;

    // mode 0: a[i + offset] = a[i]
    // mode 1: a[i + offset] = b[i], but a and b alias, i.e. at runtime a == b.
    // mode 2: a[i + offset] = b[i], and a and b do not alias, i.e. at runtime a != b.
    final int mode = RANDOM.nextInt(3);
    final String aliasingComment = switch (mode) {
        case 0 -> "single-array";
        case 1 -> "aliasing";
        case 2 -> "non-aliasing";
        default -> throw new RuntimeException("impossible");
    };
    final boolean isSingleArray = (mode == 0);
    // Array passed as second argument at the call site: only mode 2 passes a distinct array.
    final String secondArgument = (mode == 2) ? "b" : "a";
    // Parameter the loop body loads from: only mode 0 loads from the stored-to array itself.
    final String loadFrom = (mode == 0) ? "a" : "b";

    final String irRules = generateIRRules(isSingleArray);
    final String elem = type.name();
    return """
            // test%d: type=%s, offset=%d, mode=%s
            static %s[] aGold%d = new %s[SIZE];
            static %s[] bGold%d = new %s[SIZE];
            static %s[] aTest%d = new %s[SIZE];
            static %s[] bTest%d = new %s[SIZE];
            static {
            init(aGold%d, bGold%d);
            test%d(aGold%d, %sGold%d);
            }
            @Test
            %s
            public static void test%d(%s[] a, %s[] b) {
            for (int i = %d; i < %s; i++) {
            a[i + %d] = (%s)(%s[i] %s %s);
            }
            }
            @Run(test = "test%s")
            public static void run%s() {
            init(aTest%d, bTest%d);
            test%d(aTest%d, %sTest%d);
            verify("test%d", aTest%d, bTest%d, aGold%d, bGold%d);
            }
            """.formatted(
            // title
            id, elem, offset, aliasingComment,
            // static arrays
            elem, id, elem,
            elem, id, elem,
            elem, id, elem,
            elem, id, elem,
            // static initializer
            id, id,
            id, id, secondArgument, id,
            // IR rules
            irRules,
            // test
            id, elem, elem,
            start, end,
            offset, elem, loadFrom, type.operator(), type.value(),
            // run
            id,
            id,
            id, id,
            id, id, secondArgument, id,
            id, id, id, id, id);
}
/*
 * Generates the IR rules for this TestDefinition: for every CPU-specific minimum vector
 * width of the element type, one rule for -XX:-AlignVector and one for -XX:+AlignVector.
 * If and what kind of vectorization we expect depends on AlignVector and MaxVectorSize, as
 * well as on the byteOffset between the load and the store. The reasoning behind every
 * decision is emitted as a comment into the generated source.
 */
String generateIRRules(boolean isSingleArray) {
    final int infinity = 256; // No vector size is ever larger than this.
    final int awMax = 8;      // Upper bound of the alignment width, see Rule 2.
    final int byteOffset = offset * type.size();
    StringBuilder out = new StringBuilder();

    for (CPUMinVectorWidth cpu : getCPUMinVectorWidth(type.name())) {
        final String cpuFeature = cpu.applyIfCPUFeature();
        final int minWidth = cpu.minVectorWidth();
        out.append(" // minVectorWidth = ").append(minWidth).append("\n");
        out.append(" // byteOffset = ").append(byteOffset).append(" = offset * type.size\n");

        // Store-forward case: later iterations load what earlier iterations stored. With a
        // small offset, full-size vectors would contain a cyclic dependency, so shorter
        // vectors are used instead. Vector lengths are powers of 2 and may not exceed the
        // offset, hence the offset is rounded down to a power of 2.
        int maxWidth = infinity; // no constraint by default
        if (byteOffset > 0 && byteOffset < infinity) {
            final int floorPow2Offset = Integer.highestOneBit(offset);
            maxWidth = Math.min(infinity, floorPow2Offset * type.size());
            out.append(" // Vectors must have at most ").append(floorPow2Offset)
               .append(" elements: maxVectorWidth = ").append(maxWidth)
               .append(" to avoid cyclic dependency.\n");
        }

        ExpectVectorization expected = ExpectVectorization.ALWAYS;
        if (isSingleArray && offset > 0 && offset < 64) {
            // Store-forward case at an iteration distance below a certain threshold: if there
            // is a partial overlap between the vector store and a vector load of a later
            // iteration, vectorization is avoided because of the latency penalty of a
            // store-to-load-forwarding failure. Such failures are only detected in
            // single-array cases, and never beyond 64 iterations, so for offset >= 64
            // vectorization is always expected.
            //
            // Partial overlap condition:
            //   offset % #elements != 0
            //
            // #elements is not known exactly, only its range derived from the min and max
            // vector width.
            final boolean overlapsAtMaxWidth = offset % (maxWidth / type.size()) != 0;
            // If this holds for the smallest vector, it holds for every larger one as well.
            final boolean overlapsAtMinWidth = offset % (minWidth / type.size()) != 0;
            if (overlapsAtMinWidth) {
                // The exact detection threshold is hard to know. On all platforms and in all
                // unrolling cases it lies between 8 and 64.
                if (offset <= 8) {
                    out.append(" // We always detect store-to-load-forwarding failures -> never vectorize.\n");
                    expected = ExpectVectorization.NEVER;
                } else if (offset <= 64) {
                    out.append(" // Unknown if detect store-to-load-forwarding failures -> maybe disable IR rules.\n");
                    expected = ExpectVectorization.UNKNOWN;
                } else {
                    // offset > 64 cannot happen inside the enclosing range check.
                    throw new RuntimeException("impossible");
                }
            } else if (overlapsAtMaxWidth) {
                out.append(" // Partial overlap condition true: sometimes but not always -> maybe disable IR rules.\n");
                expected = ExpectVectorization.UNKNOWN;
            } else {
                out.append(" // Partial overlap never happens -> expect vectorization.\n");
                expected = ExpectVectorization.ALWAYS;
            }
        }

        // Rule 1: No strict alignment: -XX:-AlignVector
        IRRule unalignedRule = new IRRule(type, type.irNode(), cpuFeature);
        unalignedRule.addApplyIf("\"AlignVector\", \"false\"");
        unalignedRule.addApplyIf("\"MaxVectorSize\", \">=" + minWidth + "\"");
        ExpectVectorization expectedUnaligned = expected;
        if (maxWidth < minWidth) {
            out.append(" // maxVectorWidth < minVectorWidth -> expect no vectorization.\n");
            expectedUnaligned = ExpectVectorization.NEVER;
        } else if (maxWidth < infinity) {
            unalignedRule.setSize("min(" + (maxWidth / type.size()) + ",max_" + type.name() + ")");
        }
        unalignedRule.setExpectVectVectorization(expectedUnaligned);
        unalignedRule.generate(out);

        // Rule 2: strict alignment: -XX:+AlignVector
        IRRule alignedRule = new IRRule(type, type.irNode(), cpuFeature);
        alignedRule.addApplyIf("\"AlignVector\", \"true\"");
        alignedRule.addApplyIf("\"MaxVectorSize\", \">=" + minWidth + "\"");
        ExpectVectorization expectedAligned = expected;
        // All vectors must be aligned by some alignment width aw:
        //   aw = min(actualVectorWidth, ObjectAlignmentInBytes)
        // The runtime aw must thus lie between these two values:
        //   awMin <= aw <= awMax
        // Both the load and the store must be aligned, so their difference must be alignable
        // too, i.e. byteOffset must be a multiple of aw. Only awMin and awMax are known, but:
        //   byteOffset % awMax == 0  ->  byteOffset % aw == 0
        //   byteOffset % awMin != 0  ->  byteOffset % aw != 0
        final int awMin = Math.min(minWidth, 8);
        out.append(" // awMin = ").append(awMin).append(" = min(minVectorWidth, 8)\n");
        out.append(" // awMax = ").append(awMax).append("\n");
        if (byteOffset % awMax == 0) {
            out.append(" // byteOffset % awMax == 0 -> always trivially aligned\n");
        } else if (byteOffset % awMin != 0) {
            out.append(" // byteOffset % awMin != 0 -> can never align -> expect no vectorization.\n");
            expectedAligned = ExpectVectorization.NEVER;
        } else if (expectedAligned != ExpectVectorization.NEVER) {
            out.append(" // Alignment unknown -> disable IR rule.\n");
            expectedAligned = ExpectVectorization.UNKNOWN;
        } else {
            out.append(" // Alignment unknown -> but already proved no vectorization above.\n");
        }
        if (maxWidth < minWidth) {
            out.append(" // Not at least 2 elements or 4 bytes -> expect no vectorization.\n");
            expectedAligned = ExpectVectorization.NEVER;
        } else if (maxWidth < infinity) {
            alignedRule.setSize("min(" + (maxWidth / type.size()) + ",max_" + type.name() + ")");
        }
        alignedRule.setExpectVectVectorization(expectedAligned);
        alignedRule.generate(out);
    }
    return out.toString();
}
}
/*
 * Builds the test definitions: the cross product of all element types with a freshly
 * sampled offset list per type. Ids are assigned consecutively, starting at 0.
 */
static List<TestDefinition> getTests() {
    List<TestDefinition> definitions = new ArrayList<>();
    for (int t = 0; t < TYPES.length; t++) {
        final Type type = TYPES[t];
        // The offsets are re-sampled for every type.
        for (Integer offset : getOffsets()) {
            // The next id equals the number of definitions created so far.
            definitions.add(new TestDefinition(definitions.size(), type, offset));
        }
    }
    return definitions;
}
/*
 * Builder for the text of one generated @IR annotation.
 *
 * A rule starts out enabled and positive ("counts"). It can be turned into a negative
 * ("failOn") rule or disabled altogether via setExpectVectVectorization, restricted to a
 * vector size via setSize, and guarded by flag conditions via addApplyIf.
 */
static class IRRule {
    Type type;
    String irNode;
    String applyIfCPUFeature;
    String size;
    boolean isEnabled;
    boolean isPositiveRule;
    ArrayList<String> applyIf;

    IRRule(Type type, String irNode, String applyIfCPUFeature) {
        this.applyIf = new ArrayList<String>();
        this.isEnabled = true;
        this.isPositiveRule = true;
        this.size = null; // no vector size constraint unless setSize is called
        this.applyIfCPUFeature = applyIfCPUFeature;
        this.irNode = irNode;
        this.type = type;
    }

    /* Sets the vector size expression appended to IRNode.VECTOR_SIZE in a positive rule. */
    void setSize(String size) {
        this.size = size;
    }

    /* NEVER makes the rule negative, UNKNOWN disables it, ALWAYS leaves it unchanged. */
    void setExpectVectVectorization(ExpectVectorization expectVectorization) {
        switch (expectVectorization) {
            case NEVER -> this.isPositiveRule = false;
            case UNKNOWN -> this.isEnabled = false;
            case ALWAYS -> { }
        }
    }

    /* Adds one flag constraint; several constraints are combined with applyIfAnd. */
    void addApplyIf(String constraint) {
        this.applyIf.add(constraint);
    }

    /*
     * Appends the rule text to the builder: either a comment saying the rule is disabled, or
     * the annotation head, the optional applyIf / applyIfAnd attribute and finally the
     * CPU-feature fragment.
     */
    void generate(StringBuilder builder) {
        if (!isEnabled) {
            builder.append(" // No IR rule: disabled.\n");
            return;
        }
        builder.append(counts());
        // applyIf
        if (!applyIf.isEmpty()) {
            final String attribute = applyIf.size() > 1 ? " applyIfAnd" : " applyIf";
            builder.append(attribute)
                   .append(" = {")
                   .append(String.join(", ", applyIf))
                   .append("},\n");
        }
        // CPU features
        builder.append(applyIfCPUFeature);
    }

    /*
     * Returns the head of the @IR annotation: "failOn" for a negative rule, otherwise
     * "counts", with or without a vector size constraint.
     */
    String counts() {
        final String letter = type.letter();
        if (!isPositiveRule) {
            return """
                    @IR(failOn = {IRNode.LOAD_VECTOR_%s,
                    IRNode.%s,
                    IRNode.STORE_VECTOR},
                    """.formatted(letter, irNode);
        }
        if (size != null) {
            return """
                    @IR(counts = {IRNode.LOAD_VECTOR_%s, IRNode.VECTOR_SIZE + "%s", ">0",
                    IRNode.%s, IRNode.VECTOR_SIZE + "%s", ">0",
                    IRNode.STORE_VECTOR, ">0"},
                    """.formatted(letter, size, irNode, size);
        }
        return """
                @IR(counts = {IRNode.LOAD_VECTOR_%s, ">0",
                IRNode.%s, ">0",
                IRNode.STORE_VECTOR, ">0"},
                """.formatted(letter, irNode);
    }
}
}