jdk-24/test/hotspot/jtreg/compiler/vectorization/runner/LoopCombinedOpTest.java
Emanuel Peter 811d08c0a4 8340010: Fix vectorization tests with compact headers
Reviewed-by: chagedorn, rkennke, mli
2024-11-25 10:39:36 +00:00

484 lines
18 KiB
Java

/*
* Copyright (c) 2022, 2023, Arm Limited. All rights reserved.
* Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* @test
* @summary Vectorization test on combined operations
* @library /test/lib /
*
* @build jdk.test.whitebox.WhiteBox
* compiler.vectorization.runner.VectorizationTestRunner
*
* @requires vm.compiler2.enabled
*
* @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox
*
* @run main/othervm -Xbootclasspath/a:.
* -XX:+UnlockDiagnosticVMOptions
* -XX:+WhiteBoxAPI
* compiler.vectorization.runner.LoopCombinedOpTest nCOH_nAV
*
* @run main/othervm -Xbootclasspath/a:.
* -XX:+UnlockDiagnosticVMOptions
* -XX:+WhiteBoxAPI
* compiler.vectorization.runner.LoopCombinedOpTest nCOH_yAV
*
* @run main/othervm -Xbootclasspath/a:.
* -XX:+UnlockDiagnosticVMOptions
* -XX:+WhiteBoxAPI
* compiler.vectorization.runner.LoopCombinedOpTest yCOH_nAV
*
* @run main/othervm -Xbootclasspath/a:.
* -XX:+UnlockDiagnosticVMOptions
* -XX:+WhiteBoxAPI
* compiler.vectorization.runner.LoopCombinedOpTest yCOH_yAV
*/
package compiler.vectorization.runner;
import compiler.lib.ir_framework.*;
import java.util.Random;
public class LoopCombinedOpTest extends VectorizationTestRunner {
// These flags have to be handed to the test VM itself; placing them on the @run lines above
// would only configure the driver VM.
@Override
protected String[] testVMFlags(String[] args) {
    final String scenario = args[0];
    final String compactHeadersFlag;
    final String alignVectorFlag;
    // Each scenario name maps to one UseCompactObjectHeaders setting and one AlignVector setting.
    switch (scenario) {
        case "nCOH_nAV" -> {
            compactHeadersFlag = "-XX:-UseCompactObjectHeaders";
            alignVectorFlag = "-XX:-AlignVector";
        }
        case "nCOH_yAV" -> {
            compactHeadersFlag = "-XX:-UseCompactObjectHeaders";
            alignVectorFlag = "-XX:+AlignVector";
        }
        case "yCOH_nAV" -> {
            compactHeadersFlag = "-XX:+UseCompactObjectHeaders";
            alignVectorFlag = "-XX:-AlignVector";
        }
        case "yCOH_yAV" -> {
            compactHeadersFlag = "-XX:+UseCompactObjectHeaders";
            alignVectorFlag = "-XX:+AlignVector";
        }
        default -> throw new RuntimeException("Test argument not recognized: " + scenario);
    }
    // The experimental-options unlock is common to every scenario and always comes first.
    return new String[]{"-XX:+UnlockExperimentalVMOptions", compactHeadersFlag, alignVectorFlag};
}
// Length of every input array and of every result array allocated by the tests.
private static final int SIZE = 543;
// int inputs; the constructor fills each with a fixed multiple of the index.
private int[] a;
private int[] b;
private int[] c;
private int[] d;
// long inputs, filled the same way.
private long[] l1;
private long[] l2;
// short inputs, filled the same way.
private short[] s1;
private short[] s2;
// Loop-invariant int operand; drawn once in the constructor from a Random with a fixed seed.
private int intInv;
public LoopCombinedOpTest() {
a = new int[SIZE];
b = new int[SIZE];
c = new int[SIZE];
d = new int[SIZE];
l1 = new long[SIZE];
l2 = new long[SIZE];
s1 = new short[SIZE];
s2 = new short[SIZE];
for (int i = 0; i < SIZE; i++) {
a[i] = -654321 * i;
b[i] = 123456 * i;
c[i] = -998877 * i;
d[i] = 778899 * i;
l1[i] = 5000000000L * i;
l2[i] = -600000000L * i;
s1[i] = (short) (3 * i);
s2[i] = (short) (-2 * i);
}
Random ran = new Random(999);
intInv = ran.nextInt();
}
// Adds a compile-time constant to every element of a.
// IR rule (CPU has asimd or sse2): at least one vector store and at least one int vector load.
// Returns a fresh array holding a[i] + 1234567890 at each index.
@Test
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
counts = {IRNode.STORE_VECTOR, ">0",
IRNode.LOAD_VECTOR_I, "> 0"})
public int[] opWithConstant() {
int[] res = new int[SIZE];
for (int i = 0; i < SIZE; i++) {
res[i] = a[i] + 1234567890;
}
return res;
}
// Multiplies every element of b by the loop-invariant field intInv (int arithmetic).
// IR rule (CPU has asimd or sse4.1): at least one vector store and at least one int vector load.
// Returns a fresh array holding b[i] * intInv at each index.
@Test
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
counts = {IRNode.STORE_VECTOR, ">0",
IRNode.LOAD_VECTOR_I, "> 0"})
public int[] opWithLoopInvariant() {
int[] res = new int[SIZE];
for (int i = 0; i < SIZE; i++) {
res[i] = b[i] * intInv;
}
return res;
}
// Multiplies every element of c by an operand built from the invariant and a constant mask.
// IR rule (CPU has asimd or sse4.1): at least one vector store and at least one int vector load.
// Returns a fresh array holding c[i] * (intInv & 0xfff) at each index.
@Test
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
counts = {IRNode.STORE_VECTOR, ">0",
IRNode.LOAD_VECTOR_I, "> 0"})
public int[] opWithConstantAndLoopInvariant() {
int[] res = new int[SIZE];
for (int i = 0; i < SIZE; i++) {
// (intInv & 0xfff) does not depend on i, so the multiplier is the same in every iteration.
res[i] = c[i] * (intInv & 0xfff);
}
return res;
}
// Combines the four int inputs with two ANDs and one add in a single expression.
// IR rule (CPU has asimd or sse2): at least one vector store and at least one int vector load.
// Returns a fresh array with the combined value at each index.
@Test
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
counts = {IRNode.STORE_VECTOR, ">0",
IRNode.LOAD_VECTOR_I, "> 0"})
public int[] multipleOps() {
int[] res = new int[SIZE];
for (int i = 0; i < SIZE; i++) {
// '+' binds tighter than '&', so this evaluates as a[i] & (b[i] + c[i]) & d[i].
res[i] = a[i] & b[i] + c[i] & d[i];
}
return res;
}
// Mixes multiplies, adds, a subtract and an AND with several constants in one expression.
// IR rule (CPU has asimd or sse4.1): at least one vector store and at least one int vector load.
// Returns a fresh array with the combined value at each index.
@Test
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
counts = {IRNode.STORE_VECTOR, ">0",
IRNode.LOAD_VECTOR_I, "> 0"})
public int[] multipleOpsWithMultipleConstants() {
int[] res = new int[SIZE];
for (int i = 0; i < SIZE; i++) {
// '&' has the lowest precedence here, so this evaluates as
// (a[i] * 12345678 + 87654321 + b[i]) & (0xffff - c[i] * d[i] * 2).
res[i] = a[i] * 12345678 + 87654321 + b[i] & 0xffff - c[i] * d[i] * 2;
}
return res;
}
// Writes three result arrays in one loop; the third store reads back the values just
// stored to the first two.
// IR rule (CPU has asimd or sse4.1): at least one vector store and at least one int vector load.
// Returns res3, i.e. (a[i] & b[i]) * (c[i] | d[i]) at each index.
@Test
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
counts = {IRNode.STORE_VECTOR, ">0",
IRNode.LOAD_VECTOR_I, "> 0"})
// With sse2, the MulI does not vectorize. This means we have vectorized stores
// to res1, but scalar loads from res1. The store-to-load-forwarding failure
// detection catches this and rejects vectorization.
public int[] multipleStores() {
int[] res1 = new int[SIZE];
int[] res2 = new int[SIZE];
int[] res3 = new int[SIZE];
for (int i = 0; i < SIZE; i++) {
res1[i] = a[i] & b[i];
res2[i] = c[i] | d[i];
// Reads the two elements written immediately above.
res3[i] = res1[i] * res2[i];
}
return res3;
}
// Writes two products to res1 and res2, then stores their sum to res3 by reading the
// freshly stored elements back.
// IR rule (CPU has asimd or sse4.1): at least one vector store and at least one int vector load.
// Returns res3, i.e. a[i] * b[i] + c[i] * d[i] at each index (int arithmetic).
@Test
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
counts = {IRNode.STORE_VECTOR, ">0",
IRNode.LOAD_VECTOR_I, "> 0"})
public int[] multipleStoresWithCommonSubExpression() {
int[] res1 = new int[SIZE];
int[] res2 = new int[SIZE];
int[] res3 = new int[SIZE];
for (int i = 0; i < SIZE; i++) {
res1[i] = a[i] * b[i];
res2[i] = c[i] * d[i];
// Reads the two elements written immediately above.
res3[i] = res1[i] + res2[i];
}
return res3;
}
// Runs a short add and an int add in the same loop.
// IR rule (CPU has asimd or sse2, and UseCompactObjectHeaders is off or AlignVector is off):
// at least one vector store, one short vector load and one int vector load.
// Returns res2 (the int sums); res1 (the short sums) is discarded.
@Test
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"},
counts = {IRNode.STORE_VECTOR, ">0",
IRNode.LOAD_VECTOR_S, "> 0",
IRNode.LOAD_VECTOR_I, "> 0"})
public int[] multipleOpsWith2DifferentTypes() {
short[] res1 = new short[SIZE];
int[] res2 = new int[SIZE];
for (int i = 0; i < SIZE; i++) {
res1[i] = (short) (s1[i] + s2[i]);
res2[i] = a[i] + b[i];
// We have a mix of int and short loads/stores.
// With UseCompactObjectHeaders and AlignVector,
// we must 8-byte align all vector loads/stores.
//
// int:
// adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
// = 16 (or 12 if UseCompactObjectHeaders=true)
// If UseCompactObjectHeaders=false: iter % 2 = 0
// If UseCompactObjectHeaders=true: iter % 2 = 1
//
// short (assuming the same 16/12 base offset as for int arrays):
// adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*iter
// = 16 (or 12 if UseCompactObjectHeaders=true)
// If UseCompactObjectHeaders=false: iter % 4 = 0
// If UseCompactObjectHeaders=true: iter % 4 = 2
//
// -> we cannot align both if UseCompactObjectHeaders=true
// (int needs an odd iter, short needs an even one).
}
return res2;
}
// Runs a short add, an int add and a long add in the same loop.
// IR rule (CPU has asimd or sse2, and UseCompactObjectHeaders is off or AlignVector is off):
// at least one vector store, one int vector load of any vector size and one long vector load.
// Returns res3 (the long sums); res1 and res2 are discarded.
@Test
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"},
counts = {IRNode.STORE_VECTOR, ">0",
IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_ANY, "> 0",
IRNode.LOAD_VECTOR_L, "> 0"})
public long[] multipleOpsWith3DifferentTypes() {
short[] res1 = new short[SIZE];
int[] res2 = new int[SIZE];
long[] res3 = new long[SIZE];
for (int i = 0; i < SIZE; i++) {
res1[i] = (short) (s1[i] + s2[i]);
res2[i] = a[i] + b[i];
res3[i] = l1[i] + l2[i];
// We have a mix of short, int and long loads/stores; the int/short pair
// alone already gives the alignment conflict described below.
// With UseCompactObjectHeaders and AlignVector,
// we must 8-byte align all vector loads/stores.
//
// int:
// adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
// = 16 (or 12 if UseCompactObjectHeaders=true)
// If UseCompactObjectHeaders=false: iter % 2 = 0
// If UseCompactObjectHeaders=true: iter % 2 = 1
//
// short (assuming the same 16/12 base offset as for int arrays):
// adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*iter
// = 16 (or 12 if UseCompactObjectHeaders=true)
// If UseCompactObjectHeaders=false: iter % 4 = 0
// If UseCompactObjectHeaders=true: iter % 4 = 2
//
// -> we cannot align both if UseCompactObjectHeaders=true
// (int needs an odd iter, short needs an even one).
}
return res3;
}
// Runs a short add and a long add in the same loop, with no int operation in between.
// IR rule (CPU has asimd or sse2): at least one vector store, one short vector load of any
// vector size and one long vector load. Unlike the int/short tests in this class, the rule
// carries no applyIfOr condition on UseCompactObjectHeaders / AlignVector.
// Returns res2 (the long sums); res1 (the short sums) is discarded.
@Test
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
counts = {IRNode.STORE_VECTOR, ">0",
IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_ANY, "> 0",
IRNode.LOAD_VECTOR_L, "> 0"})
public long[] multipleOpsWith2NonAdjacentTypes() {
short[] res1 = new short[SIZE];
long[] res2 = new long[SIZE];
for (int i = 0; i < SIZE; i++) {
res1[i] = (short) (s1[i] + s2[i]);
res2[i] = l1[i] + l2[i];
}
return res2;
}
// Runs a short add and an int add-with-constant in the same loop.
// IR rule (CPU has asimd or sse2, and UseCompactObjectHeaders is off or AlignVector is off):
// at least one vector store, one short vector load and one int vector load.
// Returns res2 (a[i] + 88888888 at each index); res1 (the short sums) is discarded.
@Test
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
    applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"},
    counts = {IRNode.STORE_VECTOR, ">0",
              IRNode.LOAD_VECTOR_S, "> 0",
              IRNode.LOAD_VECTOR_I, "> 0"})
public int[] multipleOpsWith2DifferentTypesAndConstant() {
    short[] res1 = new short[SIZE];
    int[] res2 = new int[SIZE];
    for (int i = 0; i < SIZE; i++) {
        res1[i] = (short) (s1[i] + s2[i]);
        res2[i] = a[i] + 88888888;
        // We have a mix of int and short loads/stores.
        // With UseCompactObjectHeaders and AlignVector,
        // we must 8-byte align all vector loads/stores.
        //
        // int:
        //   adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
        //                = 16 (or 12 if UseCompactObjectHeaders=true)
        //   If UseCompactObjectHeaders=false: iter % 2 = 0
        //   If UseCompactObjectHeaders=true:  iter % 2 = 1
        //
        // short (assuming the same 16/12 base offset as for int arrays):
        //   adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*iter
        //                = 16 (or 12 if UseCompactObjectHeaders=true)
        //   If UseCompactObjectHeaders=false: iter % 4 = 0
        //   If UseCompactObjectHeaders=true:  iter % 4 = 2
        //
        // -> we cannot align both if UseCompactObjectHeaders=true
        //    (int needs an odd iter, short needs an even one).
    }
    return res2;
}
// Runs a short add and an int multiply by the loop-invariant intInv in the same loop.
// IR rule (CPU has asimd or sse4.1, and UseCompactObjectHeaders is off or AlignVector is off):
// at least one vector store, one short vector load and one int vector load.
// Returns res2 (a[i] * intInv at each index); res1 (the short sums) is discarded.
@Test
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"},
counts = {IRNode.STORE_VECTOR, ">0",
IRNode.LOAD_VECTOR_S, "> 0",
IRNode.LOAD_VECTOR_I, "> 0"})
public int[] multipleOpsWith2DifferentTypesAndInvariant() {
short[] res1 = new short[SIZE];
int[] res2 = new int[SIZE];
for (int i = 0; i < SIZE; i++) {
res1[i] = (short) (s1[i] + s2[i]);
res2[i] = a[i] * intInv;
// We have a mix of int and short loads/stores.
// With UseCompactObjectHeaders and AlignVector,
// we must 8-byte align all vector loads/stores.
//
// int:
// adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
// = 16 (or 12 if UseCompactObjectHeaders=true)
// If UseCompactObjectHeaders=false: iter % 2 = 0
// If UseCompactObjectHeaders=true: iter % 2 = 1
//
// short (assuming the same 16/12 base offset as for int arrays):
// adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*iter
// = 16 (or 12 if UseCompactObjectHeaders=true)
// If UseCompactObjectHeaders=false: iter % 4 = 0
// If UseCompactObjectHeaders=true: iter % 4 = 2
//
// -> we cannot align both if UseCompactObjectHeaders=true
// (int needs an odd iter, short needs an even one).
}
return res2;
}
// Runs a short add and a compound int expression (multiply, add, AND) in the same loop.
// IR rule (CPU has asimd or sse4.1, and UseCompactObjectHeaders is off or AlignVector is off):
// at least one vector store, one short vector load and one int vector load.
// Returns res2 (the int results); res1 (the short sums) is discarded.
@Test
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"},
counts = {IRNode.STORE_VECTOR, ">0",
IRNode.LOAD_VECTOR_S, "> 0",
IRNode.LOAD_VECTOR_I, "> 0"})
public int[] multipleOpsWith2DifferentTypesAndComplexExpression() {
short[] res1 = new short[SIZE];
int[] res2 = new int[SIZE];
for (int i = 0; i < SIZE; i++) {
res1[i] = (short) (s1[i] + s2[i]);
// '&' binds weaker than '+' and '*': a[i] * ((b[i] + intInv * c[i]) & 0xfffffa).
res2[i] = a[i] * (b[i] + intInv * c[i] & 0xfffffa);
// Same alignment argument as in multipleOpsWith2DifferentTypesAndInvariant:
// the mixed short/int accesses cannot all be 8-byte aligned when both
// UseCompactObjectHeaders and AlignVector are enabled.
}
return res2;
}
// Decrements an int array and increments a short array in the same while-loop.
// IR rule (CPU has asimd or sse3, and UseCompactObjectHeaders is off or AlignVector is off):
// at least one vector store, one short vector load and one int vector load.
// Returns res1: index 0 is left at 0, every other index is decremented once to -1.
@Test
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse3", "true"},
applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"},
counts = {IRNode.STORE_VECTOR, ">0",
IRNode.LOAD_VECTOR_S, "> 0",
IRNode.LOAD_VECTOR_I, "> 0"})
public int[] multipleOpsWith2DifferentTypesAndSharedOp() {
int i = 0, sum = 0;
int[] res1 = new int[SIZE];
short[] res2 = new short[SIZE];
// The pre-increment in the condition makes the first processed index 1, not 0.
while (++i < SIZE) {
// sum collects the pre-decrement values but is never read after the loop.
sum += (res1[i]--);
res2[i]++;
// We have a mix of int and short loads/stores.
// With UseCompactObjectHeaders and AlignVector,
// we must 8-byte align all vector loads/stores.
//
// int:
// adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
// = 16 (or 12 if UseCompactObjectHeaders=true)
// If UseCompactObjectHeaders=false: iter % 2 = 0
// If UseCompactObjectHeaders=true: iter % 2 = 1
//
// short (assuming the same 16/12 base offset as for int arrays):
// adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*iter
// = 16 (or 12 if UseCompactObjectHeaders=true)
// If UseCompactObjectHeaders=false: iter % 4 = 0
// If UseCompactObjectHeaders=true: iter % 4 = 2
//
// -> we cannot align both if UseCompactObjectHeaders=true
// (int needs an odd iter, short needs an even one).
}
return res1;
}
// Fills an int array with index + 1. No @IR rule is attached to this test.
// Returns a fresh array holding i + 1 at each index i.
@Test
// POPULATE_INDEX seems to mess with vectorization, see JDK-8332878.
public int[] fillIndexPlusStride() {
int[] res = new int[SIZE];
for (int i = 0; i < SIZE; i++) {
res[i] = i + 1;
}
return res;
}
// Adds the loop index to each element of a. No @IR rule is attached to this test.
// Returns a fresh array holding a[i] + i at each index i.
@Test
// POPULATE_INDEX seems to mess with vectorization, see JDK-8332878.
public int[] addArrayWithIndex() {
int[] res = new int[SIZE];
for (int i = 0; i < SIZE; i++) {
res[i] = a[i] + i;
}
return res;
}
// Computes i * i + i from the loop index and stores it as a short.
// No @IR rule is attached to this test.
// Returns a fresh short array holding (short) (i * i + i) at each index i.
@Test
// POPULATE_INDEX seems to mess with vectorization, see JDK-8332878.
public short[] multiplyAddShortIndex() {
short[] res = new short[SIZE];
for (int i = 0; i < SIZE; i++) {
// The int result exceeds the short range for larger i; the cast keeps only the low 16 bits.
res[i] = (short) (i * i + i);
}
return res;
}
// Multiplies each element of a by the sum of the loop index, a constant and the invariant.
// No @IR rule is attached to this test.
// Returns a fresh array holding a[i] * (i + 10 + intInv) at each index i (int arithmetic).
@Test
// POPULATE_INDEX seems to mess with vectorization, see JDK-8332878.
public int[] multiplyBySumOfIndexAndInvariant() {
int[] res = new int[SIZE];
for (int i = 0; i < SIZE; i++) {
res[i] = a[i] * (i + 10 + intInv);
}
return res;
}
// Element-wise int multiply written as a hand-unrolled loop that handles two adjacent
// indices per iteration.
// IR rule (CPU has asimd or sse4.1, and UseCompactObjectHeaders is off or AlignVector is off):
// at least one vector store.
// Returns a fresh array holding a[i] * b[i]; because SIZE is odd and the loop bound is
// SIZE - 1, the last element is never written and stays 0.
@Test
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"},
counts = {IRNode.STORE_VECTOR, ">0"})
public int[] manuallyUnrolledStride2() {
int[] res = new int[SIZE];
for (int i = 0; i < SIZE - 1; i += 2) {
res[i] = a[i] * b[i];
res[i + 1] = a[i + 1] * b[i + 1];
// Hand-unrolling can mess with alignment!
//
// With UseCompactObjectHeaders and AlignVector,
// we must 8-byte align all vector loads/stores.
//
// adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 8*iter
// = 16 (or 12 if UseCompactObjectHeaders=true)
// If UseCompactObjectHeaders=false: 16 divisible by 8 -> vectorize
// If UseCompactObjectHeaders=true: 12 not divisible by 8 -> not vectorize
}
return res;
}
// Loop with an element-wise int multiply plus a scalar recurrence on k that is carried
// from one iteration to the next.
// IR rule (CPU has asimd or sse4.1): at least one vector store and at least one int vector load.
// Only the first SIZE / 2 elements of res are written, and res itself is discarded.
// Returns the final value of k after SIZE / 2 applications of k = 3 * k + 1, starting
// from 9 (int arithmetic, so the value wraps on overflow).
@Test
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
counts = {IRNode.STORE_VECTOR, ">0",
IRNode.LOAD_VECTOR_I, "> 0"})
public int partialVectorizableLoop() {
int[] res = new int[SIZE];
int k = 9;
for (int i = 0; i < SIZE / 2; i++) {
res[i] = a[i] * b[i];
// Each k depends on the previous iteration's k.
k = 3 * k + 1;
}
return k;
}
}