811d08c0a4
Reviewed-by: chagedorn, rkennke, mli
484 lines
18 KiB
Java
484 lines
18 KiB
Java
/*
 * Copyright (c) 2022, 2023, Arm Limited. All rights reserved.
 * Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 * @test
 * @summary Vectorization test on combined operations
 * @library /test/lib /
 *
 * @build jdk.test.whitebox.WhiteBox
 *        compiler.vectorization.runner.VectorizationTestRunner
 *
 * @requires vm.compiler2.enabled
 *
 * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox
 *
 * @run main/othervm -Xbootclasspath/a:.
 *                   -XX:+UnlockDiagnosticVMOptions
 *                   -XX:+WhiteBoxAPI
 *                   compiler.vectorization.runner.LoopCombinedOpTest nCOH_nAV
 *
 * @run main/othervm -Xbootclasspath/a:.
 *                   -XX:+UnlockDiagnosticVMOptions
 *                   -XX:+WhiteBoxAPI
 *                   compiler.vectorization.runner.LoopCombinedOpTest nCOH_yAV
 *
 * @run main/othervm -Xbootclasspath/a:.
 *                   -XX:+UnlockDiagnosticVMOptions
 *                   -XX:+WhiteBoxAPI
 *                   compiler.vectorization.runner.LoopCombinedOpTest yCOH_nAV
 *
 * @run main/othervm -Xbootclasspath/a:.
 *                   -XX:+UnlockDiagnosticVMOptions
 *                   -XX:+WhiteBoxAPI
 *                   compiler.vectorization.runner.LoopCombinedOpTest yCOH_yAV
 */

package compiler.vectorization.runner;

import compiler.lib.ir_framework.*;

import java.util.Random;

public class LoopCombinedOpTest extends VectorizationTestRunner {
|
|
|
|
// We must pass the flags directly to the test-VM, and not the driver vm in the @run above.
|
|
@Override
|
|
protected String[] testVMFlags(String[] args) {
|
|
return switch (args[0]) {
|
|
case "nCOH_nAV" -> new String[]{"-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector"};
|
|
case "nCOH_yAV" -> new String[]{"-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector"};
|
|
case "yCOH_nAV" -> new String[]{"-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector"};
|
|
case "yCOH_yAV" -> new String[]{"-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector"};
|
|
default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); }
|
|
};
|
|
}
|
|
|
|
private static final int SIZE = 543;
|
|
|
|
private int[] a;
|
|
private int[] b;
|
|
private int[] c;
|
|
private int[] d;
|
|
private long[] l1;
|
|
private long[] l2;
|
|
private short[] s1;
|
|
private short[] s2;
|
|
private int intInv;
|
|
|
|
public LoopCombinedOpTest() {
|
|
a = new int[SIZE];
|
|
b = new int[SIZE];
|
|
c = new int[SIZE];
|
|
d = new int[SIZE];
|
|
l1 = new long[SIZE];
|
|
l2 = new long[SIZE];
|
|
s1 = new short[SIZE];
|
|
s2 = new short[SIZE];
|
|
for (int i = 0; i < SIZE; i++) {
|
|
a[i] = -654321 * i;
|
|
b[i] = 123456 * i;
|
|
c[i] = -998877 * i;
|
|
d[i] = 778899 * i;
|
|
l1[i] = 5000000000L * i;
|
|
l2[i] = -600000000L * i;
|
|
s1[i] = (short) (3 * i);
|
|
s2[i] = (short) (-2 * i);
|
|
}
|
|
Random ran = new Random(999);
|
|
intInv = ran.nextInt();
|
|
}
|
|
|
|
@Test
|
|
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
|
|
counts = {IRNode.STORE_VECTOR, ">0",
|
|
IRNode.LOAD_VECTOR_I, "> 0"})
|
|
public int[] opWithConstant() {
|
|
int[] res = new int[SIZE];
|
|
for (int i = 0; i < SIZE; i++) {
|
|
res[i] = a[i] + 1234567890;
|
|
}
|
|
return res;
|
|
}
|
|
|
|
@Test
|
|
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
|
|
counts = {IRNode.STORE_VECTOR, ">0",
|
|
IRNode.LOAD_VECTOR_I, "> 0"})
|
|
public int[] opWithLoopInvariant() {
|
|
int[] res = new int[SIZE];
|
|
for (int i = 0; i < SIZE; i++) {
|
|
res[i] = b[i] * intInv;
|
|
}
|
|
return res;
|
|
}
|
|
|
|
@Test
|
|
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
|
|
counts = {IRNode.STORE_VECTOR, ">0",
|
|
IRNode.LOAD_VECTOR_I, "> 0"})
|
|
public int[] opWithConstantAndLoopInvariant() {
|
|
int[] res = new int[SIZE];
|
|
for (int i = 0; i < SIZE; i++) {
|
|
res[i] = c[i] * (intInv & 0xfff);
|
|
}
|
|
return res;
|
|
}
|
|
|
|
@Test
|
|
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
|
|
counts = {IRNode.STORE_VECTOR, ">0",
|
|
IRNode.LOAD_VECTOR_I, "> 0"})
|
|
public int[] multipleOps() {
|
|
int[] res = new int[SIZE];
|
|
for (int i = 0; i < SIZE; i++) {
|
|
res[i] = a[i] & b[i] + c[i] & d[i];
|
|
}
|
|
return res;
|
|
}
|
|
|
|
@Test
|
|
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
|
|
counts = {IRNode.STORE_VECTOR, ">0",
|
|
IRNode.LOAD_VECTOR_I, "> 0"})
|
|
public int[] multipleOpsWithMultipleConstants() {
|
|
int[] res = new int[SIZE];
|
|
for (int i = 0; i < SIZE; i++) {
|
|
res[i] = a[i] * 12345678 + 87654321 + b[i] & 0xffff - c[i] * d[i] * 2;
|
|
}
|
|
return res;
|
|
}
|
|
|
|
@Test
|
|
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
|
|
counts = {IRNode.STORE_VECTOR, ">0",
|
|
IRNode.LOAD_VECTOR_I, "> 0"})
|
|
// With sse2, the MulI does not vectorize. This means we have vectorized stores
|
|
// to res1, but scalar loads from res1. The store-to-load-forwarding failure
|
|
// detection catches this and rejects vectorization.
|
|
public int[] multipleStores() {
|
|
int[] res1 = new int[SIZE];
|
|
int[] res2 = new int[SIZE];
|
|
int[] res3 = new int[SIZE];
|
|
for (int i = 0; i < SIZE; i++) {
|
|
res1[i] = a[i] & b[i];
|
|
res2[i] = c[i] | d[i];
|
|
res3[i] = res1[i] * res2[i];
|
|
}
|
|
return res3;
|
|
}
|
|
|
|
@Test
|
|
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
|
|
counts = {IRNode.STORE_VECTOR, ">0",
|
|
IRNode.LOAD_VECTOR_I, "> 0"})
|
|
public int[] multipleStoresWithCommonSubExpression() {
|
|
int[] res1 = new int[SIZE];
|
|
int[] res2 = new int[SIZE];
|
|
int[] res3 = new int[SIZE];
|
|
for (int i = 0; i < SIZE; i++) {
|
|
res1[i] = a[i] * b[i];
|
|
res2[i] = c[i] * d[i];
|
|
res3[i] = res1[i] + res2[i];
|
|
}
|
|
return res3;
|
|
}
|
|
|
|
@Test
|
|
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
|
|
applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"},
|
|
counts = {IRNode.STORE_VECTOR, ">0",
|
|
IRNode.LOAD_VECTOR_S, "> 0",
|
|
IRNode.LOAD_VECTOR_I, "> 0"})
|
|
public int[] multipleOpsWith2DifferentTypes() {
|
|
short[] res1 = new short[SIZE];
|
|
int[] res2 = new int[SIZE];
|
|
for (int i = 0; i < SIZE; i++) {
|
|
res1[i] = (short) (s1[i] + s2[i]);
|
|
res2[i] = a[i] + b[i];
|
|
// We have a mix of int and short loads/stores.
|
|
// With UseCompactObjectHeaders and AlignVector,
|
|
// we must 8-byte align all vector loads/stores.
|
|
//
|
|
// int:
|
|
// adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
|
|
// = 16 (or 12 if UseCompactObjectHeaders=true)
|
|
// If UseCompactObjectHeaders=false: iter % 2 = 0
|
|
// If UseCompactObjectHeaders=true: iter % 2 = 1
|
|
//
|
|
// byte:
|
|
// adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter
|
|
// = 16 (or 12 if UseCompactObjectHeaders=true)
|
|
// If UseCompactObjectHeaders=false: iter % 8 = 0
|
|
// If UseCompactObjectHeaders=true: iter % 8 = 4
|
|
//
|
|
// -> we cannot align both if UseCompactObjectHeaders=true.
|
|
}
|
|
return res2;
|
|
}
|
|
|
|
@Test
|
|
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
|
|
applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"},
|
|
counts = {IRNode.STORE_VECTOR, ">0",
|
|
IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_ANY, "> 0",
|
|
IRNode.LOAD_VECTOR_L, "> 0"})
|
|
public long[] multipleOpsWith3DifferentTypes() {
|
|
short[] res1 = new short[SIZE];
|
|
int[] res2 = new int[SIZE];
|
|
long[] res3 = new long[SIZE];
|
|
for (int i = 0; i < SIZE; i++) {
|
|
res1[i] = (short) (s1[i] + s2[i]);
|
|
res2[i] = a[i] + b[i];
|
|
res3[i] = l1[i] + l2[i];
|
|
// We have a mix of int and short loads/stores.
|
|
// With UseCompactObjectHeaders and AlignVector,
|
|
// we must 8-byte align all vector loads/stores.
|
|
//
|
|
// int:
|
|
// adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
|
|
// = 16 (or 12 if UseCompactObjectHeaders=true)
|
|
// If UseCompactObjectHeaders=false: iter % 2 = 0
|
|
// If UseCompactObjectHeaders=true: iter % 2 = 1
|
|
//
|
|
// byte:
|
|
// adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter
|
|
// = 16 (or 12 if UseCompactObjectHeaders=true)
|
|
// If UseCompactObjectHeaders=false: iter % 8 = 0
|
|
// If UseCompactObjectHeaders=true: iter % 8 = 4
|
|
//
|
|
// -> we cannot align both if UseCompactObjectHeaders=true.
|
|
}
|
|
return res3;
|
|
}
|
|
|
|
@Test
|
|
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
|
|
counts = {IRNode.STORE_VECTOR, ">0",
|
|
IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_ANY, "> 0",
|
|
IRNode.LOAD_VECTOR_L, "> 0"})
|
|
public long[] multipleOpsWith2NonAdjacentTypes() {
|
|
short[] res1 = new short[SIZE];
|
|
long[] res2 = new long[SIZE];
|
|
for (int i = 0; i < SIZE; i++) {
|
|
res1[i] = (short) (s1[i] + s2[i]);
|
|
res2[i] = l1[i] + l2[i];
|
|
}
|
|
return res2;
|
|
}
|
|
|
|
@Test
|
|
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
|
|
applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"},
|
|
counts = {IRNode.STORE_VECTOR, ">0",
|
|
IRNode.LOAD_VECTOR_S, "> 0",
|
|
IRNode.LOAD_VECTOR_I, "> 0"})
|
|
public int[] multipleOpsWith2DifferentTypesAndConstant() {
|
|
short[] res1 = new short[SIZE];
|
|
int[] res2 = new int[SIZE];
|
|
for (int i = 0; i < SIZE; i++) {
|
|
res1[i] = (short) (s1[i] + s2[i]);
|
|
res2[i] = a[i] + 88888888;;
|
|
// We have a mix of int and short loads/stores.
|
|
// With UseCompactObjectHeaders and AlignVector,
|
|
// we must 8-byte align all vector loads/stores.
|
|
//
|
|
// int:
|
|
// adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
|
|
// = 16 (or 12 if UseCompactObjectHeaders=true)
|
|
// If UseCompactObjectHeaders=false: iter % 2 = 0
|
|
// If UseCompactObjectHeaders=true: iter % 2 = 1
|
|
//
|
|
// byte:
|
|
// adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter
|
|
// = 16 (or 12 if UseCompactObjectHeaders=true)
|
|
// If UseCompactObjectHeaders=false: iter % 8 = 0
|
|
// If UseCompactObjectHeaders=true: iter % 8 = 4
|
|
//
|
|
// -> we cannot align both if UseCompactObjectHeaders=true.
|
|
}
|
|
return res2;
|
|
}
|
|
|
|
@Test
|
|
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
|
|
applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"},
|
|
counts = {IRNode.STORE_VECTOR, ">0",
|
|
IRNode.LOAD_VECTOR_S, "> 0",
|
|
IRNode.LOAD_VECTOR_I, "> 0"})
|
|
public int[] multipleOpsWith2DifferentTypesAndInvariant() {
|
|
short[] res1 = new short[SIZE];
|
|
int[] res2 = new int[SIZE];
|
|
for (int i = 0; i < SIZE; i++) {
|
|
res1[i] = (short) (s1[i] + s2[i]);
|
|
res2[i] = a[i] * intInv;
|
|
// We have a mix of int and short loads/stores.
|
|
// With UseCompactObjectHeaders and AlignVector,
|
|
// we must 8-byte align all vector loads/stores.
|
|
//
|
|
// int:
|
|
// adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
|
|
// = 16 (or 12 if UseCompactObjectHeaders=true)
|
|
// If UseCompactObjectHeaders=false: iter % 2 = 0
|
|
// If UseCompactObjectHeaders=true: iter % 2 = 1
|
|
//
|
|
// byte:
|
|
// adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter
|
|
// = 16 (or 12 if UseCompactObjectHeaders=true)
|
|
// If UseCompactObjectHeaders=false: iter % 8 = 0
|
|
// If UseCompactObjectHeaders=true: iter % 8 = 4
|
|
//
|
|
// -> we cannot align both if UseCompactObjectHeaders=true.
|
|
}
|
|
return res2;
|
|
}
|
|
|
|
@Test
|
|
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
|
|
applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"},
|
|
counts = {IRNode.STORE_VECTOR, ">0",
|
|
IRNode.LOAD_VECTOR_S, "> 0",
|
|
IRNode.LOAD_VECTOR_I, "> 0"})
|
|
public int[] multipleOpsWith2DifferentTypesAndComplexExpression() {
|
|
short[] res1 = new short[SIZE];
|
|
int[] res2 = new int[SIZE];
|
|
for (int i = 0; i < SIZE; i++) {
|
|
res1[i] = (short) (s1[i] + s2[i]);
|
|
res2[i] = a[i] * (b[i] + intInv * c[i] & 0xfffffa);
|
|
// same argument as in multipleOpsWith2DifferentTypesAndInvariant.
|
|
}
|
|
return res2;
|
|
}
|
|
|
|
@Test
|
|
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse3", "true"},
|
|
applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"},
|
|
counts = {IRNode.STORE_VECTOR, ">0",
|
|
IRNode.LOAD_VECTOR_S, "> 0",
|
|
IRNode.LOAD_VECTOR_I, "> 0"})
|
|
public int[] multipleOpsWith2DifferentTypesAndSharedOp() {
|
|
int i = 0, sum = 0;
|
|
int[] res1 = new int[SIZE];
|
|
short[] res2 = new short[SIZE];
|
|
while (++i < SIZE) {
|
|
sum += (res1[i]--);
|
|
res2[i]++;
|
|
// We have a mix of int and short loads/stores.
|
|
// With UseCompactObjectHeaders and AlignVector,
|
|
// we must 8-byte align all vector loads/stores.
|
|
//
|
|
// int:
|
|
// adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
|
|
// = 16 (or 12 if UseCompactObjectHeaders=true)
|
|
// If UseCompactObjectHeaders=false: iter % 2 = 0
|
|
// If UseCompactObjectHeaders=true: iter % 2 = 1
|
|
//
|
|
// byte:
|
|
// adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter
|
|
// = 16 (or 12 if UseCompactObjectHeaders=true)
|
|
// If UseCompactObjectHeaders=false: iter % 8 = 0
|
|
// If UseCompactObjectHeaders=true: iter % 8 = 4
|
|
//
|
|
// -> we cannot align both if UseCompactObjectHeaders=true.
|
|
}
|
|
return res1;
|
|
}
|
|
|
|
@Test
|
|
// POPULATE_INDEX seems to mess with vectorization, see JDK-8332878.
|
|
public int[] fillIndexPlusStride() {
|
|
int[] res = new int[SIZE];
|
|
for (int i = 0; i < SIZE; i++) {
|
|
res[i] = i + 1;
|
|
}
|
|
return res;
|
|
}
|
|
|
|
@Test
|
|
// POPULATE_INDEX seems to mess with vectorization, see JDK-8332878.
|
|
public int[] addArrayWithIndex() {
|
|
int[] res = new int[SIZE];
|
|
for (int i = 0; i < SIZE; i++) {
|
|
res[i] = a[i] + i;
|
|
}
|
|
return res;
|
|
}
|
|
|
|
@Test
|
|
// POPULATE_INDEX seems to mess with vectorization, see JDK-8332878.
|
|
public short[] multiplyAddShortIndex() {
|
|
short[] res = new short[SIZE];
|
|
for (int i = 0; i < SIZE; i++) {
|
|
res[i] = (short) (i * i + i);
|
|
}
|
|
return res;
|
|
}
|
|
|
|
@Test
|
|
// POPULATE_INDEX seems to mess with vectorization, see JDK-8332878.
|
|
public int[] multiplyBySumOfIndexAndInvariant() {
|
|
int[] res = new int[SIZE];
|
|
for (int i = 0; i < SIZE; i++) {
|
|
res[i] = a[i] * (i + 10 + intInv);
|
|
}
|
|
return res;
|
|
}
|
|
|
|
@Test
|
|
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
|
|
applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"},
|
|
counts = {IRNode.STORE_VECTOR, ">0"})
|
|
public int[] manuallyUnrolledStride2() {
|
|
int[] res = new int[SIZE];
|
|
for (int i = 0; i < SIZE - 1; i += 2) {
|
|
res[i] = a[i] * b[i];
|
|
res[i + 1] = a[i + 1] * b[i + 1];
|
|
// Hand-unrolling can mess with alignment!
|
|
//
|
|
// With UseCompactObjectHeaders and AlignVector,
|
|
// we must 8-byte align all vector loads/stores.
|
|
//
|
|
// adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 8*iter
|
|
// = 16 (or 12 if UseCompactObjectHeaders=true)
|
|
// If UseCompactObjectHeaders=false: 16 divisible by 8 -> vectorize
|
|
// If UseCompactObjectHeaders=true: 12 not divisibly by 8 -> not vectorize
|
|
}
|
|
return res;
|
|
}
|
|
|
|
@Test
|
|
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
|
|
counts = {IRNode.STORE_VECTOR, ">0",
|
|
IRNode.LOAD_VECTOR_I, "> 0"})
|
|
public int partialVectorizableLoop() {
|
|
int[] res = new int[SIZE];
|
|
int k = 9;
|
|
for (int i = 0; i < SIZE / 2; i++) {
|
|
res[i] = a[i] * b[i];
|
|
k = 3 * k + 1;
|
|
}
|
|
return k;
|
|
}
|
|
}
|