From 811d08c0a4e0da55f306686423aec40d29fabf00 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Mon, 25 Nov 2024 10:39:36 +0000 Subject: [PATCH] 8340010: Fix vectorization tests with compact headers Reviewed-by: chagedorn, rkennke, mli --- .../c2/TestCastX2NotProcessedIGVN.java | 21 +- .../c2/irTests/TestVectorConditionalMove.java | 22 +- .../TestVectorizationMismatchedAccess.java | 120 ++++++++--- .../c2/irTests/TestVectorizationNotRun.java | 25 ++- .../loopopts/superword/TestAlignVector.java | 168 +++++++++++++-- ...tIndependentPacksWithCyclicDependency.java | 49 ++++- .../loopopts/superword/TestMulAddS2I.java | 61 ++++-- .../TestScheduleReordersScalarMemops.java | 36 +++- .../loopopts/superword/TestSplitPacks.java | 200 +++++++++++++++--- ...norderedReductionPartialVectorization.java | 11 +- .../TestFloatConversionsVector.java | 37 +++- .../runner/ArrayTypeConvertTest.java | 93 +++++++- .../runner/LoopCombinedOpTest.java | 198 +++++++++++++++-- .../runner/VectorizationTestRunner.java | 10 +- .../ir_framework/examples/IRExample.java | 4 +- 15 files changed, 915 insertions(+), 140 deletions(-) diff --git a/test/hotspot/jtreg/compiler/c2/TestCastX2NotProcessedIGVN.java b/test/hotspot/jtreg/compiler/c2/TestCastX2NotProcessedIGVN.java index 086711085b4..ca5a754f296 100644 --- a/test/hotspot/jtreg/compiler/c2/TestCastX2NotProcessedIGVN.java +++ b/test/hotspot/jtreg/compiler/c2/TestCastX2NotProcessedIGVN.java @@ -1,5 +1,6 @@ /* * Copyright (c) 2024, Red Hat, Inc. All rights reserved. + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -42,7 +43,19 @@ public class TestCastX2NotProcessedIGVN { public static void main(String[] args) { - TestFramework.runWithFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED"); + // Cross-product: +-AlignVector and +-UseCompactObjectHeaders + TestFramework.runWithFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED", + "-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", + "-XX:-AlignVector"); + TestFramework.runWithFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED", + "-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", + "-XX:+AlignVector"); + TestFramework.runWithFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED", + "-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", + "-XX:-AlignVector"); + TestFramework.runWithFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED", + "-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", + "-XX:+AlignVector"); } @Test @@ -63,6 +76,7 @@ public class TestCastX2NotProcessedIGVN { @Test @IR(counts = {IRNode.LOAD_VECTOR_I, "> 1"}, + applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, applyIfPlatformOr = {"x64", "true", "aarch64", "true"}) public static int test2(int stop, int[] array) { int v = 0; @@ -70,6 +84,11 @@ public class TestCastX2NotProcessedIGVN { for (int i = 0; i < stop; i++) { long offset = ((long)i) * 4; array[i] = UNSAFE.getInt(null, offset + base); + // With AlignVector, we need 8-byte alignment of vector loads/stores. 
+ // UseCompactObjectHeaders=false UseCompactObjectHeaders=true + // I_adr = base + 16 + 4*i -> i % 2 = 0 I_adr = base + 12 + 4*i -> i % 2 = 1 + // N_adr = base + 4*i -> i % 2 = 0 N_adr = base + 4*i -> i % 2 = 0 + // -> vectorize -> no vectorization } return v; } diff --git a/test/hotspot/jtreg/compiler/c2/irTests/TestVectorConditionalMove.java b/test/hotspot/jtreg/compiler/c2/irTests/TestVectorConditionalMove.java index 6f6b7f5bd30..c8b3e5a3aa6 100644 --- a/test/hotspot/jtreg/compiler/c2/irTests/TestVectorConditionalMove.java +++ b/test/hotspot/jtreg/compiler/c2/irTests/TestVectorConditionalMove.java @@ -1,6 +1,6 @@ /* * Copyright (c) 2022, Arm Limited. All rights reserved. - * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -43,7 +43,15 @@ public class TestVectorConditionalMove { private static final Random RANDOM = Utils.getRandomInstance(); public static void main(String[] args) { - TestFramework.runWithFlags("-XX:+UseCMoveUnconditionally", "-XX:+UseVectorCmov"); + // Cross-product: +-AlignVector and +-UseCompactObjectHeaders + TestFramework.runWithFlags("-XX:+UseCMoveUnconditionally", "-XX:+UseVectorCmov", + "-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector"); + TestFramework.runWithFlags("-XX:+UseCMoveUnconditionally", "-XX:+UseVectorCmov", + "-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector"); + TestFramework.runWithFlags("-XX:+UseCMoveUnconditionally", "-XX:+UseVectorCmov", + "-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector"); + TestFramework.runWithFlags("-XX:+UseCMoveUnconditionally", "-XX:+UseVectorCmov", + "-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector"); } // Compare
2 values, and pick one of them @@ -400,11 +408,16 @@ public class TestVectorConditionalMove { IRNode.VECTOR_MASK_CMP_F, ">0", IRNode.VECTOR_BLEND_F, ">0", IRNode.STORE_VECTOR, ">0"}, + applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}) private static void testCMoveFLTforFConstH2(float[] a, float[] b, float[] c) { for (int i = 0; i < a.length; i+=2) { c[i+0] = (a[i+0] < b[i+0]) ? 0.1f : -0.1f; c[i+1] = (a[i+1] < b[i+1]) ? 0.1f : -0.1f; + // With AlignVector, we need 8-byte alignment of vector loads/stores. + // UseCompactObjectHeaders=false UseCompactObjectHeaders=true + // adr = base + 16 + 8*i -> always adr = base + 12 + 8*i -> never + // -> vectorize -> no vectorization } } @@ -413,11 +426,16 @@ public class TestVectorConditionalMove { IRNode.VECTOR_MASK_CMP_F, ">0", IRNode.VECTOR_BLEND_F, ">0", IRNode.STORE_VECTOR, ">0"}, + applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}) private static void testCMoveFLEforFConstH2(float[] a, float[] b, float[] c) { for (int i = 0; i < a.length; i+=2) { c[i+0] = (a[i+0] <= b[i+0]) ? 0.1f : -0.1f; c[i+1] = (a[i+1] <= b[i+1]) ? 0.1f : -0.1f; + // With AlignVector, we need 8-byte alignment of vector loads/stores. 
+ // UseCompactObjectHeaders=false UseCompactObjectHeaders=true + // adr = base + 16 + 8*i -> always adr = base + 12 + 8*i -> never + // -> vectorize -> no vectorization } } diff --git a/test/hotspot/jtreg/compiler/c2/irTests/TestVectorizationMismatchedAccess.java b/test/hotspot/jtreg/compiler/c2/irTests/TestVectorizationMismatchedAccess.java index b511476bf52..c891145b08d 100644 --- a/test/hotspot/jtreg/compiler/c2/irTests/TestVectorizationMismatchedAccess.java +++ b/test/hotspot/jtreg/compiler/c2/irTests/TestVectorizationMismatchedAccess.java @@ -50,7 +50,19 @@ public class TestVectorizationMismatchedAccess { private final static WhiteBox wb = WhiteBox.getWhiteBox(); public static void main(String[] args) { - TestFramework.runWithFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED"); + // Cross-product: +-AlignVector and +-UseCompactObjectHeaders + TestFramework.runWithFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED", + "-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", + "-XX:-AlignVector"); + TestFramework.runWithFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED", + "-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", + "-XX:+AlignVector"); + TestFramework.runWithFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED", + "-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", + "-XX:-AlignVector"); + TestFramework.runWithFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED", + "-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", + "-XX:+AlignVector"); } static int size = 1024; @@ -153,8 +165,7 @@ public class TestVectorizationMismatchedAccess { @Test @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" }, - // This test fails with compact headers, but only with UseSSE<=3. 
- applyIf = { "UseCompactObjectHeaders", "false" }, + applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}) // 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned). @@ -162,38 +173,48 @@ public class TestVectorizationMismatchedAccess { public static void testByteLong1a(byte[] dest, long[] src) { for (int i = 0; i < src.length; i++) { UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i, handleByteOrder(src[i])); + // With AlignVector, we need 8-byte alignment of vector loads/stores. + // UseCompactObjectHeaders=false UseCompactObjectHeaders=true + // B_adr = base + 16 + 8*i -> always B_adr = base + 12 + 8*i -> never + // L_adr = base + 16 + 8*i -> always L_adr = base + 16 + 8*i -> always + // -> vectorize -> no vectorization } } @Test @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" }, - // This test fails with compact headers, but only with UseSSE<=3. - applyIf = { "UseCompactObjectHeaders", "false" }, + applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}) // 32-bit: address has ConvL2I for cast of long to address, not supported. public static void testByteLong1b(byte[] dest, long[] src) { for (int i = 0; i < src.length; i++) { UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i, handleByteOrder(src[i])); + // With AlignVector, we need 8-byte alignment of vector loads/stores. 
+ // UseCompactObjectHeaders=false UseCompactObjectHeaders=true + // B_adr = base + 16 + 8*i -> always B_adr = base + 12 + 8*i -> never + // L_adr = base + 16 + 8*i -> always L_adr = base + 16 + 8*i -> always + // -> vectorize -> no vectorization } } @Test @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" }, - // This test fails with compact headers, but only with UseSSE<=3. - applyIf = { "UseCompactObjectHeaders", "false" }, applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"}) public static void testByteLong1c(byte[] dest, long[] src) { long base = 64; // make sure it is big enough and 8 byte aligned (required for 32-bit) for (int i = 0; i < src.length - 8; i++) { UNSAFE.putLongUnaligned(dest, base + 8 * i, handleByteOrder(src[i])); + // With AlignVector, we need 8-byte alignment of vector loads/stores. + // UseCompactObjectHeaders=false UseCompactObjectHeaders=true + // B_adr = base + 64 + 8*i -> always B_adr = base + 64 + 8*i -> always + // L_adr = base + 16 + 8*i -> always L_adr = base + 16 + 8*i -> always + // -> vectorize -> vectorize } } @Test @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" }, - // This test fails with compact headers, but only with UseSSE<=3. - applyIf = { "UseCompactObjectHeaders", "false" }, applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}) // 32-bit: address has ConvL2I for cast of long to address, not supported. @@ -201,6 +222,11 @@ public class TestVectorizationMismatchedAccess { long base = 64; // make sure it is big enough and 8 byte aligned (required for 32-bit) for (int i = 0; i < src.length - 8; i++) { UNSAFE.putLongUnaligned(dest, base + 8L * i, handleByteOrder(src[i])); + // With AlignVector, we need 8-byte alignment of vector loads/stores. 
+ // UseCompactObjectHeaders=false UseCompactObjectHeaders=true + // B_adr = base + 64 + 8*i -> always B_adr = base + 64 + 8*i -> always + // L_adr = base + 16 + 8*i -> always L_adr = base + 16 + 8*i -> always + // -> vectorize -> vectorize } } @@ -214,6 +240,7 @@ public class TestVectorizationMismatchedAccess { @Test @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" }, + applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}) // 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned). @@ -221,17 +248,28 @@ public class TestVectorizationMismatchedAccess { public static void testByteLong2a(byte[] dest, long[] src) { for (int i = 1; i < src.length; i++) { UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i - 1), handleByteOrder(src[i])); + // With AlignVector, we need 8-byte alignment of vector loads/stores. + // UseCompactObjectHeaders=false UseCompactObjectHeaders=true + // B_adr = base + 16 + 8*(i-1) -> always B_adr = base + 12 + 8*(i-1) -> never + // L_adr = base + 16 + 8*i -> always L_adr = base + 16 + 8*i -> always + // -> vectorize -> no vectorization } } @Test @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" }, + applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}) // 32-bit: address has ConvL2I for cast of long to address, not supported. public static void testByteLong2b(byte[] dest, long[] src) { for (int i = 1; i < src.length; i++) { UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i - 1), handleByteOrder(src[i])); + // With AlignVector, we need 8-byte alignment of vector loads/stores. 
+ // UseCompactObjectHeaders=false UseCompactObjectHeaders=true + // B_adr = base + 16 + 8*(i-1) -> always B_adr = base + 12 + 8*(i-1) -> never + // L_adr = base + 16 + 8*i -> always L_adr = base + 16 + 8*i -> always + // -> vectorize -> no vectorization } } @@ -243,8 +281,7 @@ public class TestVectorizationMismatchedAccess { @Test @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" }, - // This test fails with compact headers, but only with UseSSE<=3. - applyIf = { "UseCompactObjectHeaders", "false" }, + applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}) // 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned). @@ -252,19 +289,28 @@ public class TestVectorizationMismatchedAccess { public static void testByteLong3a(byte[] dest, long[] src) { for (int i = 0; i < src.length - 1; i++) { UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i + 1), handleByteOrder(src[i])); + // With AlignVector, we need 8-byte alignment of vector loads/stores. + // UseCompactObjectHeaders=false UseCompactObjectHeaders=true + // B_adr = base + 16 + 8*(i+1) -> always B_adr = base + 12 + 8*(i+1) -> never + // L_adr = base + 16 + 8*i -> always L_adr = base + 16 + 8*i -> always + // -> vectorize -> no vectorization } } @Test @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" }, - // This test fails with compact headers, but only with UseSSE<=3. - applyIf = { "UseCompactObjectHeaders", "false" }, + applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}) // 32-bit: address has ConvL2I for cast of long to address, not supported. 
public static void testByteLong3b(byte[] dest, long[] src) { for (int i = 0; i < src.length - 1; i++) { UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i + 1), handleByteOrder(src[i])); + // With AlignVector, we need 8-byte alignment of vector loads/stores. + // UseCompactObjectHeaders=false UseCompactObjectHeaders=true + // B_adr = base + 16 + 8*(i+1) -> always B_adr = base + 12 + 8*(i+1) -> never + // L_adr = base + 16 + 8*i -> always L_adr = base + 16 + 8*i -> always + // -> vectorize -> no vectorization } } @@ -310,8 +356,7 @@ public class TestVectorizationMismatchedAccess { @Test @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" }, - // This test fails with compact headers, but only with UseSSE<=3. - applyIf = { "UseCompactObjectHeaders", "false" }, + applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}) // 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned). @@ -319,19 +364,28 @@ public class TestVectorizationMismatchedAccess { public static void testByteLong5a(byte[] dest, long[] src, int start, int stop) { for (int i = start; i < stop; i++) { UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i + baseOffset), handleByteOrder(src[i])); + // With AlignVector, we need 8-byte alignment of vector loads/stores. + // UseCompactObjectHeaders=false UseCompactObjectHeaders=true + // B_adr = base + 16 + 8*(i+x) -> always B_adr = base + 12 + 8*(i+x) -> never + // L_adr = base + 16 + 8*i -> always L_adr = base + 16 + 8*i -> always + // -> vectorize -> no vectorization } } @Test @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" }, - // This test fails with compact headers, but only with UseSSE<=3. 
- applyIf = { "UseCompactObjectHeaders", "false" }, + applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}) // 32-bit: address has ConvL2I for cast of long to address, not supported. public static void testByteLong5b(byte[] dest, long[] src, int start, int stop) { for (int i = start; i < stop; i++) { UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i + baseOffset), handleByteOrder(src[i])); + // With AlignVector, we need 8-byte alignment of vector loads/stores. + // UseCompactObjectHeaders=false UseCompactObjectHeaders=true + // B_adr = base + 16 + 8*(i+x) -> always B_adr = base + 12 + 8*(i+x) -> never + // L_adr = base + 16 + 8*i -> always L_adr = base + 16 + 8*i -> always + // -> vectorize -> no vectorization } } @@ -344,8 +398,7 @@ public class TestVectorizationMismatchedAccess { @Test @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" }, - // This test fails with compact headers, but only with UseSSE<=3. - applyIf = { "UseCompactObjectHeaders", "false" }, + applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}) // 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned). @@ -353,19 +406,28 @@ public class TestVectorizationMismatchedAccess { public static void testByteByte1a(byte[] dest, byte[] src) { for (int i = 0; i < src.length / 8; i++) { UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i, UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i)); + // With AlignVector, we need 8-byte alignment of vector loads/stores. 
+ // UseCompactObjectHeaders=false UseCompactObjectHeaders=true + // src_adr = base + 16 + 8*i -> always src_adr = base + 12 + 8*i -> never + // dst_adr = base + 16 + 8*i -> always dst_adr = base + 12 + 8*i -> never + // -> vectorize -> no vectorization } } @Test @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" }, - // This test fails with compact headers, but only with UseSSE<=3. - applyIf = { "UseCompactObjectHeaders", "false" }, + applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}) // 32-bit: address has ConvL2I for cast of long to address, not supported. public static void testByteByte1b(byte[] dest, byte[] src) { for (int i = 0; i < src.length / 8; i++) { UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i, UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i)); + // With AlignVector, we need 8-byte alignment of vector loads/stores. + // UseCompactObjectHeaders=false UseCompactObjectHeaders=true + // src_adr = base + 16 + 8*i -> always src_adr = base + 12 + 8*i -> never + // dst_adr = base + 16 + 8*i -> always dst_adr = base + 12 + 8*i -> never + // -> vectorize -> no vectorization } } @@ -377,8 +439,7 @@ public class TestVectorizationMismatchedAccess { @Test @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" }, - // This test fails with compact headers, but only with UseSSE<=3. - applyIf = { "UseCompactObjectHeaders", "false" }, + applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}) // 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned). 
@@ -386,19 +447,28 @@ public class TestVectorizationMismatchedAccess { public static void testByteByte2a(byte[] dest, byte[] src) { for (int i = 1; i < src.length / 8; i++) { UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i - 1), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i)); + // With AlignVector, we need 8-byte alignment of vector loads/stores. + // UseCompactObjectHeaders=false UseCompactObjectHeaders=true + // src_adr = base + 16 + 8*i -> always src_adr = base + 12 + 8*i -> never + // dst_adr = base + 16 + 8*(i-1) -> always dst_adr = base + 12 + 8*(i-1) -> never + // -> vectorize -> no vectorization } } @Test @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" }, - // This test fails with compact headers, but only with UseSSE<=3. - applyIf = { "UseCompactObjectHeaders", "false" }, + applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"}, applyIfPlatform = {"64-bit", "true"}) // 32-bit: address has ConvL2I for cast of long to address, not supported. public static void testByteByte2b(byte[] dest, byte[] src) { for (int i = 1; i < src.length / 8; i++) { UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i - 1), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i)); + // With AlignVector, we need 8-byte alignment of vector loads/stores. 
+ // UseCompactObjectHeaders=false UseCompactObjectHeaders=true + // src_adr = base + 16 + 8*i -> always src_adr = base + 12 + 8*i -> never + // dst_adr = base + 16 + 8*(i-1) -> always dst_adr = base + 12 + 8*(i-1) -> never + // -> vectorize -> no vectorization } } diff --git a/test/hotspot/jtreg/compiler/c2/irTests/TestVectorizationNotRun.java b/test/hotspot/jtreg/compiler/c2/irTests/TestVectorizationNotRun.java index d61b8c658d6..27456bf9200 100644 --- a/test/hotspot/jtreg/compiler/c2/irTests/TestVectorizationNotRun.java +++ b/test/hotspot/jtreg/compiler/c2/irTests/TestVectorizationNotRun.java @@ -32,7 +32,6 @@ import java.util.Random; /* * @test * @bug 8300256 - * @requires (os.simpleArch == "x64") | (os.simpleArch == "aarch64") * @modules java.base/jdk.internal.misc * @library /test/lib / * @run driver compiler.c2.irTests.TestVectorizationNotRun @@ -42,7 +41,19 @@ public class TestVectorizationNotRun { private static final Unsafe UNSAFE = Unsafe.getUnsafe(); public static void main(String[] args) { - TestFramework.runWithFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED"); + // Cross-product: +-AlignVector and +-UseCompactObjectHeaders + TestFramework.runWithFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED", + "-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", + "-XX:-AlignVector"); + TestFramework.runWithFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED", + "-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", + "-XX:+AlignVector"); + TestFramework.runWithFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED", + "-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", + "-XX:-AlignVector"); + TestFramework.runWithFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED", + "-XX:+UnlockExperimentalVMOptions", 
"-XX:+UseCompactObjectHeaders", + "-XX:+AlignVector"); } static int size = 1024; @@ -52,14 +63,19 @@ public class TestVectorizationNotRun { @Test @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" }, - // This test fails with compact headers, but only with UseSSE<=3. - applyIf = { "UseCompactObjectHeaders", "false" }) + applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" }, + applyIfPlatform = {"64-bit", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) public static void test(byte[] dest, long[] src) { for (int i = 0; i < src.length; i++) { if ((i < 0) || (8 > sizeBytes - i)) { throw new IndexOutOfBoundsException(); } UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + i * 8, src[i]); + // For UseCompactObjectHeaders and AlignVector, we must 8-byte align all vector loads/stores. + // But the long-stores to the byte-array are never aligned: + // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8*iter + // = 16 (or 12 if UseCompactObjectHeaders=true) } } @@ -67,5 +83,4 @@ public class TestVectorizationNotRun { public static void test_runner() { test(byteArray, longArray); } - } diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/TestAlignVector.java b/test/hotspot/jtreg/compiler/loopopts/superword/TestAlignVector.java index 60d753ee75f..cb9484f2668 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/TestAlignVector.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/TestAlignVector.java @@ -60,6 +60,24 @@ import java.nio.ByteOrder; * @run driver compiler.loopopts.superword.TestAlignVector VerifyAlignVector */ +/* + * @test id=NoAlignVector-COH + * @bug 8310190 + * @summary Test AlignVector with various loop init, stride, scale, invar, etc. 
+ * @modules java.base/jdk.internal.misc + * @library /test/lib / + * @run driver compiler.loopopts.superword.TestAlignVector NoAlignVector-COH + */ + +/* + * @test id=VerifyAlignVector-COH + * @bug 8310190 + * @summary Test AlignVector with various loop init, stride, scale, invar, etc. + * @modules java.base/jdk.internal.misc + * @library /test/lib / + * @run driver compiler.loopopts.superword.TestAlignVector VerifyAlignVector-COH + */ + public class TestAlignVector { static int RANGE = 1024*8; static int RANGE_FINAL = 1024*8; @@ -96,9 +114,11 @@ public class TestAlignVector { "-XX:+IgnoreUnrecognizedVMOptions", "-XX:LoopUnrollLimit=250"); switch (args[0]) { - case "NoAlignVector" -> { framework.addFlags("-XX:-AlignVector"); } - case "AlignVector" -> { framework.addFlags("-XX:+AlignVector"); } - case "VerifyAlignVector" -> { framework.addFlags("-XX:+AlignVector", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+VerifyAlignVector"); } + case "NoAlignVector" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector"); } + case "AlignVector" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector"); } + case "VerifyAlignVector" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+VerifyAlignVector"); } + case "NoAlignVector-COH" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector"); } + case "VerifyAlignVector-COH" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+VerifyAlignVector"); } default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); } } framework.start(); @@ -117,7 +137,8 @@ public class TestAlignVector { // Add all tests to list tests.put("test0", () -> { return 
test0(aB.clone(), bB.clone(), mB); }); - tests.put("test1", () -> { return test1(aB.clone(), bB.clone(), mB); }); + tests.put("test1a", () -> { return test1a(aB.clone(), bB.clone(), mB); }); + tests.put("test1b", () -> { return test1b(aB.clone(), bB.clone(), mB); }); tests.put("test2", () -> { return test2(aB.clone(), bB.clone(), mB); }); tests.put("test3", () -> { return test3(aB.clone(), bB.clone(), mB); }); tests.put("test4", () -> { return test4(aB.clone(), bB.clone(), mB); }); @@ -132,6 +153,7 @@ public class TestAlignVector { tests.put("test10b", () -> { return test10b(aB.clone(), bB.clone(), mB); }); tests.put("test10c", () -> { return test10c(aS.clone(), bS.clone(), mS); }); tests.put("test10d", () -> { return test10d(aS.clone(), bS.clone(), mS); }); + tests.put("test10e", () -> { return test10e(aS.clone(), bS.clone(), mS); }); tests.put("test11aB", () -> { return test11aB(aB.clone(), bB.clone(), mB); }); tests.put("test11aS", () -> { return test11aS(aS.clone(), bS.clone(), mS); }); @@ -201,7 +223,8 @@ public class TestAlignVector { @Warmup(100) @Run(test = {"test0", - "test1", + "test1a", + "test1b", "test2", "test3", "test4", @@ -214,6 +237,7 @@ public class TestAlignVector { "test10b", "test10c", "test10d", + "test10e", "test11aB", "test11aS", "test11aI", @@ -404,13 +428,37 @@ public class TestAlignVector { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.AND_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, - applyIf = {"UseCompactObjectHeaders", "false"}, + applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, + // UNSAFE.ARRAY_BYTE_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_BYTE_BASE_OFFSET=12. + // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out. 
applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) - static Object[] test1(byte[] a, byte[] b, byte mask) { + static Object[] test1a(byte[] a, byte[] b, byte mask) { for (int i = 0; i < RANGE; i+=8) { - // Safe to vectorize with AlignVector - b[i+0] = (byte)(a[i+0] & mask); // offset 0, align 0 + b[i+0] = (byte)(a[i+0] & mask); // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 0 + iter*8 + b[i+1] = (byte)(a[i+1] & mask); + b[i+2] = (byte)(a[i+2] & mask); + b[i+3] = (byte)(a[i+3] & mask); + b[i+4] = (byte)(a[i+4] & mask); + b[i+5] = (byte)(a[i+5] & mask); + b[i+6] = (byte)(a[i+6] & mask); + b[i+7] = (byte)(a[i+7] & mask); + } + return new Object[]{ a, b }; + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", + IRNode.AND_VB, "> 0", + IRNode.STORE_VECTOR, "> 0"}, + applyIfOr = {"UseCompactObjectHeaders", "true", "AlignVector", "false"}, + // UNSAFE.ARRAY_BYTE_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_BYTE_BASE_OFFSET=12. + // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out. + applyIfPlatform = {"64-bit", "true"}, + applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) + static Object[] test1b(byte[] a, byte[] b, byte mask) { + for (int i = 4; i < RANGE-8; i+=8) { + b[i+0] = (byte)(a[i+0] & mask); // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 4 + iter*8 b[i+1] = (byte)(a[i+1] & mask); b[i+2] = (byte)(a[i+2] & mask); b[i+3] = (byte)(a[i+3] & mask); @@ -714,11 +762,33 @@ public class TestAlignVector { IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"MaxVectorSize", ">=16", "UseCompactObjectHeaders", "false"}, + // UNSAFE.ARRAY_BYTE_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_BYTE_BASE_OFFSET=12. + // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out. 
applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) static Object[] test10d(short[] a, short[] b, short mask) { for (int i = 13; i < RANGE-16; i+=8) { - // init + offset -> aligned + // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*(3 + 13) + iter*16 + b[i+0+3] = (short)(a[i+0+3] & mask); + b[i+1+3] = (short)(a[i+1+3] & mask); + b[i+2+3] = (short)(a[i+2+3] & mask); + b[i+3+3] = (short)(a[i+3+3] & mask); + } + return new Object[]{ a, b }; + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0", + IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0", + IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"MaxVectorSize", ">=16", "UseCompactObjectHeaders", "true"}, + // UNSAFE.ARRAY_BYTE_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_BYTE_BASE_OFFSET=12. + // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out. + applyIfPlatform = {"64-bit", "true"}, + applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) + static Object[] test10e(short[] a, short[] b, short mask) { + for (int i = 11; i < RANGE-16; i+=8) { + // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*(3 + 11) + iter*16 b[i+0+3] = (short)(a[i+0+3] & mask); b[i+1+3] = (short)(a[i+1+3] & mask); b[i+2+3] = (short)(a[i+2+3] & mask); @@ -1008,13 +1078,26 @@ public class TestAlignVector { IRNode.ADD_VB, "> 0", IRNode.ADD_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, - applyIf = {"UseCompactObjectHeaders", "false"}, + applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) static Object[] test13aIB(int[] a, byte[] b) { for (int i = 0; i < RANGE; i++) { + // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter + // = 16 (or 12 if UseCompactObjectHeaders=true) a[i]++; + // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter + // = 16 (or 12 if UseCompactObjectHeaders=true) b[i]++; + // For AlignVector, 
all adr must be 8-byte aligned. Let's see for which iteration this can hold: + // If UseCompactObjectHeaders=false: + // a: 0, 2, 4, 6, 8, ... + // b: 0, 8, 16, 24, 32, ... + // -> Ok, aligns every 8th iteration. + // If UseCompactObjectHeaders=true: + // a: 1, 3, 5, 7, 9, ... + // b: 4, 12, 20, 28, 36, ... + // -> we can never align both vectors! } return new Object[]{ a, b }; } @@ -1025,13 +1108,26 @@ public class TestAlignVector { IRNode.ADD_VI, "> 0", IRNode.ADD_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, - applyIf = {"UseCompactObjectHeaders", "false"}, + applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) static Object[] test13aIS(int[] a, short[] b) { for (int i = 0; i < RANGE; i++) { + // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter + // = 16 (or 12 if UseCompactObjectHeaders=true) a[i]++; + // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*iter + // = 16 (or 12 if UseCompactObjectHeaders=true) b[i]++; + // For AlignVector, all adr must be 8-byte aligned. Let's see for which iteration this can hold: + // If UseCompactObjectHeaders=false: + // a: iter % 2 == 0 + // b: iter % 4 == 0 + // -> Ok, aligns every 4th iteration. + // If UseCompactObjectHeaders=true: + // a: iter % 2 = 1 + // b: iter % 4 = 2 + // -> we can never align both vectors! 
} return new Object[]{ a, b }; } @@ -1046,15 +1142,27 @@ public class TestAlignVector { IRNode.ADD_VI, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, - applyIf = {"UseCompactObjectHeaders", "false"}, + applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) static Object[] test13aBSIL(byte[] a, short[] b, int[] c, long[] d) { for (int i = 0; i < RANGE; i++) { + // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter + // = 16 (or 12 if UseCompactObjectHeaders=true) a[i]++; + // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*iter + // = 16 (or 12 if UseCompactObjectHeaders=true) b[i]++; + // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter + // = 16 (or 12 if UseCompactObjectHeaders=true) c[i]++; + // adr = base + UNSAFE.ARRAY_LONG_BASE_OFFSET + 8*iter + // = 16 (always) d[i]++; + // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned: + // a: iter % 8 = 4 + // c: iter % 2 = 1 + // -> can never align both vectors! } return new Object[]{ a, b, c, d }; } @@ -1082,13 +1190,21 @@ public class TestAlignVector { IRNode.ADD_VB, "> 0", IRNode.ADD_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, - applyIf = {"UseCompactObjectHeaders", "false"}, + applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) static Object[] test13bIB(int[] a, byte[] b) { for (int i = 1; i < RANGE; i++) { + // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4 + 4*iter + // = 16 (or 12 if UseCompactObjectHeaders=true) a[i]++; + // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1 + 1*iter + // = 16 (or 12 if UseCompactObjectHeaders=true) b[i]++; + // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned: + // a: iter % 2 = 0 + // b: iter % 8 = 3 + // -> can never align both vectors! 
} return new Object[]{ a, b }; } @@ -1099,13 +1215,21 @@ public class TestAlignVector { IRNode.ADD_VI, "> 0", IRNode.ADD_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, - applyIf = {"UseCompactObjectHeaders", "false"}, + applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) static Object[] test13bIS(int[] a, short[] b) { for (int i = 1; i < RANGE; i++) { + // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4 + 4*iter + // = 16 (or 12 if UseCompactObjectHeaders=true) a[i]++; + // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2 + 2*iter + // = 16 (or 12 if UseCompactObjectHeaders=true) b[i]++; + // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned: + // a: iter % 2 = 0 + // b: iter % 4 = 1 + // -> can never align both vectors! } return new Object[]{ a, b }; } @@ -1120,15 +1244,27 @@ public class TestAlignVector { IRNode.ADD_VI, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, - applyIf = {"UseCompactObjectHeaders", "false"}, + applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) static Object[] test13bBSIL(byte[] a, short[] b, int[] c, long[] d) { for (int i = 1; i < RANGE; i++) { + // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1 + 1*iter + // = 16 (or 12 if UseCompactObjectHeaders=true) a[i]++; + // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2 + 2*iter + // = 16 (or 12 if UseCompactObjectHeaders=true) b[i]++; + // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4 + 4*iter + // = 16 (or 12 if UseCompactObjectHeaders=true) c[i]++; + // adr = base + UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 + 8*iter + // = 16 (always) d[i]++; + // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned: + // a: iter % 8 = 3 + // c: iter % 2 = 0 + // -> can never align both vectors! 
} return new Object[]{ a, b, c, d }; } diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/TestIndependentPacksWithCyclicDependency.java b/test/hotspot/jtreg/compiler/loopopts/superword/TestIndependentPacksWithCyclicDependency.java index 197ae08b6d8..9edd9d28dd8 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/TestIndependentPacksWithCyclicDependency.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/TestIndependentPacksWithCyclicDependency.java @@ -29,7 +29,10 @@ * between the packs. * @modules java.base/jdk.internal.misc * @library /test/lib / - * @run driver compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency + * @run driver compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency nCOH_nAV + * @run driver compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency nCOH_yAV + * @run driver compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency yCOH_nAV + * @run driver compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency yCOH_yAV */ package compiler.loopopts.superword; @@ -72,11 +75,20 @@ public class TestIndependentPacksWithCyclicDependency { long[] goldL10 = new long[RANGE]; public static void main(String args[]) { - TestFramework.runWithFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED", - "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency::test*", - "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency::verify", - "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency::init", - "-XX:+IgnoreUnrecognizedVMOptions", "-XX:LoopUnrollLimit=1000"); + TestFramework framework = new TestFramework(TestIndependentPacksWithCyclicDependency.class); + framework.addFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED", + 
"-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency::test*", + "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency::verify", + "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency::init", + "-XX:+IgnoreUnrecognizedVMOptions", "-XX:LoopUnrollLimit=1000"); + switch (args[0]) { + case "nCOH_nAV" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector"); } + case "nCOH_yAV" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector"); } + case "yCOH_nAV" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector"); } + case "yCOH_yAV" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector"); } + default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); } + }; + framework.start(); } TestIndependentPacksWithCyclicDependency() { @@ -118,6 +130,7 @@ public class TestIndependentPacksWithCyclicDependency { @Test @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VF, "> 0"}, + applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) static void test0(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) { @@ -127,6 +140,10 @@ public class TestIndependentPacksWithCyclicDependency { dataIb[i+1] = dataIa[i+1] + 3; dataFb[i+0] = dataFa[i+0] * 1.3f; dataFb[i+1] = dataFa[i+1] * 1.3f; + // With AlignVector, we need 8-byte alignment of vector loads/stores. 
+ // UseCompactObjectHeaders=false UseCompactObjectHeaders=true + // adr = base + 16 + 8*i -> always adr = base + 12 + 8*i -> never + // -> vectorize -> no vectorization } } @@ -143,6 +160,7 @@ public class TestIndependentPacksWithCyclicDependency { @Test @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VF, "> 0", IRNode.VECTOR_CAST_F2I, "> 0", IRNode.VECTOR_CAST_I2F, "> 0"}, + applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) static void test1(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) { @@ -152,6 +170,10 @@ public class TestIndependentPacksWithCyclicDependency { dataFa[i+1] = dataIa[i+1] + 3; dataIb[i+0] = (int)(dataFb[i+0] * 1.3f); dataIb[i+1] = (int)(dataFb[i+1] * 1.3f); + // With AlignVector, we need 8-byte alignment of vector loads/stores. + // UseCompactObjectHeaders=false UseCompactObjectHeaders=true + // adr = base + 16 + 8*i -> always adr = base + 12 + 8*i -> never + // -> vectorize -> no vectorization } } @@ -167,6 +189,7 @@ public class TestIndependentPacksWithCyclicDependency { @Test @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VI, "> 0"}, + applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) static void test2(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) { @@ -176,6 +199,10 @@ public class TestIndependentPacksWithCyclicDependency { unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, dataIa[i+1] + 1); dataIb[i+0] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0); dataIb[i+1] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4); + // With AlignVector, we need 8-byte alignment of vector loads/stores. 
+ // UseCompactObjectHeaders=false UseCompactObjectHeaders=true + // adr = base + 16 + 8*i -> always adr = base + 12 + 8*i -> never + // -> vectorize -> no vectorization } } @@ -192,6 +219,7 @@ public class TestIndependentPacksWithCyclicDependency { @Test @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VF, "> 0"}, + applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) static void test3(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) { @@ -203,6 +231,10 @@ public class TestIndependentPacksWithCyclicDependency { dataFb[i+1] = dataFa[i+1] * 1.3f; dataFb[i+0] = dataFa[i+0] * 1.3f; dataIb[i+1] = dataIa[i+1] + 3; + // With AlignVector, we need 8-byte alignment of vector loads/stores. + // UseCompactObjectHeaders=false UseCompactObjectHeaders=true + // adr = base + 16 + 8*i -> always adr = base + 12 + 8*i -> never + // -> vectorize -> no vectorization } } @@ -269,6 +301,7 @@ public class TestIndependentPacksWithCyclicDependency { @Test @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VI, "> 0", IRNode.ADD_VF, "> 0"}, + applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) static void test6(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb, @@ -287,6 +320,10 @@ public class TestIndependentPacksWithCyclicDependency { float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f; unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20); unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21); + // With AlignVector, we need 8-byte alignment of vector loads/stores. 
+ // UseCompactObjectHeaders=false UseCompactObjectHeaders=true + // adr = base + 16 + 8*i -> always adr = base + 12 + 8*i -> never + // -> vectorize -> no vectorization } } diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/TestMulAddS2I.java b/test/hotspot/jtreg/compiler/loopopts/superword/TestMulAddS2I.java index 9aaa7cdd8a9..5c7ff6c524d 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/TestMulAddS2I.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/TestMulAddS2I.java @@ -77,8 +77,10 @@ public class TestMulAddS2I { public static void main(String[] args) { - TestFramework.runWithFlags("-XX:+IgnoreUnrecognizedVMOptions", "-XX:+AlignVector"); - TestFramework.runWithFlags("-XX:+IgnoreUnrecognizedVMOptions", "-XX:-AlignVector"); + TestFramework.runWithFlags("-XX:+IgnoreUnrecognizedVMOptions", "-XX:-AlignVector", "-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders"); + TestFramework.runWithFlags("-XX:+IgnoreUnrecognizedVMOptions", "-XX:+AlignVector", "-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders"); + TestFramework.runWithFlags("-XX:+IgnoreUnrecognizedVMOptions", "-XX:-AlignVector", "-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders"); + TestFramework.runWithFlags("-XX:+IgnoreUnrecognizedVMOptions", "-XX:+AlignVector", "-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders"); } @Run(test = {"testa", "testb", "testc", "testd", "teste", "testf", "testg", "testh", @@ -163,19 +165,26 @@ public class TestMulAddS2I { @Test @IR(applyIfCPUFeature = {"sse2", "true"}, applyIfPlatform = {"64-bit", "true"}, - applyIf = { "UseCompactObjectHeaders", "false" }, + applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" }, counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) @IR(applyIfCPUFeature = {"asimd", "true"}, applyIfAnd = {"MaxVectorSize", "16", "UseCompactObjectHeaders", "false"}, // AD file requires vector_length = 16 counts = {IRNode.MUL_ADD_S2I, "> 0", 
IRNode.MUL_ADD_VS2VI, "> 0"}) @IR(applyIfCPUFeature = {"avx512_vnni", "true"}, - applyIf = { "UseCompactObjectHeaders", "false" }, + applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" }, counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"}) public static int[] testd(int[] out) { for (int i = 0; i < ITER-2; i+=2) { // Unrolled, with the same structure. out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1])); out[i+1] += ((sArr1[2*i+2] * sArr2[2*i+2]) + (sArr1[2*i+3] * sArr2[2*i+3])); + // Hand-unrolling can mess with AlignVector and UseCompactObjectHeaders. + // We need all addresses 8-byte aligned. + // + // out: + // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 8*iter + // = 16 (or 12 if UseCompactObjectHeaders=true) + // -> never aligned! } return out; } @@ -183,19 +192,26 @@ public class TestMulAddS2I { @Test @IR(applyIfCPUFeature = {"sse2", "true"}, applyIfPlatform = {"64-bit", "true"}, - applyIf = { "UseCompactObjectHeaders", "false" }, + applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" }, counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) @IR(applyIfCPUFeature = {"asimd", "true"}, applyIfAnd = {"MaxVectorSize", "16", "UseCompactObjectHeaders", "false" }, // AD file requires vector_length = 16 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) @IR(applyIfCPUFeature = {"avx512_vnni", "true"}, - applyIf = { "UseCompactObjectHeaders", "false" }, + applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" }, counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"}) public static int[] teste(int[] out) { for (int i = 0; i < ITER-2; i+=2) { // Unrolled, with some swaps. out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1])); out[i+1] += ((sArr2[2*i+2] * sArr1[2*i+2]) + (sArr1[2*i+3] * sArr2[2*i+3])); // swap(1 2) + // Hand-unrolling can mess with AlignVector and UseCompactObjectHeaders. 
+ // We need all addresses 8-byte aligned. + // + // out: + // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 8*iter + // = 16 (or 12 if UseCompactObjectHeaders=true) + // -> never aligned! } return out; } @@ -203,19 +219,26 @@ public class TestMulAddS2I { @Test @IR(applyIfCPUFeature = {"sse2", "true"}, applyIfPlatform = {"64-bit", "true"}, - applyIf = { "UseCompactObjectHeaders", "false" }, + applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" }, counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) @IR(applyIfCPUFeature = {"asimd", "true"}, applyIfAnd = {"MaxVectorSize", "16", "UseCompactObjectHeaders", "false" }, // AD file requires vector_length = 16 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) @IR(applyIfCPUFeature = {"avx512_vnni", "true"}, - applyIf = { "UseCompactObjectHeaders", "false" }, + applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" }, counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"}) public static int[] testf(int[] out) { for (int i = 0; i < ITER-2; i+=2) { // Unrolled, with some swaps. out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1])); out[i+1] += ((sArr2[2*i+2] * sArr1[2*i+2]) + (sArr2[2*i+3] * sArr1[2*i+3])); // swap(1 2), swap(3 4) + // Hand-unrolling can mess with AlignVector and UseCompactObjectHeaders. + // We need all addresses 8-byte aligned. + // + // out: + // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 8*iter + // = 16 (or 12 if UseCompactObjectHeaders=true) + // -> never aligned! 
} return out; } @@ -223,19 +246,26 @@ public class TestMulAddS2I { @Test @IR(applyIfCPUFeature = {"sse2", "true"}, applyIfPlatform = {"64-bit", "true"}, - applyIf = { "UseCompactObjectHeaders", "false" }, + applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" }, counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) @IR(applyIfCPUFeature = {"asimd", "true"}, applyIfAnd = {"MaxVectorSize", "16", "UseCompactObjectHeaders", "false" }, // AD file requires vector_length = 16 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) @IR(applyIfCPUFeature = {"avx512_vnni", "true"}, - applyIf = { "UseCompactObjectHeaders", "false" }, + applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" }, counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"}) public static int[] testg(int[] out) { for (int i = 0; i < ITER-2; i+=2) { // Unrolled, with some swaps. out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1])); out[i+1] += ((sArr1[2*i+3] * sArr2[2*i+3]) + (sArr1[2*i+2] * sArr2[2*i+2])); // swap(1 3), swap(2 4) + // Hand-unrolling can mess with AlignVector and UseCompactObjectHeaders. + // We need all addresses 8-byte aligned. + // + // out: + // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 8*iter + // = 16 (or 12 if UseCompactObjectHeaders=true) + // -> never aligned! 
} return out; } @@ -243,19 +273,26 @@ public class TestMulAddS2I { @Test @IR(applyIfCPUFeature = {"sse2", "true"}, applyIfPlatform = {"64-bit", "true"}, - applyIf = { "UseCompactObjectHeaders", "false" }, + applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" }, counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) @IR(applyIfCPUFeature = {"asimd", "true"}, applyIfAnd = {"MaxVectorSize", "16", "UseCompactObjectHeaders", "false" }, // AD file requires vector_length = 16 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) @IR(applyIfCPUFeature = {"avx512_vnni", "true"}, - applyIf = { "UseCompactObjectHeaders", "false" }, + applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" }, counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"}) public static int[] testh(int[] out) { for (int i = 0; i < ITER-2; i+=2) { // Unrolled, with some swaps. out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1])); out[i+1] += ((sArr2[2*i+3] * sArr1[2*i+3]) + (sArr2[2*i+2] * sArr1[2*i+2])); // swap(1 4), swap(2 3) + // Hand-unrolling can mess with AlignVector and UseCompactObjectHeaders. + // We need all addresses 8-byte aligned. + // + // out: + // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 8*iter + // = 16 (or 12 if UseCompactObjectHeaders=true) + // -> never aligned! } return out; } diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/TestScheduleReordersScalarMemops.java b/test/hotspot/jtreg/compiler/loopopts/superword/TestScheduleReordersScalarMemops.java index c54a684c691..0512442c896 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/TestScheduleReordersScalarMemops.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/TestScheduleReordersScalarMemops.java @@ -30,7 +30,10 @@ * be reordered during SuperWord::schedule. 
* @modules java.base/jdk.internal.misc * @library /test/lib / - * @run driver compiler.loopopts.superword.TestScheduleReordersScalarMemops + * @run driver compiler.loopopts.superword.TestScheduleReordersScalarMemops nCOH_nAV + * @run driver compiler.loopopts.superword.TestScheduleReordersScalarMemops nCOH_yAV + * @run driver compiler.loopopts.superword.TestScheduleReordersScalarMemops yCOH_nAV + * @run driver compiler.loopopts.superword.TestScheduleReordersScalarMemops yCOH_yAV */ package compiler.loopopts.superword; @@ -50,12 +53,21 @@ public class TestScheduleReordersScalarMemops { float[] goldF1 = new float[RANGE]; public static void main(String args[]) { - TestFramework.runWithFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED", - "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestScheduleReordersScalarMemops::test*", - "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestScheduleReordersScalarMemops::verify", - "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestScheduleReordersScalarMemops::init", - "-XX:-TieredCompilation", "-Xbatch", - "-XX:+IgnoreUnrecognizedVMOptions", "-XX:LoopUnrollLimit=1000"); + TestFramework framework = new TestFramework(TestScheduleReordersScalarMemops.class); + framework.addFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED", + "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestScheduleReordersScalarMemops::test*", + "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestScheduleReordersScalarMemops::verify", + "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestScheduleReordersScalarMemops::init", + "-XX:-TieredCompilation", "-Xbatch", + "-XX:+IgnoreUnrecognizedVMOptions", "-XX:LoopUnrollLimit=1000"); + switch (args[0]) { + case "nCOH_nAV" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector"); } + case "nCOH_yAV" -> { 
framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector"); } + case "yCOH_nAV" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector"); } + case "yCOH_yAV" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector"); } + default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); } + }; + framework.start(); } TestScheduleReordersScalarMemops() { @@ -79,6 +91,7 @@ public class TestScheduleReordersScalarMemops { @Test @IR(counts = {IRNode.MUL_VI, "> 0"}, + applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) static void test0(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) { for (int i = 0; i < RANGE; i+=2) { @@ -103,6 +116,10 @@ public class TestScheduleReordersScalarMemops { dataIb[i + 0] = (int)dataFb[i + 0] * 11; // X *11 dataIb[i + 1] = (int)dataFb[i + 1] * 11; // Y *11 dataFa[i + 1] = dataIa[i + 1] + 1.2f; // B +1.2 + // With AlignVector, we need 8-byte alignment of vector loads/stores. 
+ // UseCompactObjectHeaders=false UseCompactObjectHeaders=true + // adr = base + 16 + 8*i -> always adr = base + 12 + 8*i -> never + // -> vectorize -> no vectorization } } @@ -119,6 +136,7 @@ public class TestScheduleReordersScalarMemops { @Test @IR(counts = {IRNode.MUL_VI, "> 0"}, + applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) static void test1(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) { for (int i = 0; i < RANGE; i+=2) { @@ -128,6 +146,10 @@ public class TestScheduleReordersScalarMemops { dataIb[i+0] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0); // X dataIb[i+1] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4); // Y unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, dataIa[i+1] * 11); // B *11 + // With AlignVector, we need 8-byte alignment of vector loads/stores. + // UseCompactObjectHeaders=false UseCompactObjectHeaders=true + // adr = base + 16 + 8*i -> always adr = base + 12 + 8*i -> never + // -> vectorize -> no vectorization } } diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/TestSplitPacks.java b/test/hotspot/jtreg/compiler/loopopts/superword/TestSplitPacks.java index 1824f18c8ff..bd47f7b9331 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/TestSplitPacks.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/TestSplitPacks.java @@ -37,7 +37,10 @@ import java.nio.ByteOrder; * @bug 8326139 * @summary Test splitting packs in SuperWord * @library /test/lib / - * @run driver compiler.loopopts.superword.TestSplitPacks + * @run driver compiler.loopopts.superword.TestSplitPacks nCOH_nAV + * @run driver compiler.loopopts.superword.TestSplitPacks nCOH_yAV + * @run driver compiler.loopopts.superword.TestSplitPacks yCOH_nAV + * @run driver compiler.loopopts.superword.TestSplitPacks yCOH_yAV */ public class TestSplitPacks { @@ -70,7 +73,16 @@ public class TestSplitPacks { } 
public static void main(String[] args) { - TestFramework.runWithFlags("-XX:+IgnoreUnrecognizedVMOptions", "-XX:LoopUnrollLimit=1000"); + TestFramework framework = new TestFramework(TestSplitPacks.class); + framework.addFlags("-XX:+IgnoreUnrecognizedVMOptions", "-XX:LoopUnrollLimit=1000"); + switch (args[0]) { + case "nCOH_nAV" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector"); } + case "nCOH_yAV" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector"); } + case "yCOH_nAV" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector"); } + case "yCOH_yAV" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector"); } + default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); } + }; + framework.start(); } public TestSplitPacks() { @@ -266,7 +278,15 @@ public class TestSplitPacks { IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0", IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0", IRNode.STORE_VECTOR, "> 0"}, - applyIf = {"MaxVectorSize", ">=32"}, + applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"}, + applyIfPlatform = {"64-bit", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0", + IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0", + IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0", + IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0", + IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) // Load and store are already split @@ -291,6 +311,10 @@ public class TestSplitPacks { b[i+5] = b5; b[i+6] = b6; b[i+7] = b7; + // With AlignVector, we need 8-byte alignment of vector loads/stores. 
+ // UseCompactObjectHeaders=false UseCompactObjectHeaders=true + // adr = base + 16 + 32*i -> always adr = base + 12 + 32*i -> never + // -> vectorize -> no vectorization } return new Object[]{ a, b }; } @@ -301,7 +325,15 @@ public class TestSplitPacks { IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0", IRNode.MUL_VI, IRNode.VECTOR_SIZE_2, "> 0", IRNode.STORE_VECTOR, "> 0"}, - applyIf = {"MaxVectorSize", ">=32"}, + applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"}, + applyIfPlatform = {"64-bit", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0", + IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0", + IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0", + IRNode.MUL_VI, IRNode.VECTOR_SIZE_2, "> 0", + IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) // Adjacent Load and Store, but split by Add/Mul @@ -314,6 +346,10 @@ public class TestSplitPacks { b[i+4] = a[i+4] * mask; // Mul b[i+5] = a[i+5] * mask; + // With AlignVector, we need 8-byte alignment of vector loads/stores. 
+ // UseCompactObjectHeaders=false UseCompactObjectHeaders=true + // adr = base + 16 + 32*i -> always adr = base + 12 + 32*i -> never + // -> vectorize -> no vectorization } return new Object[]{ a, b }; } @@ -324,7 +360,15 @@ public class TestSplitPacks { IRNode.ADD_VI, IRNode.VECTOR_SIZE_2, "> 0", IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0", IRNode.STORE_VECTOR, "> 0"}, - applyIf = {"MaxVectorSize", ">=32"}, + applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"}, + applyIfPlatform = {"64-bit", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0", + IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0", + IRNode.ADD_VI, IRNode.VECTOR_SIZE_2, "> 0", + IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0", + IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) // Adjacent Load and Store, but split by Add/Mul @@ -337,6 +381,10 @@ public class TestSplitPacks { b[i+4] = a[i+4] + mask; // Add b[i+5] = a[i+5] + mask; + // With AlignVector, we need 8-byte alignment of vector loads/stores. 
+ // UseCompactObjectHeaders=false UseCompactObjectHeaders=true + // adr = base + 16 + 32*i -> always adr = base + 12 + 32*i -> never + // -> vectorize -> no vectorization } return new Object[]{ a, b }; } @@ -347,7 +395,15 @@ public class TestSplitPacks { IRNode.ADD_VI, IRNode.VECTOR_SIZE_2, "> 0", IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0", IRNode.STORE_VECTOR, "> 0"}, - applyIf = {"MaxVectorSize", ">=32"}, + applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"}, + applyIfPlatform = {"64-bit", "true"}, + applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) + @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0", + IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0", + IRNode.ADD_VI, IRNode.VECTOR_SIZE_2, "> 0", + IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0", + IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) // Adjacent Load and Store, but split by Add/Mul @@ -360,6 +416,10 @@ public class TestSplitPacks { b[i+3] = a[i+3] * mask; b[i+4] = a[i+4] * mask; b[i+5] = a[i+5] * mask; + // With AlignVector, we need 8-byte alignment of vector loads/stores. 
+ // UseCompactObjectHeaders=false UseCompactObjectHeaders=true + // adr = base + 16 + 32*i -> always adr = base + 12 + 32*i -> never + // -> vectorize -> no vectorization } return new Object[]{ a, b }; } @@ -370,7 +430,15 @@ public class TestSplitPacks { IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0", IRNode.MUL_VI, IRNode.VECTOR_SIZE_2, "> 0", IRNode.STORE_VECTOR, "> 0"}, - applyIf = {"MaxVectorSize", ">=32"}, + applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"}, + applyIfPlatform = {"64-bit", "true"}, + applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) + @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0", + IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0", + IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0", + IRNode.MUL_VI, IRNode.VECTOR_SIZE_2, "> 0", + IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) // Adjacent Load and Store, but split by Add/Mul @@ -383,6 +451,10 @@ public class TestSplitPacks { b[i+3] = a[i+3] + mask; b[i+4] = a[i+4] + mask; b[i+5] = a[i+5] + mask; + // With AlignVector, we need 8-byte alignment of vector loads/stores. 
+ // UseCompactObjectHeaders=false UseCompactObjectHeaders=true + // adr = base + 16 + 32*i -> always adr = base + 12 + 32*i -> never + // -> vectorize -> no vectorization } return new Object[]{ a, b }; } @@ -393,7 +465,15 @@ public class TestSplitPacks { IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0", IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0", IRNode.STORE_VECTOR, "> 0"}, - applyIf = {"MaxVectorSize", ">=32"}, + applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"}, + applyIfPlatform = {"64-bit", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0", + IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0", + IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0", + IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0", + IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) // Split the load @@ -420,6 +500,10 @@ public class TestSplitPacks { b[i+5] = b3; b[i+6] = b4; b[i+7] = b5; + // With AlignVector, we need 8-byte alignment of vector loads/stores. 
+ // UseCompactObjectHeaders=false UseCompactObjectHeaders=true + // adr = base + 16 + 32*i -> always adr = base + 12 + 32*i -> never + // -> vectorize -> no vectorization } return new Object[]{ a, b }; } @@ -430,7 +514,15 @@ public class TestSplitPacks { IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0", IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0", IRNode.STORE_VECTOR, "> 0"}, - applyIf = {"MaxVectorSize", ">=32"}, + applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"}, + applyIfPlatform = {"64-bit", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0", + IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0", + IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0", + IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0", + IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) // Split the load @@ -457,6 +549,10 @@ public class TestSplitPacks { b[i+6] = b4; b[i+7] = b5; + // With AlignVector, we need 8-byte alignment of vector loads/stores. 
+ // UseCompactObjectHeaders=false UseCompactObjectHeaders=true + // adr = base + 16 + 32*i -> always adr = base + 12 + 32*i -> never + // -> vectorize -> no vectorization } return new Object[]{ a, b }; } @@ -467,7 +563,15 @@ public class TestSplitPacks { IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0", IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0", IRNode.STORE_VECTOR, "> 0"}, - applyIf = {"MaxVectorSize", ">=32"}, + applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"}, + applyIfPlatform = {"64-bit", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0", + IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0", + IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0", + IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0", + IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) // Split the load @@ -494,6 +598,10 @@ public class TestSplitPacks { b[i+3] = b5; b[i+4] = b6; b[i+5] = b7; + // With AlignVector, we need 8-byte alignment of vector loads/stores. 
+ // UseCompactObjectHeaders=false UseCompactObjectHeaders=true + // adr = base + 16 + 32*i -> always adr = base + 12 + 32*i -> never + // -> vectorize -> no vectorization } return new Object[]{ a, b }; } @@ -504,7 +612,15 @@ public class TestSplitPacks { IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0", IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0", IRNode.STORE_VECTOR, "> 0"}, - applyIf = {"MaxVectorSize", ">=32"}, + applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"}, + applyIfPlatform = {"64-bit", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0", + IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0", + IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0", + IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0", + IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) // Split the load @@ -531,6 +647,10 @@ public class TestSplitPacks { b[i+3] = b3; b[i+4] = b6; b[i+5] = b7; + // With AlignVector, we need 8-byte alignment of vector loads/stores. 
+ // UseCompactObjectHeaders=false UseCompactObjectHeaders=true + // adr = base + 16 + 32*i -> always adr = base + 12 + 32*i -> never + // -> vectorize -> no vectorization } return new Object[]{ a, b }; } @@ -538,7 +658,12 @@ public class TestSplitPacks { @Test @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0", IRNode.STORE_VECTOR, "> 0"}, - applyIf = {"MaxVectorSize", ">=32"}, + applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"}, + applyIfPlatform = {"64-bit", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0", + IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) // 0 1 2 3 4 5 6 7 - @@ -551,30 +676,35 @@ public class TestSplitPacks { static Object[] test3a(short[] a, short[] b, short val) { int sum = 0; for (int i = 0; i < RANGE; i+=16) { - short a0 = a[i+0]; // required for alignment / offsets, technical limitation. + short a0 = a[i+0]; // required for alignment / offsets, technical limitation. - short a1 = a[i+1]; // adjacent to 4-pack, but need to be split off - short a2 = a[i+2]; - short a3 = a[i+3]; + short a1 = a[i+1]; // adjacent to 4-pack, but need to be split off + short a2 = a[i+2]; + short a3 = a[i+3]; - short a4 = a[i+4]; // 4-pack - short a5 = a[i+5]; - short a6 = a[i+6]; - short a7 = a[i+7]; + short a4 = a[i+4]; // 4-pack + short a5 = a[i+5]; + short a6 = a[i+6]; + short a7 = a[i+7]; - b[i+0] = a0; // required for alignment / offsets, technical limitation. + b[i+0] = a0; // required for alignment / offsets, technical limitation. 
- sum += a1 + a2 + a3; // not packed + sum += a1 + a2 + a3; // not packed - b[i+3] = val; // adjacent to 4-pack but needs to be split off + b[i+3] = val; // adjacent to 4-pack but needs to be split off - b[i+4] = a4; // 4-pack - b[i+5] = a5; - b[i+6] = a6; - b[i+7] = a7; + b[i+4] = a4; // 4-pack + b[i+5] = a5; + b[i+6] = a6; + b[i+7] = a7; - b[i+8] = val; // adjacent to 4-pack but needs to be split off + b[i+8] = val; // adjacent to 4-pack but needs to be split off + + // With AlignVector, we need 8-byte alignment of vector loads/stores. + // UseCompactObjectHeaders=false UseCompactObjectHeaders=true + // adr = base + 16 + 8 + 32*i -> always adr = base + 12 + 8 + 32*i -> never + // -> vectorize -> no vectorization } return new Object[]{ a, b, new int[]{ sum } }; } @@ -718,7 +848,15 @@ public class TestSplitPacks { IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0", IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0", // reduction moved out of loop IRNode.ADD_REDUCTION_V, "> 0"}, - applyIf = {"MaxVectorSize", ">=32"}, + applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"}, + applyIfPlatform = {"64-bit", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0", + IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0", + IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0", + IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0", // reduction moved out of loop + IRNode.ADD_REDUCTION_V, "> 0"}, + applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) // Split packs including reductions @@ -734,6 +872,10 @@ public class TestSplitPacks { s += a[i+5] & b[i+5]; s += a[i+6] & b[i+6]; s += a[i+7] & b[i+7]; + // With AlignVector, we need 8-byte alignment of vector loads/stores. 
+ // UseCompactObjectHeaders=false UseCompactObjectHeaders=true + // adr = base + 16 + 32*i -> always adr = base + 12 + 32*i -> never + // -> vectorize -> no vectorization } return new Object[]{ a, b, new int[]{ s } }; } diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/TestUnorderedReductionPartialVectorization.java b/test/hotspot/jtreg/compiler/loopopts/superword/TestUnorderedReductionPartialVectorization.java index 0d4a4e7b5d8..6150e24cc5e 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/TestUnorderedReductionPartialVectorization.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/TestUnorderedReductionPartialVectorization.java @@ -39,7 +39,10 @@ public class TestUnorderedReductionPartialVectorization { static final int ITER = 10; public static void main(String[] args) { - TestFramework.run(); + TestFramework.runWithFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector"); + TestFramework.runWithFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector"); + TestFramework.runWithFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector"); + TestFramework.runWithFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector"); } @Run(test = {"test1"}) @@ -61,6 +64,7 @@ public class TestUnorderedReductionPartialVectorization { @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", IRNode.VECTOR_CAST_I2L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", IRNode.OR_REDUCTION_V, "> 0",}, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, applyIfPlatform = {"64-bit", "true"}, applyIfCPUFeatureOr = {"avx2", "true"}) static long test1(int[] data, long sum) { @@ -88,6 +92,11 @@ public class TestUnorderedReductionPartialVectorization { // no vectorization. We now ensure there are again 2 packs per operation with a 2x hand unroll. 
int v2 = data[i + 1]; sum |= v2; + + // With AlignVector, we need 8-byte alignment of vector loads/stores. + // UseCompactObjectHeaders=false UseCompactObjectHeaders=true + // adr = base + 16 + 8*i -> always adr = base + 12 + 8*i -> never + // -> vectorize -> no vectorization } return sum; } diff --git a/test/hotspot/jtreg/compiler/vectorization/TestFloatConversionsVector.java b/test/hotspot/jtreg/compiler/vectorization/TestFloatConversionsVector.java index 2fd5364a78b..3eb3f3eebe9 100644 --- a/test/hotspot/jtreg/compiler/vectorization/TestFloatConversionsVector.java +++ b/test/hotspot/jtreg/compiler/vectorization/TestFloatConversionsVector.java @@ -27,7 +27,10 @@ * @summary Auto-vectorize Float.floatToFloat16, Float.float16ToFloat APIs * @requires vm.compiler2.enabled * @library /test/lib / - * @run driver compiler.vectorization.TestFloatConversionsVector + * @run driver compiler.vectorization.TestFloatConversionsVector nCOH_nAV + * @run driver compiler.vectorization.TestFloatConversionsVector nCOH_yAV + * @run driver compiler.vectorization.TestFloatConversionsVector yCOH_nAV + * @run driver compiler.vectorization.TestFloatConversionsVector yCOH_yAV */ package compiler.vectorization; @@ -44,18 +47,32 @@ public class TestFloatConversionsVector { private static float [] fout; public static void main(String args[]) { - TestFramework.runWithFlags("-XX:-TieredCompilation", - "-XX:CompileThresholdScaling=0.3"); + TestFramework framework = new TestFramework(TestFloatConversionsVector.class); + framework.addFlags("-XX:-TieredCompilation", "-XX:CompileThresholdScaling=0.3"); + switch (args[0]) { + case "nCOH_nAV" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector"); } + case "nCOH_yAV" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector"); } + case "yCOH_nAV" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", 
"-XX:-AlignVector"); } + case "yCOH_yAV" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector"); } + default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); } + }; + framework.start(); System.out.println("PASSED"); } @Test @IR(counts = {IRNode.VECTOR_CAST_F2HF, IRNode.VECTOR_SIZE + "min(max_float, max_short)", "> 0"}, - applyIfPlatformOr = {"x64", "true", "aarch64", "true", "riscv64", "true"}, - applyIfCPUFeatureOr = {"f16c", "true", "avx512f", "true", "zvfh", "true", "asimd", "true", "sve", "true"}) + applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, + applyIfPlatformOr = {"x64", "true", "aarch64", "true", "riscv64", "true"}, + applyIfCPUFeatureOr = {"f16c", "true", "avx512f", "true", "zvfh", "true", "asimd", "true", "sve", "true"}) public void test_float_float16(short[] sout, float[] finp) { for (int i = 0; i < finp.length; i++) { sout[i] = Float.floatToFloat16(finp[i]); + // With AlignVector, we need 8-byte alignment of vector loads/stores. 
+ // UseCompactObjectHeaders=false UseCompactObjectHeaders=true + // F_adr = base + 16 + 4*i -> i % 2 = 0 F_adr = base + 12 + 4*i -> i % 2 = 1 + // S_adr = base + 16 + 2*i -> i % 4 = 0 S_adr = base + 12 + 2*i -> i % 4 = 2 + // -> vectorize -> no vectorization } } @@ -114,11 +131,17 @@ public class TestFloatConversionsVector { @Test @IR(counts = {IRNode.VECTOR_CAST_HF2F, IRNode.VECTOR_SIZE + "min(max_float, max_short)", "> 0"}, - applyIfPlatformOr = {"x64", "true", "aarch64", "true", "riscv64", "true"}, - applyIfCPUFeatureOr = {"f16c", "true", "avx512f", "true", "zvfh", "true", "asimd", "true", "sve", "true"}) + applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, + applyIfPlatformOr = {"x64", "true", "aarch64", "true", "riscv64", "true"}, + applyIfCPUFeatureOr = {"f16c", "true", "avx512f", "true", "zvfh", "true", "asimd", "true", "sve", "true"}) public void test_float16_float(float[] fout, short[] sinp) { for (int i = 0; i < sinp.length; i++) { fout[i] = Float.float16ToFloat(sinp[i]); + // With AlignVector, we need 8-byte alignment of vector loads/stores. + // UseCompactObjectHeaders=false UseCompactObjectHeaders=true + // F_adr = base + 16 + 4*i -> i % 2 = 0 F_adr = base + 12 + 4*i -> i % 2 = 1 + // S_adr = base + 16 + 2*i -> i % 4 = 0 S_adr = base + 12 + 2*i -> i % 4 = 2 + // -> vectorize -> no vectorization } } diff --git a/test/hotspot/jtreg/compiler/vectorization/runner/ArrayTypeConvertTest.java b/test/hotspot/jtreg/compiler/vectorization/runner/ArrayTypeConvertTest.java index 60d3c05dab6..0da101a8fb7 100644 --- a/test/hotspot/jtreg/compiler/vectorization/runner/ArrayTypeConvertTest.java +++ b/test/hotspot/jtreg/compiler/vectorization/runner/ArrayTypeConvertTest.java @@ -1,5 +1,6 @@ /* * Copyright (c) 2022, 2023, Arm Limited. All rights reserved. + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -29,21 +30,53 @@ * @build jdk.test.whitebox.WhiteBox * compiler.vectorization.runner.VectorizationTestRunner * + * @requires vm.compiler2.enabled + * * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox + * * @run main/othervm -Xbootclasspath/a:. * -XX:+UnlockDiagnosticVMOptions * -XX:+WhiteBoxAPI - * compiler.vectorization.runner.ArrayTypeConvertTest + * compiler.vectorization.runner.ArrayTypeConvertTest nCOH_nAV * - * @requires vm.compiler2.enabled + * @run main/othervm -Xbootclasspath/a:. + * -XX:+UnlockDiagnosticVMOptions + * -XX:+WhiteBoxAPI + * compiler.vectorization.runner.ArrayTypeConvertTest nCOH_yAV + * + * @run main/othervm -Xbootclasspath/a:. + * -XX:+UnlockDiagnosticVMOptions + * -XX:+WhiteBoxAPI + * compiler.vectorization.runner.ArrayTypeConvertTest yCOH_nAV + * + * @run main/othervm -Xbootclasspath/a:. + * -XX:+UnlockDiagnosticVMOptions + * -XX:+WhiteBoxAPI + * compiler.vectorization.runner.ArrayTypeConvertTest yCOH_yAV */ package compiler.vectorization.runner; import compiler.lib.ir_framework.*; +// Explanation about AlignVector: we require 8-byte alignment of all addresses. +// But the array base offset changes with UseCompactObjectHeaders. +// This means it affects the alignment constraints. + public class ArrayTypeConvertTest extends VectorizationTestRunner { + // We must pass the flags directly to the test-VM, and not the driver vm in the @run above. 
+ @Override + protected String[] testVMFlags(String[] args) { + return switch (args[0]) { + case "nCOH_nAV" -> new String[]{"-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector"}; + case "nCOH_yAV" -> new String[]{"-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector"}; + case "yCOH_nAV" -> new String[]{"-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector"}; + case "yCOH_yAV" -> new String[]{"-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector"}; + default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); } + }; + } + private static final int SIZE = 543; private byte[] bytes; @@ -75,6 +108,10 @@ public class ArrayTypeConvertTest extends VectorizationTestRunner { // ---------------- Integer Extension ---------------- @Test + @IR(failOn = {IRNode.STORE_VECTOR}) + // Subword vector casts do not work currently, see JDK-8342095. + // Assert the vectorization failure so that we are reminded to update + // the test when this limitation is addressed in the future. public int[] signExtension() { int[] res = new int[SIZE]; for (int i = 0; i < SIZE; i++) { @@ -84,6 +121,10 @@ public class ArrayTypeConvertTest extends VectorizationTestRunner { } @Test + @IR(failOn = {IRNode.STORE_VECTOR}) + // Subword vector casts do not work currently, see JDK-8342095. + // Assert the vectorization failure so that we are reminded to update + // the test when this limitation is addressed in the future. public int[] zeroExtension() { int[] res = new int[SIZE]; for (int i = 0; i < SIZE; i++) { @@ -93,6 +134,10 @@ public class ArrayTypeConvertTest extends VectorizationTestRunner { } @Test + @IR(failOn = {IRNode.STORE_VECTOR}) + // Subword vector casts do not work currently, see JDK-8342095. + // Assert the vectorization failure so that we are reminded to update + // the test when this limitation is addressed in the future. 
public int[] signExtensionFromByte() { int[] res = new int[SIZE]; for (int i = 0; i < SIZE; i++) { @@ -103,6 +148,10 @@ public class ArrayTypeConvertTest extends VectorizationTestRunner { // ---------------- Integer Narrow ---------------- @Test + @IR(failOn = {IRNode.STORE_VECTOR}) + // Subword vector casts do not work currently, see JDK-8342095. + // Assert the vectorization failure so that we are reminded to update + // the test when this limitation is addressed in the future. public short[] narrowToSigned() { short[] res = new short[SIZE]; for (int i = 0; i < SIZE; i++) { @@ -112,6 +161,10 @@ public class ArrayTypeConvertTest extends VectorizationTestRunner { } @Test + @IR(failOn = {IRNode.STORE_VECTOR}) + // Subword vector casts do not work currently, see JDK-8342095. + // Assert the vectorization failure so that we are reminded to update + // the test when this limitation is addressed in the future. public char[] narrowToUnsigned() { char[] res = new char[SIZE]; for (int i = 0; i < SIZE; i++) { @@ -121,6 +174,10 @@ public class ArrayTypeConvertTest extends VectorizationTestRunner { } @Test + @IR(failOn = {IRNode.STORE_VECTOR}) + // Subword vector casts do not work currently, see JDK-8342095. + // Assert the vectorization failure so that we are reminded to update + // the test when this limitation is addressed in the future. 
public byte[] NarrowToByte() { byte[] res = new byte[SIZE]; for (int i = 0; i < SIZE; i++) { @@ -177,11 +234,19 @@ public class ArrayTypeConvertTest extends VectorizationTestRunner { // ---------------- Convert Subword-I to F/D ---------------- @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "avx2", "true"}, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, counts = {IRNode.VECTOR_CAST_S2F, IRNode.VECTOR_SIZE + "min(max_short, max_float)", ">0"}) public float[] convertShortToFloat() { float[] res = new float[SIZE]; for (int i = 0; i < SIZE; i++) { res[i] = (float) shorts[i]; + // AlignVector=true requires that all vector load/store are 8-byte aligned. + // F_adr = base + UNSAFE.ARRAY_FLOAT_BASE_OFFSET + 4*i + // = 16 (UseCompactObjectHeaders=false) -> i % 2 = 0 + // = 12 (UseCompactObjectHeaders=true ) -> i % 2 = 1 + // S_adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*i + // = 16 (UseCompactObjectHeaders=false) -> i % 4 = 0 -> can align both + // = 12 (UseCompactObjectHeaders=true ) -> i % 4 = 2 -> cannot align both } return res; } @@ -199,6 +264,10 @@ public class ArrayTypeConvertTest extends VectorizationTestRunner { } @Test + @IR(failOn = {IRNode.STORE_VECTOR}) + // Subword vector casts do not work currently, see JDK-8342095. + // Assert the vectorization failure so that we are reminded to update + // the test when this limitation is addressed in the future. public float[] convertCharToFloat() { float[] res = new float[SIZE]; for (int i = 0; i < SIZE; i++) { @@ -208,6 +277,10 @@ public class ArrayTypeConvertTest extends VectorizationTestRunner { } @Test + @IR(failOn = {IRNode.STORE_VECTOR}) + // Subword vector casts do not work currently, see JDK-8342095. + // Assert the vectorization failure so that we are reminded to update + // the test when this limitation is addressed in the future. 
public double[] convertCharToDouble() { double[] res = new double[SIZE]; for (int i = 0; i < SIZE; i++) { @@ -264,22 +337,38 @@ public class ArrayTypeConvertTest extends VectorizationTestRunner { // ---------------- Convert F/D to Subword-I ---------------- @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "avx2", "true"}, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, counts = {IRNode.VECTOR_CAST_F2S, IRNode.VECTOR_SIZE + "min(max_float, max_short)", ">0"}) public short[] convertFloatToShort() { short[] res = new short[SIZE]; for (int i = 0; i < SIZE; i++) { res[i] = (short) floats[i]; + // AlignVector=true requires that all vector load/store are 8-byte aligned. + // F_adr = base + UNSAFE.ARRAY_FLOAT_BASE_OFFSET + 4*i + // = 16 (UseCompactObjectHeaders=false) -> i % 2 = 0 + // = 12 (UseCompactObjectHeaders=true ) -> i % 2 = 1 + // S_adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*i + // = 16 (UseCompactObjectHeaders=false) -> i % 4 = 0 -> can align both + // = 12 (UseCompactObjectHeaders=true ) -> i % 4 = 2 -> cannot align both } return res; } @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "avx2", "true"}, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, counts = {IRNode.VECTOR_CAST_F2S, IRNode.VECTOR_SIZE + "min(max_float, max_char)", ">0"}) public char[] convertFloatToChar() { char[] res = new char[SIZE]; for (int i = 0; i < SIZE; i++) { res[i] = (char) floats[i]; + // AlignVector=true requires that all vector load/store are 8-byte aligned. 
+ // F_adr = base + UNSAFE.ARRAY_FLOAT_BASE_OFFSET + 4*i + // = 16 (UseCompactObjectHeaders=false) -> i % 2 = 0 + // = 12 (UseCompactObjectHeaders=true ) -> i % 2 = 1 + // S_adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*i + // = 16 (UseCompactObjectHeaders=false) -> i % 4 = 0 -> can align both + // = 12 (UseCompactObjectHeaders=true ) -> i % 4 = 2 -> cannot align both } return res; } diff --git a/test/hotspot/jtreg/compiler/vectorization/runner/LoopCombinedOpTest.java b/test/hotspot/jtreg/compiler/vectorization/runner/LoopCombinedOpTest.java index 8a0715eadfe..2fd6a053de7 100644 --- a/test/hotspot/jtreg/compiler/vectorization/runner/LoopCombinedOpTest.java +++ b/test/hotspot/jtreg/compiler/vectorization/runner/LoopCombinedOpTest.java @@ -30,13 +30,29 @@ * @build jdk.test.whitebox.WhiteBox * compiler.vectorization.runner.VectorizationTestRunner * + * @requires vm.compiler2.enabled + * * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox + * * @run main/othervm -Xbootclasspath/a:. * -XX:+UnlockDiagnosticVMOptions * -XX:+WhiteBoxAPI - * compiler.vectorization.runner.LoopCombinedOpTest + * compiler.vectorization.runner.LoopCombinedOpTest nCOH_nAV * - * @requires vm.compiler2.enabled + * @run main/othervm -Xbootclasspath/a:. + * -XX:+UnlockDiagnosticVMOptions + * -XX:+WhiteBoxAPI + * compiler.vectorization.runner.LoopCombinedOpTest nCOH_yAV + * + * @run main/othervm -Xbootclasspath/a:. + * -XX:+UnlockDiagnosticVMOptions + * -XX:+WhiteBoxAPI + * compiler.vectorization.runner.LoopCombinedOpTest yCOH_nAV + * + * @run main/othervm -Xbootclasspath/a:. + * -XX:+UnlockDiagnosticVMOptions + * -XX:+WhiteBoxAPI + * compiler.vectorization.runner.LoopCombinedOpTest yCOH_yAV */ package compiler.vectorization.runner; @@ -47,6 +63,18 @@ import java.util.Random; public class LoopCombinedOpTest extends VectorizationTestRunner { + // We must pass the flags directly to the test-VM, and not the driver vm in the @run above. 
+ @Override + protected String[] testVMFlags(String[] args) { + return switch (args[0]) { + case "nCOH_nAV" -> new String[]{"-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector"}; + case "nCOH_yAV" -> new String[]{"-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector"}; + case "yCOH_nAV" -> new String[]{"-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector"}; + case "yCOH_yAV" -> new String[]{"-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector"}; + default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); } + }; + } + private static final int SIZE = 543; private int[] a; @@ -84,7 +112,8 @@ public class LoopCombinedOpTest extends VectorizationTestRunner { @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"}, - counts = {IRNode.STORE_VECTOR, ">0"}) + counts = {IRNode.STORE_VECTOR, ">0", + IRNode.LOAD_VECTOR_I, "> 0"}) public int[] opWithConstant() { int[] res = new int[SIZE]; for (int i = 0; i < SIZE; i++) { @@ -95,7 +124,8 @@ public class LoopCombinedOpTest extends VectorizationTestRunner { @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"}, - counts = {IRNode.STORE_VECTOR, ">0"}) + counts = {IRNode.STORE_VECTOR, ">0", + IRNode.LOAD_VECTOR_I, "> 0"}) public int[] opWithLoopInvariant() { int[] res = new int[SIZE]; for (int i = 0; i < SIZE; i++) { @@ -106,7 +136,8 @@ public class LoopCombinedOpTest extends VectorizationTestRunner { @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"}, - counts = {IRNode.STORE_VECTOR, ">0"}) + counts = {IRNode.STORE_VECTOR, ">0", + IRNode.LOAD_VECTOR_I, "> 0"}) public int[] opWithConstantAndLoopInvariant() { int[] res = new int[SIZE]; for (int i = 0; i < SIZE; i++) { @@ -117,7 +148,8 @@ public class LoopCombinedOpTest extends VectorizationTestRunner { @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"}, - counts = 
{IRNode.STORE_VECTOR, ">0"}) + counts = {IRNode.STORE_VECTOR, ">0", + IRNode.LOAD_VECTOR_I, "> 0"}) public int[] multipleOps() { int[] res = new int[SIZE]; for (int i = 0; i < SIZE; i++) { @@ -128,7 +160,8 @@ public class LoopCombinedOpTest extends VectorizationTestRunner { @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"}, - counts = {IRNode.STORE_VECTOR, ">0"}) + counts = {IRNode.STORE_VECTOR, ">0", + IRNode.LOAD_VECTOR_I, "> 0"}) public int[] multipleOpsWithMultipleConstants() { int[] res = new int[SIZE]; for (int i = 0; i < SIZE; i++) { @@ -139,7 +172,8 @@ public class LoopCombinedOpTest extends VectorizationTestRunner { @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"}, - counts = {IRNode.STORE_VECTOR, ">0"}) + counts = {IRNode.STORE_VECTOR, ">0", + IRNode.LOAD_VECTOR_I, "> 0"}) // With sse2, the MulI does not vectorize. This means we have vectorized stores // to res1, but scalar loads from res1. The store-to-load-forwarding failure // detection catches this and rejects vectorization. 
@@ -157,7 +191,8 @@ public class LoopCombinedOpTest extends VectorizationTestRunner { @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"}, - counts = {IRNode.STORE_VECTOR, ">0"}) + counts = {IRNode.STORE_VECTOR, ">0", + IRNode.LOAD_VECTOR_I, "> 0"}) public int[] multipleStoresWithCommonSubExpression() { int[] res1 = new int[SIZE]; int[] res2 = new int[SIZE]; @@ -172,20 +207,43 @@ public class LoopCombinedOpTest extends VectorizationTestRunner { @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"}, - counts = {IRNode.STORE_VECTOR, ">0"}) + applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"}, + counts = {IRNode.STORE_VECTOR, ">0", + IRNode.LOAD_VECTOR_S, "> 0", + IRNode.LOAD_VECTOR_I, "> 0"}) public int[] multipleOpsWith2DifferentTypes() { short[] res1 = new short[SIZE]; int[] res2 = new int[SIZE]; for (int i = 0; i < SIZE; i++) { res1[i] = (short) (s1[i] + s2[i]); res2[i] = a[i] + b[i]; + // We have a mix of int and short loads/stores. + // With UseCompactObjectHeaders and AlignVector, + // we must 8-byte align all vector loads/stores. + // + // int: + // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter + // = 16 (or 12 if UseCompactObjectHeaders=true) + // If UseCompactObjectHeaders=false: iter % 2 = 0 + // If UseCompactObjectHeaders=true: iter % 2 = 1 + // + // short: + // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*iter + // = 16 (or 12 if UseCompactObjectHeaders=true) + // If UseCompactObjectHeaders=false: iter % 4 = 0 + // If UseCompactObjectHeaders=true: iter % 4 = 2 + // + // -> we cannot align both if UseCompactObjectHeaders=true.
} return res2; } @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"}, - counts = {IRNode.STORE_VECTOR, ">0"}) + applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"}, + counts = {IRNode.STORE_VECTOR, ">0", + IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_ANY, "> 0", + IRNode.LOAD_VECTOR_L, "> 0"}) public long[] multipleOpsWith3DifferentTypes() { short[] res1 = new short[SIZE]; int[] res2 = new int[SIZE]; @@ -194,13 +252,32 @@ public class LoopCombinedOpTest extends VectorizationTestRunner { res1[i] = (short) (s1[i] + s2[i]); res2[i] = a[i] + b[i]; res3[i] = l1[i] + l2[i]; + // We have a mix of int and short loads/stores. + // With UseCompactObjectHeaders and AlignVector, + // we must 8-byte align all vector loads/stores. + // + // int: + // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter + // = 16 (or 12 if UseCompactObjectHeaders=true) + // If UseCompactObjectHeaders=false: iter % 2 = 0 + // If UseCompactObjectHeaders=true: iter % 2 = 1 + // + // short: + // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*iter + // = 16 (or 12 if UseCompactObjectHeaders=true) + // If UseCompactObjectHeaders=false: iter % 4 = 0 + // If UseCompactObjectHeaders=true: iter % 4 = 2 + // + // -> we cannot align both if UseCompactObjectHeaders=true.
} return res3; } @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"}, - counts = {IRNode.STORE_VECTOR, ">0"}) + counts = {IRNode.STORE_VECTOR, ">0", + IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_ANY, "> 0", + IRNode.LOAD_VECTOR_L, "> 0"}) public long[] multipleOpsWith2NonAdjacentTypes() { short[] res1 = new short[SIZE]; long[] res2 = new long[SIZE]; @@ -213,50 +290,93 @@ public class LoopCombinedOpTest extends VectorizationTestRunner { @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"}, - counts = {IRNode.STORE_VECTOR, ">0"}) + applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"}, + counts = {IRNode.STORE_VECTOR, ">0", + IRNode.LOAD_VECTOR_S, "> 0", + IRNode.LOAD_VECTOR_I, "> 0"}) public int[] multipleOpsWith2DifferentTypesAndConstant() { short[] res1 = new short[SIZE]; int[] res2 = new int[SIZE]; for (int i = 0; i < SIZE; i++) { res1[i] = (short) (s1[i] + s2[i]); res2[i] = a[i] + 88888888;; + // We have a mix of int and short loads/stores. + // With UseCompactObjectHeaders and AlignVector, + // we must 8-byte align all vector loads/stores. + // + // int: + // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter + // = 16 (or 12 if UseCompactObjectHeaders=true) + // If UseCompactObjectHeaders=false: iter % 2 = 0 + // If UseCompactObjectHeaders=true: iter % 2 = 1 + // + // short: + // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*iter + // = 16 (or 12 if UseCompactObjectHeaders=true) + // If UseCompactObjectHeaders=false: iter % 4 = 0 + // If UseCompactObjectHeaders=true: iter % 4 = 2 + // + // -> we cannot align both if UseCompactObjectHeaders=true. } return res2; } @Test - @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"}, - // This test fails with compact headers, but only with UseSSE<=3.
- applyIf = { "UseCompactObjectHeaders", "false" }, - counts = {IRNode.STORE_VECTOR, ">0"}) + @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"}, + applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"}, + counts = {IRNode.STORE_VECTOR, ">0", + IRNode.LOAD_VECTOR_S, "> 0", + IRNode.LOAD_VECTOR_I, "> 0"}) public int[] multipleOpsWith2DifferentTypesAndInvariant() { short[] res1 = new short[SIZE]; int[] res2 = new int[SIZE]; for (int i = 0; i < SIZE; i++) { res1[i] = (short) (s1[i] + s2[i]); res2[i] = a[i] * intInv; + // We have a mix of int and short loads/stores. + // With UseCompactObjectHeaders and AlignVector, + // we must 8-byte align all vector loads/stores. + // + // int: + // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter + // = 16 (or 12 if UseCompactObjectHeaders=true) + // If UseCompactObjectHeaders=false: iter % 2 = 0 + // If UseCompactObjectHeaders=true: iter % 2 = 1 + // + // short: + // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*iter + // = 16 (or 12 if UseCompactObjectHeaders=true) + // If UseCompactObjectHeaders=false: iter % 4 = 0 + // If UseCompactObjectHeaders=true: iter % 4 = 2 + // + // -> we cannot align both if UseCompactObjectHeaders=true. } return res2; } @Test - @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"}, - // This test fails with compact headers, but only with UseSSE<=3.
- applyIf = { "UseCompactObjectHeaders", "false" }, - counts = {IRNode.STORE_VECTOR, ">0"}) + @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"}, + applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"}, + counts = {IRNode.STORE_VECTOR, ">0", + IRNode.LOAD_VECTOR_S, "> 0", + IRNode.LOAD_VECTOR_I, "> 0"}) public int[] multipleOpsWith2DifferentTypesAndComplexExpression() { short[] res1 = new short[SIZE]; int[] res2 = new int[SIZE]; for (int i = 0; i < SIZE; i++) { res1[i] = (short) (s1[i] + s2[i]); res2[i] = a[i] * (b[i] + intInv * c[i] & 0xfffffa); + // same argument as in multipleOpsWith2DifferentTypesAndInvariant. } return res2; } @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse3", "true"}, - counts = {IRNode.STORE_VECTOR, ">0"}) + applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"}, + counts = {IRNode.STORE_VECTOR, ">0", + IRNode.LOAD_VECTOR_S, "> 0", + IRNode.LOAD_VECTOR_I, "> 0"}) public int[] multipleOpsWith2DifferentTypesAndSharedOp() { int i = 0, sum = 0; int[] res1 = new int[SIZE]; @@ -264,11 +384,29 @@ public class LoopCombinedOpTest extends VectorizationTestRunner { while (++i < SIZE) { sum += (res1[i]--); res2[i]++; + // We have a mix of int and short loads/stores. + // With UseCompactObjectHeaders and AlignVector, + // we must 8-byte align all vector loads/stores. + // + // int: + // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter + // = 16 (or 12 if UseCompactObjectHeaders=true) + // If UseCompactObjectHeaders=false: iter % 2 = 0 + // If UseCompactObjectHeaders=true: iter % 2 = 1 + // + // short: + // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*iter + // = 16 (or 12 if UseCompactObjectHeaders=true) + // If UseCompactObjectHeaders=false: iter % 4 = 0 + // If UseCompactObjectHeaders=true: iter % 4 = 2 + // + // -> we cannot align both if UseCompactObjectHeaders=true. } return res1; } @Test + // POPULATE_INDEX seems to mess with vectorization, see JDK-8332878.
public int[] fillIndexPlusStride() { int[] res = new int[SIZE]; for (int i = 0; i < SIZE; i++) { @@ -278,6 +416,7 @@ public class LoopCombinedOpTest extends VectorizationTestRunner { } @Test + // POPULATE_INDEX seems to mess with vectorization, see JDK-8332878. public int[] addArrayWithIndex() { int[] res = new int[SIZE]; for (int i = 0; i < SIZE; i++) { @@ -287,6 +426,7 @@ public class LoopCombinedOpTest extends VectorizationTestRunner { } @Test + // POPULATE_INDEX seems to mess with vectorization, see JDK-8332878. public short[] multiplyAddShortIndex() { short[] res = new short[SIZE]; for (int i = 0; i < SIZE; i++) { @@ -296,6 +436,7 @@ public class LoopCombinedOpTest extends VectorizationTestRunner { } @Test + // POPULATE_INDEX seems to mess with vectorization, see JDK-8332878. public int[] multiplyBySumOfIndexAndInvariant() { int[] res = new int[SIZE]; for (int i = 0; i < SIZE; i++) { @@ -306,19 +447,30 @@ public class LoopCombinedOpTest extends VectorizationTestRunner { @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"}, + applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"}, counts = {IRNode.STORE_VECTOR, ">0"}) public int[] manuallyUnrolledStride2() { int[] res = new int[SIZE]; for (int i = 0; i < SIZE - 1; i += 2) { res[i] = a[i] * b[i]; res[i + 1] = a[i + 1] * b[i + 1]; + // Hand-unrolling can mess with alignment! + // + // With UseCompactObjectHeaders and AlignVector, + // we must 8-byte align all vector loads/stores. 
+ // + // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 8*iter + // = 16 (or 12 if UseCompactObjectHeaders=true) + // If UseCompactObjectHeaders=false: 16 divisible by 8 -> vectorize + // If UseCompactObjectHeaders=true: 12 not divisible by 8 -> not vectorize } return res; } @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"}, - counts = {IRNode.STORE_VECTOR, ">0"}) + counts = {IRNode.STORE_VECTOR, ">0", + IRNode.LOAD_VECTOR_I, "> 0"}) public int partialVectorizableLoop() { int[] res = new int[SIZE]; int k = 9; diff --git a/test/hotspot/jtreg/compiler/vectorization/runner/VectorizationTestRunner.java b/test/hotspot/jtreg/compiler/vectorization/runner/VectorizationTestRunner.java index ff8787eb913..7f8e4ec3b39 100644 --- a/test/hotspot/jtreg/compiler/vectorization/runner/VectorizationTestRunner.java +++ b/test/hotspot/jtreg/compiler/vectorization/runner/VectorizationTestRunner.java @@ -44,7 +44,7 @@ public class VectorizationTestRunner { private static final int NMETHOD_COMP_LEVEL_IDX = 1; private static final int NMETHOD_INSTS_IDX = 2; - protected void run() { + protected void run(String[] args) { Class klass = getClass(); // 1) Vectorization correctness test @@ -68,9 +68,15 @@ public class VectorizationTestRunner { // To test vectorizability, invoke the IR test framework to check existence of // expected C2 IR node. TestFramework irTest = new TestFramework(klass); + irTest.addFlags(testVMFlags(args)); irTest.start(); } + // Override this to add extra flags.
+ protected String[] testVMFlags(String[] args) { + return new String[0]; // by default no extra flags + } + private void verifyTestMethod(Method method) { // Check method parameter count if (method.getParameterCount() > 0) { @@ -191,6 +197,6 @@ public class VectorizationTestRunner { public static void main(String[] args) { VectorizationTestRunner testObj = createTestInstance(Utils.TEST_NAME); - testObj.run(); + testObj.run(args); } } diff --git a/test/hotspot/jtreg/testlibrary_tests/ir_framework/examples/IRExample.java b/test/hotspot/jtreg/testlibrary_tests/ir_framework/examples/IRExample.java index 539673a6c28..6a0a605e4ab 100644 --- a/test/hotspot/jtreg/testlibrary_tests/ir_framework/examples/IRExample.java +++ b/test/hotspot/jtreg/testlibrary_tests/ir_framework/examples/IRExample.java @@ -179,7 +179,7 @@ public class IRExample { @Test // In some cases, we can know the exact size, here 4 @IR(counts = {IRNode.LOAD_VECTOR_F, IRNode.VECTOR_SIZE_4, "> 0"}, - applyIf = {"MaxVectorSize", ">=16"}, + applyIfAnd = {"MaxVectorSize", ">=16", "AlignVector", "false"}, applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"}) // Hence, we know any other sizes are impossible. // We can also specify that explicitly for failOn @@ -206,7 +206,7 @@ public class IRExample { // Here, we can pack at most 8 given the 8-blocks and 8-gaps. // But we can also never pack more than max_float @IR(counts = {IRNode.LOAD_VECTOR_F, IRNode.VECTOR_SIZE + "min(8, max_float)", "> 0"}, - applyIf = {"MaxVectorSize", ">=16"}, + applyIfAnd = {"MaxVectorSize", ">=16", "AlignVector", "false"}, applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"}) static float[] testVectorNodeSizeMinClause() { float[] a = new float[1024*8];