8300258: C2: vectorization fails on simple ByteBuffer loop

Co-authored-by: Emanuel Peter <epeter@openjdk.org>
Reviewed-by: epeter, kvn
This commit is contained in:
Roland Westrelin 2023-03-09 08:00:24 +00:00
parent 5e232cf0a9
commit dc523a58a6
4 changed files with 425 additions and 11 deletions
src/hotspot/share/opto
test/hotspot/jtreg/compiler

@ -662,9 +662,9 @@ void SuperWord::find_adjacent_refs() {
}
}
} else {
if (same_velt_type(mem_ref, best_align_to_mem_ref)) {
if (same_memory_slice(best_align_to_mem_ref, mem_ref)) {
// Can't allow vectorization of unaligned memory accesses with the
// same type since it could be overlapped accesses to the same array.
// same memory slice since it could be overlapped accesses to the same array.
create_pack = false;
} else {
// Allow independent (different type) unaligned memory operations
@ -672,7 +672,7 @@ void SuperWord::find_adjacent_refs() {
if (vectors_should_be_aligned()) {
create_pack = false;
} else {
// Check if packs of the same memory type but
// Check if packs of the same memory slice but
// with a different alignment were created before.
for (uint i = 0; i < align_to_refs.size(); i++) {
MemNode* mr = align_to_refs.at(i)->as_Mem();
@ -680,7 +680,7 @@ void SuperWord::find_adjacent_refs() {
// Skip when we are looking at same memory operation.
continue;
}
if (same_velt_type(mr, mem_ref) &&
if (same_memory_slice(mem_ref, mr) &&
memory_alignment(mr, iv_adjustment) != 0)
create_pack = false;
}
@ -708,25 +708,25 @@ void SuperWord::find_adjacent_refs() {
}
}
} else { // Don't create unaligned pack
// First, remove remaining memory ops of the same type from the list.
// First, remove remaining memory ops of the same memory slice from the list.
for (int i = memops.size() - 1; i >= 0; i--) {
MemNode* s = memops.at(i)->as_Mem();
if (same_velt_type(s, mem_ref)) {
if (same_memory_slice(s, mem_ref) || same_velt_type(s, mem_ref)) {
memops.remove(i);
}
}
// Second, remove already constructed packs of the same type.
// Second, remove already constructed packs of the same memory slice.
for (int i = _packset.length() - 1; i >= 0; i--) {
Node_List* p = _packset.at(i);
MemNode* s = p->at(0)->as_Mem();
if (same_velt_type(s, mem_ref)) {
if (same_memory_slice(s, mem_ref) || same_velt_type(s, mem_ref)) {
remove_pack_at(i);
}
}
// If needed find the best memory reference for loop alignment again.
if (same_velt_type(mem_ref, best_align_to_mem_ref)) {
if (same_memory_slice(mem_ref, best_align_to_mem_ref) || same_velt_type(mem_ref, best_align_to_mem_ref)) {
// Put memory ops from remaining packs back on memops list for
// the best alignment search.
uint orig_msize = memops.size();
@ -1253,9 +1253,9 @@ bool SuperWord::are_adjacent_refs(Node* s1, Node* s2) {
// FIXME - co_locate_pack fails on Stores in different mem-slices, so
// only pack memops that are in the same alias set until that's fixed.
if (_phase->C->get_alias_index(s1->as_Mem()->adr_type()) !=
_phase->C->get_alias_index(s2->as_Mem()->adr_type()))
if (!same_memory_slice(s1->as_Mem(), s2->as_Mem())) {
return false;
}
SWPointer p1(s1->as_Mem(), this, NULL, false);
SWPointer p2(s2->as_Mem(), this, NULL, false);
if (p1.base() != p2.base() || !p1.comparable(p2)) return false;
@ -3681,6 +3681,10 @@ bool SuperWord::same_velt_type(Node* n1, Node* n2) {
return vt1 == vt2;
}
// Returns true when both memory references are assigned the same alias index
// by the compiler, i.e. when they belong to the same memory slice.
bool SuperWord::same_memory_slice(MemNode* best_align_to_mem_ref, MemNode* mem_ref) const {
  const int mem_ref_alias = _phase->C->get_alias_index(mem_ref->adr_type());
  const int best_ref_alias = _phase->C->get_alias_index(best_align_to_mem_ref->adr_type());
  return mem_ref_alias == best_ref_alias;
}
//------------------------------in_packset---------------------------
// Are s1 and s2 in a pack pair and ordered as s1,s2?
bool SuperWord::in_packset(Node* s1, Node* s2) {

@ -449,6 +449,7 @@ class SuperWord : public ResourceObj {
// Basic type of the array element described by the vector element type of n.
BasicType velt_basic_type(Node* n) { return velt_type(n)->array_element_basic_type(); }
// Records t as the vector element type of n; the per-node info table is grown
// first so that the slot at n's block index exists.
void set_velt_type(Node* n, const Type* t) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_velt_type = t; }
// Compares the vector element types of n1 and n2 (defined out of line).
bool same_velt_type(Node* n1, Node* n2);
// True when both memory nodes map to the same alias index (defined out of line).
bool same_memory_slice(MemNode* best_align_to_mem_ref, MemNode* mem_ref) const;
// my_pack
// Pack recorded for n in the per-node info, or NULL when n is not in the block.
Node_List* my_pack(Node* n) { return !in_bb(n) ? NULL : _node_info.adr_at(bb_idx(n))->_my_pack; }

@ -0,0 +1,339 @@
/*
* Copyright (c) 2023, Red Hat, Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package compiler.c2.irTests;
import compiler.lib.ir_framework.*;
import jdk.test.lib.Utils;
import jdk.test.whitebox.WhiteBox;
import jdk.internal.misc.Unsafe;
import java.util.Random;
import java.util.Arrays;
import java.nio.ByteOrder;
/*
* @test
* @bug 8300258
* @key randomness
* @requires (os.simpleArch == "x64") | (os.simpleArch == "aarch64")
* @summary C2: vectorization fails on simple ByteBuffer loop
* @modules java.base/jdk.internal.misc
* @library /test/lib /
* @build jdk.test.whitebox.WhiteBox
* @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox
* @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI compiler.c2.irTests.TestVectorizationMismatchedAccess
*/
/**
 * IR-framework test for JDK-8300258: loops that copy {@code long} values with
 * {@code Unsafe.putLongUnaligned}/{@code getLongUnaligned} into a {@code byte[]}
 * or into off-heap memory ("mismatched" accesses).
 *
 * <p>Each {@code @Test} method carries an {@code @IR} rule stating whether vector
 * load/store nodes must or must not appear, and each {@code @Run} method checks
 * the produced bytes against precomputed reference data.
 */
public class TestVectorizationMismatchedAccess {
    private static final Unsafe UNSAFE = Unsafe.getUnsafe();
    private static final Random RANDOM = Utils.getRandomInstance();
    private static final WhiteBox wb = WhiteBox.getWhiteBox();

    /**
     * Runs the IR tests only when the {@code AlignVector} flag exists and is off.
     *
     * @throws RuntimeException on a big-endian platform, because the reference
     *         data below is laid out for little-endian byte order
     */
    public static void main(String[] args) {
        Object alignVector = wb.getVMFlag("AlignVector");
        if (alignVector != null && !((Boolean)alignVector)) {
            if (ByteOrder.nativeOrder() != ByteOrder.LITTLE_ENDIAN) {
                throw new RuntimeException("fix test that was written for a little endian platform");
            }
            TestFramework.runWithFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED");
        }
    }

    // Number of long elements; the byte buffers hold size * 8 bytes.
    static int size = 1024;
    static byte[] byteArray = new byte[size * 8];
    static long[] longArray = new long[size];
    // Reference data: verifyLongArray[i] is the little-endian composition of
    // verifyByteArray[8 * i .. 8 * i + 7].
    static byte[] verifyByteArray = new byte[size * 8];
    static long[] verifyLongArray = new long[size];
    // Deliberately a mutable static: tests read it as a loop-invariant offset
    // whose value is set by the runner.
    static long baseOffset = 0;
    // Off-heap destination of size * 8 bytes; never freed (lives for the test run).
    static long baseOffHeap = UNSAFE.allocateMemory(size * 8);

    static {
        for (int i = 0; i < verifyByteArray.length; i++) {
            // nextInt(Byte.MAX_VALUE) is non-negative, so widening a byte to long
            // below never introduces sign bits into the OR-ed value.
            verifyByteArray[i] = (byte)RANDOM.nextInt(Byte.MAX_VALUE);
        }
        for (int i = 0; i < verifyLongArray.length; i++) {
            verifyLongArray[i] = 0;
            for (int j = 0; j < 8; j++) {
                verifyLongArray[i] = verifyLongArray[i] | (((long)verifyByteArray[8 * i + j]) << 8 * j);
            }
        }
    }

    /**
     * Runs a long[] -> byte[] copy test and verifies {@code byteArray}.
     *
     * @param test   the test invocation; expected to copy from {@code longArray}
     *               into {@code byteArray}
     * @param offset byte displacement of the destination relative to the source;
     *               bytes outside the shifted window must still be zero
     */
    private static void runAndVerify(Runnable test, int offset) {
        System.arraycopy(verifyLongArray, 0, longArray, 0, longArray.length);
        Arrays.fill(byteArray, (byte)0);
        test.run();
        int i;
        // Bytes before the shifted window are untouched.
        for (i = 0; i < Math.max(offset, 0); i++) {
            if (byteArray[i] != 0) {
                throw new RuntimeException("Incorrect result at " + i + " " + byteArray[i] + " != 0");
            }
        }
        // Bytes inside the window match the reference data shifted by offset.
        for (; i < Math.min(byteArray.length + offset, byteArray.length); i++) {
            if (byteArray[i] != verifyByteArray[i - offset]) {
                throw new RuntimeException("Incorrect result at " + i + " " + byteArray[i] + " != " + verifyByteArray[i-offset]);
            }
        }
        // Bytes after the window are untouched.
        for (; i < byteArray.length; i++) {
            if (byteArray[i] != 0) {
                throw new RuntimeException("Incorrect result at " + i + " " + byteArray[i] + " != 0");
            }
        }
    }

    /**
     * Runs a byte[] -> byte[] copy test where source and destination are both
     * {@code byteArray}, then verifies the result.
     *
     * @param test   the test invocation
     * @param offset byte displacement of the destination relative to the source
     */
    private static void runAndVerify2(Runnable test, int offset) {
        System.arraycopy(verifyByteArray, 0, byteArray, 0, byteArray.length);
        test.run();
        int i;
        for (i = 0; i < Math.max(offset, 0); i++) {
            if (byteArray[i] != verifyByteArray[i]) {
                throw new RuntimeException("Incorrect result at " + i + " " + byteArray[i] + " != " + verifyByteArray[i]);
            }
        }
        for (; i < Math.min(byteArray.length + offset, byteArray.length); i++) {
            // With a positive offset each store feeds a later load of the same
            // array, so the first 8 bytes are replicated through the window.
            int val = offset > 0 ? verifyByteArray[(i-offset) % 8] : verifyByteArray[i-offset];
            if (byteArray[i] != val) {
                // Report the value that was actually expected.
                throw new RuntimeException("Incorrect result at " + i + " " + byteArray[i] + " != " + val);
            }
        }
        for (; i < byteArray.length; i++) {
            if (byteArray[i] != verifyByteArray[i]) {
                throw new RuntimeException("Incorrect result at " + i + " " + byteArray[i] + " != " + verifyByteArray[i]);
            }
        }
    }

    /**
     * Runs a long[] -> off-heap copy test and verifies the {@code size * 8}
     * bytes at {@code baseOffHeap}.
     *
     * @param test   the test invocation; expected to copy from {@code longArray}
     *               to the off-heap buffer
     * @param offset byte displacement of the destination relative to the source
     */
    private static void runAndVerify3(Runnable test, int offset) {
        System.arraycopy(verifyLongArray, 0, longArray, 0, longArray.length);
        for (int i = 0; i < size * 8; i++) {
            UNSAFE.putByte(null, baseOffHeap + i, (byte)0);
        }
        test.run();
        int i;
        for (i = 0; i < Math.max(offset, 0); i++) {
            byte actual = UNSAFE.getByte(null, baseOffHeap + i);
            if (actual != 0) {
                throw new RuntimeException("Incorrect result at " + i + " " + actual + " != 0");
            }
        }
        for (; i < Math.min(size * 8 + offset, size * 8); i++) {
            byte actual = UNSAFE.getByte(null, baseOffHeap + i);
            if (actual != verifyByteArray[i - offset]) {
                throw new RuntimeException("Incorrect result at " + i + " " + actual + " != " + verifyByteArray[i-offset]);
            }
        }
        // Bounded by the off-heap buffer size, like the loops above.
        for (; i < size * 8; i++) {
            byte actual = UNSAFE.getByte(null, baseOffHeap + i);
            if (actual != 0) {
                throw new RuntimeException("Incorrect result at " + i + " " + actual + " != 0");
            }
        }
    }

    // ---- long[] -> byte[]: the IR rules require vector loads and stores ----

    /** Copies src[i] to dest at byte offset 8 * i. */
    @Test
    @IR(counts = { IRNode.LOAD_VECTOR, ">=1", IRNode.STORE_VECTOR, ">=1" })
    public static void testByteLong1(byte[] dest, long[] src) {
        for (int i = 0; i < src.length; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i, src[i]);
        }
    }

    @Run(test = "testByteLong1")
    public static void testByteLong1_runner() {
        runAndVerify(() -> testByteLong1(byteArray, longArray), 0);
    }

    /** Copies src[i] to dest one long slot earlier (destination shifted by -8 bytes). */
    @Test
    @IR(counts = { IRNode.LOAD_VECTOR, ">=1", IRNode.STORE_VECTOR, ">=1" })
    public static void testByteLong2(byte[] dest, long[] src) {
        for (int i = 1; i < src.length; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i - 1), src[i]);
        }
    }

    @Run(test = "testByteLong2")
    public static void testByteLong2_runner() {
        runAndVerify(() -> testByteLong2(byteArray, longArray), -8);
    }

    /** Copies src[i] to dest one long slot later (destination shifted by +8 bytes). */
    @Test
    @IR(counts = { IRNode.LOAD_VECTOR, ">=1", IRNode.STORE_VECTOR, ">=1" })
    public static void testByteLong3(byte[] dest, long[] src) {
        for (int i = 0; i < src.length - 1; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i + 1), src[i]);
        }
    }

    @Run(test = "testByteLong3")
    public static void testByteLong3_runner() {
        runAndVerify(() -> testByteLong3(byteArray, longArray), 8);
    }

    /** Same copy as testByteLong1, but the base offset is read from the baseOffset field. */
    @Test
    @IR(counts = { IRNode.LOAD_VECTOR, ">=1", IRNode.STORE_VECTOR, ">=1" })
    public static void testByteLong4(byte[] dest, long[] src, int start, int stop) {
        for (int i = start; i < stop; i++) {
            UNSAFE.putLongUnaligned(dest, 8 * i + baseOffset, src[i]);
        }
    }

    @Run(test = "testByteLong4")
    public static void testByteLong4_runner() {
        baseOffset = UNSAFE.ARRAY_BYTE_BASE_OFFSET;
        runAndVerify(() -> testByteLong4(byteArray, longArray, 0, size), 0);
    }

    /** Destination index is displaced by baseOffset long slots (set to 1 by the runner). */
    @Test
    @IR(counts = { IRNode.LOAD_VECTOR, ">=1", IRNode.STORE_VECTOR, ">=1" })
    public static void testByteLong5(byte[] dest, long[] src, int start, int stop) {
        for (int i = start; i < stop; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i + baseOffset), src[i]);
        }
    }

    @Run(test = "testByteLong5")
    public static void testByteLong5_runner() {
        baseOffset = 1;
        runAndVerify(() -> testByteLong5(byteArray, longArray, 0, size-1), 8);
    }

    // ---- byte[] -> byte[]: the runners pass the same array as src and dest ----

    /** Load and store hit the same 8 bytes in every iteration; vector nodes are required. */
    @Test
    @IR(counts = { IRNode.LOAD_VECTOR, ">=1", IRNode.STORE_VECTOR, ">=1" })
    public static void testByteByte1(byte[] dest, byte[] src) {
        for (int i = 0; i < src.length / 8; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i, UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
        }
    }

    @Run(test = "testByteByte1")
    public static void testByteByte1_runner() {
        runAndVerify2(() -> testByteByte1(byteArray, byteArray), 0);
    }

    // Vectorizing this loop would be legal, but it is currently not vectorized,
    // so the IR rule stays disabled.
    @Test
    //@IR(counts = { IRNode.LOAD_VECTOR, ">=1", IRNode.STORE_VECTOR, ">=1" })
    public static void testByteByte2(byte[] dest, byte[] src) {
        for (int i = 1; i < src.length / 8; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i - 1), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
        }
    }

    @Run(test = "testByteByte2")
    public static void testByteByte2_runner() {
        runAndVerify2(() -> testByteByte2(byteArray, byteArray), -8);
    }

    /** Each store writes the slot the next iteration loads; the IR rule forbids vector nodes. */
    @Test
    @IR(failOn = { IRNode.LOAD_VECTOR, IRNode.STORE_VECTOR })
    public static void testByteByte3(byte[] dest, byte[] src) {
        for (int i = 0; i < src.length / 8 - 1; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i + 1), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
        }
    }

    @Run(test = "testByteByte3")
    public static void testByteByte3_runner() {
        runAndVerify2(() -> testByteByte3(byteArray, byteArray), 8);
    }

    /**
     * Store offset depends on the baseOffset field; the IR rule forbids vector
     * nodes (presumably because the relative position of load and store is
     * unknown at compile time).
     */
    @Test
    @IR(failOn = { IRNode.LOAD_VECTOR, IRNode.STORE_VECTOR })
    public static void testByteByte4(byte[] dest, byte[] src, int start, int stop) {
        for (int i = start; i < stop; i++) {
            UNSAFE.putLongUnaligned(dest, 8 * i + baseOffset, UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
        }
    }

    @Run(test = "testByteByte4")
    public static void testByteByte4_runner() {
        baseOffset = UNSAFE.ARRAY_BYTE_BASE_OFFSET;
        runAndVerify2(() -> testByteByte4(byteArray, byteArray, 0, size), 0);
    }

    /** Store index is displaced by baseOffset long slots; the IR rule forbids vector nodes. */
    @Test
    @IR(failOn = { IRNode.LOAD_VECTOR, IRNode.STORE_VECTOR })
    public static void testByteByte5(byte[] dest, byte[] src, int start, int stop) {
        for (int i = start; i < stop; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i + baseOffset), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
        }
    }

    @Run(test = "testByteByte5")
    public static void testByteByte5_runner() {
        baseOffset = 1;
        runAndVerify2(() -> testByteByte5(byteArray, byteArray, 0, size-1), 8);
    }

    // ---- long[] -> off-heap memory: the IR rules require vector loads and stores ----

    /** Copies src[i] to the raw address dest + 8 * i. */
    @Test
    @IR(counts = { IRNode.LOAD_VECTOR, ">=1", IRNode.STORE_VECTOR, ">=1" })
    public static void testOffHeapLong1(long dest, long[] src) {
        for (int i = 0; i < src.length; i++) {
            UNSAFE.putLongUnaligned(null, dest + 8 * i, src[i]);
        }
    }

    @Run(test = "testOffHeapLong1")
    public static void testOffHeapLong1_runner() {
        runAndVerify3(() -> testOffHeapLong1(baseOffHeap, longArray), 0);
    }

    /** Copies src[i] one long slot earlier in the off-heap buffer. */
    @Test
    @IR(counts = { IRNode.LOAD_VECTOR, ">=1", IRNode.STORE_VECTOR, ">=1" })
    public static void testOffHeapLong2(long dest, long[] src) {
        for (int i = 1; i < src.length; i++) {
            UNSAFE.putLongUnaligned(null, dest + 8 * (i - 1), src[i]);
        }
    }

    @Run(test = "testOffHeapLong2")
    public static void testOffHeapLong2_runner() {
        runAndVerify3(() -> testOffHeapLong2(baseOffHeap, longArray), -8);
    }

    /** Copies src[i] one long slot later in the off-heap buffer. */
    @Test
    @IR(counts = { IRNode.LOAD_VECTOR, ">=1", IRNode.STORE_VECTOR, ">=1" })
    public static void testOffHeapLong3(long dest, long[] src) {
        for (int i = 0; i < src.length - 1; i++) {
            UNSAFE.putLongUnaligned(null, dest + 8 * (i + 1), src[i]);
        }
    }

    @Run(test = "testOffHeapLong3")
    public static void testOffHeapLong3_runner() {
        runAndVerify3(() -> testOffHeapLong3(baseOffHeap, longArray), 8);
    }

    /** Destination address is additionally displaced by the baseOffset field (8 in the runner). */
    @Test
    @IR(counts = { IRNode.LOAD_VECTOR, ">=1", IRNode.STORE_VECTOR, ">=1" })
    public static void testOffHeapLong4(long dest, long[] src, int start, int stop) {
        for (int i = start; i < stop; i++) {
            UNSAFE.putLongUnaligned(null, dest + 8 * i + baseOffset, src[i]);
        }
    }

    @Run(test = "testOffHeapLong4")
    public static void testOffHeapLong4_runner() {
        baseOffset = 8;
        runAndVerify3(() -> testOffHeapLong4(baseOffHeap, longArray, 0, size-1), 8);
    }
}

@ -0,0 +1,70 @@
/*
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* @test
* @bug 8300258
* @modules java.base/jdk.internal.misc
*
* @run main/othervm -XX:-TieredCompilation -Xbatch TestOverlappingMismatchedAccesses
*/
import jdk.internal.misc.Unsafe;
/**
 * Regression test for JDK-8300258: a loop that loads {@code int} elements and
 * stores {@code float} values into the same {@code int[]} two elements ahead,
 * so stores overlap with loads of later iterations.
 *
 * <p>The first call to {@link #test} produces the reference result; the
 * following 10,000 calls must produce identical data.
 */
public class TestOverlappingMismatchedAccesses {
    static int N = 50;
    static int[] gold = new int[N];
    static Unsafe unsafe = Unsafe.getUnsafe();

    public static void main(String[] strArr) {
        // Reference result from the first execution.
        init(gold);
        test(gold);
        for (int i = 0; i < 10_000; i++) {
            int[] data = new int[N];
            init(data);
            test(data);
            verify(data, gold);
        }
    }

    /**
     * For i in [2, N-2): reads data[i] as an int and writes (float)(data[i] + 5)
     * as raw float bits into element i + 2 of the same array.
     */
    static void test(int[] data) {
        for (int i = 2; i < N-2; i++) {
            int v = data[i];
            // data is an int[], so address it from the int-array base offset.
            unsafe.putFloat(data, Unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 8, v + 5);
        }
    }

    /** Fills data[j] with j. */
    static void init(int[] data) {
        for (int j = 0; j < N; j++) {
            data[j] = j;
        }
    }

    /**
     * Compares the first N elements of both arrays.
     *
     * @throws RuntimeException at the first mismatching index
     */
    static void verify(int[] data, int[] gold) {
        for (int i = 0; i < N; i++) {
            if (data[i] != gold[i]) {
                throw new RuntimeException(" Invalid result: dataI[" + i + "]: " + data[i] + " != " + gold[i]);
            }
        }
    }
}