8328938: C2 SuperWord: disable vectorization for large stride and scale
Reviewed-by: chagedorn, kvn
This commit is contained in:
parent
41966885b9
commit
2931458711
@ -393,6 +393,25 @@ VPointer::VPointer(const MemNode* mem, const VLoop& vloop,
|
|||||||
NOT_PRODUCT(if(_tracer._is_trace_alignment) _tracer.restore_depth();)
|
NOT_PRODUCT(if(_tracer._is_trace_alignment) _tracer.restore_depth();)
|
||||||
NOT_PRODUCT(_tracer.ctor_6(mem);)
|
NOT_PRODUCT(_tracer.ctor_6(mem);)
|
||||||
|
|
||||||
|
// In the pointer analysis, and especially the AlignVector analysis, we assume that
|
||||||
|
// stride and scale are not too large. For example, we multiply "scale * stride",
|
||||||
|
// and assume that this does not overflow the int range. We also take "abs(scale)"
|
||||||
|
// and "abs(stride)", which would overflow for min_int = -(2^31). Still, we want
|
||||||
|
// to at least allow small and moderately large stride and scale. Therefore, we
|
||||||
|
// only allow absolute values below 2^30, which is only a factor 2 smaller than the max/min int.
|
||||||
|
// Normal performance relevant code will have much lower values. And the restriction
|
||||||
|
// allows us to keep the rest of the autovectorization code much simpler, since we
|
||||||
|
// do not have to deal with overflows.
|
||||||
|
jlong long_scale = _scale;
|
||||||
|
jlong long_stride = _vloop.iv_stride();
|
||||||
|
jlong max_val = 1 << 30;
|
||||||
|
if (abs(long_scale) >= max_val ||
|
||||||
|
abs(long_stride) >= max_val ||
|
||||||
|
abs(long_scale * long_stride) >= max_val) {
|
||||||
|
assert(!valid(), "adr stride*scale is too large");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
_base = base;
|
_base = base;
|
||||||
_adr = adr;
|
_adr = adr;
|
||||||
assert(valid(), "Usable");
|
assert(valid(), "Usable");
|
||||||
|
@ -0,0 +1,253 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* @test id=vanilla
|
||||||
|
* @bug 8328938
|
||||||
|
* @summary Test autovectorization with large scale and stride
|
||||||
|
* @modules java.base/jdk.internal.misc
|
||||||
|
* @library /test/lib /
|
||||||
|
* @run main compiler.loopopts.superword.TestLargeScaleAndStride
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* @test id=AlignVector
|
||||||
|
* @bug 8328938
|
||||||
|
* @modules java.base/jdk.internal.misc
|
||||||
|
* @library /test/lib /
|
||||||
|
* @requires vm.compiler2.enabled
|
||||||
|
* @run main/othervm -XX:+AlignVector compiler.loopopts.superword.TestLargeScaleAndStride
|
||||||
|
*/
|
||||||
|
|
||||||
|
package compiler.loopopts.superword;
|
||||||
|
|
||||||
|
import jdk.internal.misc.Unsafe;
|
||||||
|
|
||||||
|
public class TestLargeScaleAndStride {
|
||||||
|
static final Unsafe UNSAFE = Unsafe.getUnsafe();
|
||||||
|
static int RANGE = 100_000;
|
||||||
|
|
||||||
|
public static void main(String[] args) {
|
||||||
|
byte[] a = new byte[100];
|
||||||
|
fill(a);
|
||||||
|
|
||||||
|
byte[] gold1a = a.clone();
|
||||||
|
byte[] gold1b = a.clone();
|
||||||
|
byte[] gold2a = a.clone();
|
||||||
|
byte[] gold2b = a.clone();
|
||||||
|
byte[] gold2c = a.clone();
|
||||||
|
byte[] gold2d = a.clone();
|
||||||
|
byte[] gold3 = a.clone();
|
||||||
|
test1a(gold1a);
|
||||||
|
test1b(gold1b);
|
||||||
|
test2a(gold2a);
|
||||||
|
test2b(gold2b);
|
||||||
|
test2c(gold2c);
|
||||||
|
test2d(gold2d);
|
||||||
|
test3(gold3);
|
||||||
|
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
byte[] c = a.clone();
|
||||||
|
test1a(c);
|
||||||
|
verify(c, gold1a);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
byte[] c = a.clone();
|
||||||
|
test1b(c);
|
||||||
|
verify(c, gold1b);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
byte[] c = a.clone();
|
||||||
|
test2a(c);
|
||||||
|
verify(c, gold2a);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
byte[] c = a.clone();
|
||||||
|
test2b(c);
|
||||||
|
verify(c, gold2b);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
byte[] c = a.clone();
|
||||||
|
test2c(c);
|
||||||
|
verify(c, gold2c);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
byte[] c = a.clone();
|
||||||
|
test2d(c);
|
||||||
|
verify(c, gold2d);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
byte[] c = a.clone();
|
||||||
|
test3(c);
|
||||||
|
verify(c, gold3);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void fill(byte[] a) {
|
||||||
|
for (int i = 0; i < a.length; i++) {
|
||||||
|
a[i] = (byte)i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void verify(byte[] a, byte[] b) {
|
||||||
|
for (int i = 0; i < a.length; i++) {
|
||||||
|
if (a[i] != b[i]) {
|
||||||
|
throw new RuntimeException("wrong value: " + i + ": " + a[i] + " != " + b[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test1a(byte[] a) {
|
||||||
|
int scale = 1 << 31;
|
||||||
|
for (int i = 0; i < RANGE; i+=2) {
|
||||||
|
long base = UNSAFE.ARRAY_BYTE_BASE_OFFSET;
|
||||||
|
// i is a multiple of 2
|
||||||
|
// 2 * (1 >> 31) -> overflow to zero
|
||||||
|
int j = scale * i; // always zero
|
||||||
|
byte v0 = UNSAFE.getByte(a, base + (int)(j + 0));
|
||||||
|
byte v1 = UNSAFE.getByte(a, base + (int)(j + 1));
|
||||||
|
byte v2 = UNSAFE.getByte(a, base + (int)(j + 2));
|
||||||
|
byte v3 = UNSAFE.getByte(a, base + (int)(j + 3));
|
||||||
|
UNSAFE.putByte(a, base + (int)(j + 0), (byte)(v0 + 1));
|
||||||
|
UNSAFE.putByte(a, base + (int)(j + 1), (byte)(v1 + 1));
|
||||||
|
UNSAFE.putByte(a, base + (int)(j + 2), (byte)(v2 + 1));
|
||||||
|
UNSAFE.putByte(a, base + (int)(j + 3), (byte)(v3 + 1));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test1b(byte[] a) {
|
||||||
|
int scale = 1 << 31;
|
||||||
|
for (int i = RANGE-2; i >= 0; i-=2) {
|
||||||
|
long base = UNSAFE.ARRAY_BYTE_BASE_OFFSET;
|
||||||
|
// i is a multiple of 2
|
||||||
|
// 2 * (1 >> 31) -> overflow to zero
|
||||||
|
int j = scale * i; // always zero
|
||||||
|
byte v0 = UNSAFE.getByte(a, base + (int)(j + 0));
|
||||||
|
byte v1 = UNSAFE.getByte(a, base + (int)(j + 1));
|
||||||
|
byte v2 = UNSAFE.getByte(a, base + (int)(j + 2));
|
||||||
|
byte v3 = UNSAFE.getByte(a, base + (int)(j + 3));
|
||||||
|
UNSAFE.putByte(a, base + (int)(j + 0), (byte)(v0 + 1));
|
||||||
|
UNSAFE.putByte(a, base + (int)(j + 1), (byte)(v1 + 1));
|
||||||
|
UNSAFE.putByte(a, base + (int)(j + 2), (byte)(v2 + 1));
|
||||||
|
UNSAFE.putByte(a, base + (int)(j + 3), (byte)(v3 + 1));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test2a(byte[] a) {
|
||||||
|
int scale = 1 << 30;
|
||||||
|
for (int i = 0; i < RANGE; i+=4) {
|
||||||
|
long base = UNSAFE.ARRAY_BYTE_BASE_OFFSET;
|
||||||
|
// i is a multiple of 4
|
||||||
|
// 4 * (1 >> 30) -> overflow to zero
|
||||||
|
int j = scale * i; // always zero
|
||||||
|
byte v0 = UNSAFE.getByte(a, base + (int)(j + 0));
|
||||||
|
byte v1 = UNSAFE.getByte(a, base + (int)(j + 1));
|
||||||
|
byte v2 = UNSAFE.getByte(a, base + (int)(j + 2));
|
||||||
|
byte v3 = UNSAFE.getByte(a, base + (int)(j + 3));
|
||||||
|
UNSAFE.putByte(a, base + (int)(j + 0), (byte)(v0 + 1));
|
||||||
|
UNSAFE.putByte(a, base + (int)(j + 1), (byte)(v1 + 1));
|
||||||
|
UNSAFE.putByte(a, base + (int)(j + 2), (byte)(v2 + 1));
|
||||||
|
UNSAFE.putByte(a, base + (int)(j + 3), (byte)(v3 + 1));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void test2b(byte[] a) {
|
||||||
|
int scale = 1 << 30;
|
||||||
|
for (int i = RANGE-4; i >= 0; i-=4) {
|
||||||
|
long base = UNSAFE.ARRAY_BYTE_BASE_OFFSET;
|
||||||
|
// i is a multiple of 4
|
||||||
|
// 4 * (1 >> 30) -> overflow to zero
|
||||||
|
int j = scale * i; // always zero
|
||||||
|
byte v0 = UNSAFE.getByte(a, base + (int)(j + 0));
|
||||||
|
byte v1 = UNSAFE.getByte(a, base + (int)(j + 1));
|
||||||
|
byte v2 = UNSAFE.getByte(a, base + (int)(j + 2));
|
||||||
|
byte v3 = UNSAFE.getByte(a, base + (int)(j + 3));
|
||||||
|
UNSAFE.putByte(a, base + (int)(j + 0), (byte)(v0 + 1));
|
||||||
|
UNSAFE.putByte(a, base + (int)(j + 1), (byte)(v1 + 1));
|
||||||
|
UNSAFE.putByte(a, base + (int)(j + 2), (byte)(v2 + 1));
|
||||||
|
UNSAFE.putByte(a, base + (int)(j + 3), (byte)(v3 + 1));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test2c(byte[] a) {
|
||||||
|
int scale = -(1 << 30);
|
||||||
|
for (int i = 0; i < RANGE; i+=4) {
|
||||||
|
long base = UNSAFE.ARRAY_BYTE_BASE_OFFSET;
|
||||||
|
// i is a multiple of 4
|
||||||
|
// 4 * (1 >> 30) -> overflow to zero
|
||||||
|
int j = scale * i; // always zero
|
||||||
|
byte v0 = UNSAFE.getByte(a, base + (int)(j + 0));
|
||||||
|
byte v1 = UNSAFE.getByte(a, base + (int)(j + 1));
|
||||||
|
byte v2 = UNSAFE.getByte(a, base + (int)(j + 2));
|
||||||
|
byte v3 = UNSAFE.getByte(a, base + (int)(j + 3));
|
||||||
|
UNSAFE.putByte(a, base + (int)(j + 0), (byte)(v0 + 1));
|
||||||
|
UNSAFE.putByte(a, base + (int)(j + 1), (byte)(v1 + 1));
|
||||||
|
UNSAFE.putByte(a, base + (int)(j + 2), (byte)(v2 + 1));
|
||||||
|
UNSAFE.putByte(a, base + (int)(j + 3), (byte)(v3 + 1));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test2d(byte[] a) {
|
||||||
|
int scale = -(1 << 30);
|
||||||
|
for (int i = RANGE-4; i >= 0; i-=4) {
|
||||||
|
long base = UNSAFE.ARRAY_BYTE_BASE_OFFSET;
|
||||||
|
// i is a multiple of 4
|
||||||
|
// 4 * (1 >> 30) -> overflow to zero
|
||||||
|
int j = scale * i; // always zero
|
||||||
|
byte v0 = UNSAFE.getByte(a, base + (int)(j + 0));
|
||||||
|
byte v1 = UNSAFE.getByte(a, base + (int)(j + 1));
|
||||||
|
byte v2 = UNSAFE.getByte(a, base + (int)(j + 2));
|
||||||
|
byte v3 = UNSAFE.getByte(a, base + (int)(j + 3));
|
||||||
|
UNSAFE.putByte(a, base + (int)(j + 0), (byte)(v0 + 1));
|
||||||
|
UNSAFE.putByte(a, base + (int)(j + 1), (byte)(v1 + 1));
|
||||||
|
UNSAFE.putByte(a, base + (int)(j + 2), (byte)(v2 + 1));
|
||||||
|
UNSAFE.putByte(a, base + (int)(j + 3), (byte)(v3 + 1));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test3(byte[] a) {
|
||||||
|
int scale = 1 << 28;
|
||||||
|
int stride = 1 << 4;
|
||||||
|
int start = -(1 << 30);
|
||||||
|
int end = 1 << 30;
|
||||||
|
for (int i = start; i < end; i+=stride) {
|
||||||
|
long base = UNSAFE.ARRAY_BYTE_BASE_OFFSET;
|
||||||
|
int j = scale * i; // always zero
|
||||||
|
byte v0 = UNSAFE.getByte(a, base + (int)(j + 0));
|
||||||
|
byte v1 = UNSAFE.getByte(a, base + (int)(j + 1));
|
||||||
|
byte v2 = UNSAFE.getByte(a, base + (int)(j + 2));
|
||||||
|
byte v3 = UNSAFE.getByte(a, base + (int)(j + 3));
|
||||||
|
UNSAFE.putByte(a, base + (int)(j + 0), (byte)(v0 + 1));
|
||||||
|
UNSAFE.putByte(a, base + (int)(j + 1), (byte)(v1 + 1));
|
||||||
|
UNSAFE.putByte(a, base + (int)(j + 2), (byte)(v2 + 1));
|
||||||
|
UNSAFE.putByte(a, base + (int)(j + 3), (byte)(v3 + 1));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user