8289996: Fix array range check hoisting for some scaled loop iv
Co-authored-by: John R Rose <jrose@openjdk.org> Reviewed-by: roland, kvn, thartmann
This commit is contained in:
parent
da9cc5c9f4
commit
893909558b
src/hotspot/share
test
hotspot/jtreg/compiler/rangechecks
micro/org/openjdk/bench/vm/compiler
@ -262,7 +262,7 @@ Node *MulINode::Ideal(PhaseGVN *phase, bool can_reshape) {
|
|||||||
|
|
||||||
// Get low bit; check for being the only bit
|
// Get low bit; check for being the only bit
|
||||||
Node *res = NULL;
|
Node *res = NULL;
|
||||||
unsigned int bit1 = abs_con & (0-abs_con); // Extract low bit
|
unsigned int bit1 = submultiple_power_of_2(abs_con);
|
||||||
if (bit1 == abs_con) { // Found a power of 2?
|
if (bit1 == abs_con) { // Found a power of 2?
|
||||||
res = new LShiftINode(in(1), phase->intcon(log2i_exact(bit1)));
|
res = new LShiftINode(in(1), phase->intcon(log2i_exact(bit1)));
|
||||||
} else {
|
} else {
|
||||||
@ -270,11 +270,20 @@ Node *MulINode::Ideal(PhaseGVN *phase, bool can_reshape) {
|
|||||||
unsigned int bit2 = abs_con - bit1;
|
unsigned int bit2 = abs_con - bit1;
|
||||||
bit2 = bit2 & (0 - bit2); // Extract 2nd bit
|
bit2 = bit2 & (0 - bit2); // Extract 2nd bit
|
||||||
if (bit2 + bit1 == abs_con) { // Found all bits in con?
|
if (bit2 + bit1 == abs_con) { // Found all bits in con?
|
||||||
|
if (!phase->C->post_loop_opts_phase()) {
|
||||||
|
// Defer this because it breaks loop range check hoisting
|
||||||
|
phase->C->record_for_post_loop_opts_igvn(this);
|
||||||
|
return MulNode::Ideal(phase, can_reshape);
|
||||||
|
}
|
||||||
Node *n1 = phase->transform(new LShiftINode(in(1), phase->intcon(log2i_exact(bit1))));
|
Node *n1 = phase->transform(new LShiftINode(in(1), phase->intcon(log2i_exact(bit1))));
|
||||||
Node *n2 = phase->transform(new LShiftINode(in(1), phase->intcon(log2i_exact(bit2))));
|
Node *n2 = phase->transform(new LShiftINode(in(1), phase->intcon(log2i_exact(bit2))));
|
||||||
res = new AddINode(n2, n1);
|
res = new AddINode(n2, n1);
|
||||||
} else if (is_power_of_2(abs_con + 1)) {
|
} else if (is_power_of_2(abs_con + 1)) {
|
||||||
// Sleezy: power-of-2 - 1. Next time be generic.
|
if (!phase->C->post_loop_opts_phase()) {
|
||||||
|
// Defer this because it breaks loop range check hoisting
|
||||||
|
phase->C->record_for_post_loop_opts_igvn(this);
|
||||||
|
return MulNode::Ideal(phase, can_reshape);
|
||||||
|
}
|
||||||
unsigned int temp = abs_con + 1;
|
unsigned int temp = abs_con + 1;
|
||||||
Node *n1 = phase->transform(new LShiftINode(in(1), phase->intcon(log2i_exact(temp))));
|
Node *n1 = phase->transform(new LShiftINode(in(1), phase->intcon(log2i_exact(temp))));
|
||||||
res = new SubINode(n1, in(1));
|
res = new SubINode(n1, in(1));
|
||||||
@ -356,21 +365,28 @@ Node *MulLNode::Ideal(PhaseGVN *phase, bool can_reshape) {
|
|||||||
|
|
||||||
// Get low bit; check for being the only bit
|
// Get low bit; check for being the only bit
|
||||||
Node *res = NULL;
|
Node *res = NULL;
|
||||||
julong bit1 = abs_con & (0-abs_con); // Extract low bit
|
julong bit1 = submultiple_power_of_2(abs_con);
|
||||||
if (bit1 == abs_con) { // Found a power of 2?
|
if (bit1 == abs_con) { // Found a power of 2?
|
||||||
res = new LShiftLNode(in(1), phase->intcon(log2i_exact(bit1)));
|
res = new LShiftLNode(in(1), phase->intcon(log2i_exact(bit1)));
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
// Check for constant with 2 bits set
|
// Check for constant with 2 bits set
|
||||||
julong bit2 = abs_con-bit1;
|
julong bit2 = abs_con-bit1;
|
||||||
bit2 = bit2 & (0-bit2); // Extract 2nd bit
|
bit2 = bit2 & (0-bit2); // Extract 2nd bit
|
||||||
if (bit2 + bit1 == abs_con) { // Found all bits in con?
|
if (bit2 + bit1 == abs_con) { // Found all bits in con?
|
||||||
|
if (!phase->C->post_loop_opts_phase()) {
|
||||||
|
// Defer this because it breaks loop range check hoisting
|
||||||
|
phase->C->record_for_post_loop_opts_igvn(this);
|
||||||
|
return MulNode::Ideal(phase, can_reshape);
|
||||||
|
}
|
||||||
Node *n1 = phase->transform(new LShiftLNode(in(1), phase->intcon(log2i_exact(bit1))));
|
Node *n1 = phase->transform(new LShiftLNode(in(1), phase->intcon(log2i_exact(bit1))));
|
||||||
Node *n2 = phase->transform(new LShiftLNode(in(1), phase->intcon(log2i_exact(bit2))));
|
Node *n2 = phase->transform(new LShiftLNode(in(1), phase->intcon(log2i_exact(bit2))));
|
||||||
res = new AddLNode(n2, n1);
|
res = new AddLNode(n2, n1);
|
||||||
|
|
||||||
} else if (is_power_of_2(abs_con+1)) {
|
} else if (is_power_of_2(abs_con+1)) {
|
||||||
// Sleezy: power-of-2 -1. Next time be generic.
|
if (!phase->C->post_loop_opts_phase()) {
|
||||||
|
// Defer this because it breaks loop range check hoisting
|
||||||
|
phase->C->record_for_post_loop_opts_igvn(this);
|
||||||
|
return MulNode::Ideal(phase, can_reshape);
|
||||||
|
}
|
||||||
julong temp = abs_con + 1;
|
julong temp = abs_con + 1;
|
||||||
Node *n1 = phase->transform( new LShiftLNode(in(1), phase->intcon(log2i_exact(temp))));
|
Node *n1 = phase->transform( new LShiftLNode(in(1), phase->intcon(log2i_exact(temp))));
|
||||||
res = new SubLNode(n1, in(1));
|
res = new SubLNode(n1, in(1));
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2019, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -119,4 +119,14 @@ inline T next_power_of_2(T value) {
|
|||||||
return round_up_power_of_2(value + 1);
|
return round_up_power_of_2(value + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Return the largest power of two that is a submultiple of the given value.
|
||||||
|
// This is the same as the numeric value of the least-significant set bit.
|
||||||
|
// For unsigned values, it replaces the old trick of (value & -value).
|
||||||
|
// precondition: value > 0.
|
||||||
|
template<typename T, ENABLE_IF(std::is_integral<T>::value)>
|
||||||
|
inline T submultiple_power_of_2(T value) {
|
||||||
|
assert(value > 0, "Invalid value");
|
||||||
|
return value & -value;
|
||||||
|
}
|
||||||
|
|
||||||
#endif // SHARE_UTILITIES_POWEROFTWO_HPP
|
#endif // SHARE_UTILITIES_POWEROFTWO_HPP
|
||||||
|
@ -0,0 +1,104 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2022, Arm Limited. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* @test
|
||||||
|
* @bug 8289996
|
||||||
|
* @summary Test range check hoisting for some scaled iv at array index
|
||||||
|
* @library /test/lib /
|
||||||
|
* @requires vm.debug & vm.compiler2.enabled
|
||||||
|
* @modules jdk.incubator.vector
|
||||||
|
* @compile --enable-preview -source ${jdk.version} TestRangeCheckHoistingScaledIV.java
|
||||||
|
* @run main/othervm --enable-preview compiler.rangechecks.TestRangeCheckHoistingScaledIV
|
||||||
|
*/
|
||||||
|
|
||||||
|
package compiler.rangechecks;
|
||||||
|
|
||||||
|
import java.lang.foreign.MemorySegment;
|
||||||
|
import java.nio.ByteOrder;
|
||||||
|
|
||||||
|
import jdk.incubator.vector.ByteVector;
|
||||||
|
import jdk.incubator.vector.VectorSpecies;
|
||||||
|
import jdk.test.lib.process.OutputAnalyzer;
|
||||||
|
import jdk.test.lib.process.ProcessTools;
|
||||||
|
|
||||||
|
/**
 * Driver for the scaled-induction-variable range check hoisting test.
 *
 * The driver's main() starts a child JVM that runs the nested Launcher class
 * with -Xbatch and -XX:+TraceLoopPredicate, then checks the child's standard
 * output for the "rc_predicate ..." lines that correspond to the loops in
 * Launcher.
 */
public class TestRangeCheckHoistingScaledIV {
|
||||||
|
|
||||||
|
// Inner class for test loops
// It is the main class of the child JVM; its name is passed to the process
// builder in the outer main().
|
||||||
|
class Launcher {
|
||||||
|
// Length in bytes of both backing arrays.
private static final int SIZE = 16000;
|
||||||
|
// Vector species and byte order used by the memory segment loop.
private static final VectorSpecies<Byte> SPECIES = ByteVector.SPECIES_64;
|
||||||
|
private static final ByteOrder ORDER = ByteOrder.nativeOrder();
|
||||||
|
|
||||||
|
// Source (ta) and destination (tb) arrays for the int-indexed loop.
private static byte[] ta = new byte[SIZE];
|
||||||
|
private static byte[] tb = new byte[SIZE];
|
||||||
|
|
||||||
|
// Memory segments wrapping the same arrays, used by the long-indexed loop.
private static MemorySegment sa = MemorySegment.ofArray(ta);
|
||||||
|
private static MemorySegment sb = MemorySegment.ofArray(tb);
|
||||||
|
|
||||||
|
// Loop limit shared by both loops; a non-final static field.
private static int count = 789;
|
||||||
|
|
||||||
|
// Normal array accesses with int range checks
// The induction variable steps by 2 and is scaled by 3 for the load and by 7
// for the store; these are the "* 3" / "* 7" and "(limit - 2)" parts of the
// strings checked by the driver.
|
||||||
|
public static void scaledIntIV() {
|
||||||
|
for (int i = 0; i < count; i += 2) {
|
||||||
|
tb[7 * i] = ta[3 * i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Memory segment accesses with long range checks
// The long induction variable steps by 64 and is scaled by 6 for the vector
// load offset and by 15 for the vector store offset; these are the "* 6" /
// "* 15" and "(limit - 64)" parts of the strings checked by the driver.
|
||||||
|
public static void scaledLongIV() {
|
||||||
|
for (long l = 0; l < count; l += 64) {
|
||||||
|
ByteVector v = ByteVector.fromMemorySegment(SPECIES, sa, l * 6, ORDER);
|
||||||
|
v.intoMemorySegment(sb, l * 15, ORDER);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Entry point of the child JVM: runs both loops 20000 times.
// NOTE(review): the repeat count is presumably chosen so that both methods
// get compiled by C2 - confirm against the compilation thresholds in use.
public static void main(String[] args) {
|
||||||
|
for (int i = 0; i < 20000; i++) {
|
||||||
|
scaledIntIV();
|
||||||
|
|
||||||
|
scaledLongIV();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Launches the child JVM and verifies its output.
 *
 * The child must exit with value 0, and its standard output must contain
 * every expected "rc_predicate" line; the child's output is also copied to
 * System.out.
 *
 * @param args unused
 * @throws Exception declared for the process-launching calls
 */
public static void main(String[] args) throws Exception {
|
||||||
|
// Child JVM command line: preview features and the incubating vector module
// are enabled to match the APIs used by Launcher.
ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(
|
||||||
|
"--enable-preview", "--add-modules", "jdk.incubator.vector",
|
||||||
|
"-Xbatch", "-XX:+TraceLoopPredicate", Launcher.class.getName());
|
||||||
|
OutputAnalyzer analyzer = new OutputAnalyzer(pb.start());
|
||||||
|
analyzer.shouldHaveExitValue(0);
|
||||||
|
analyzer.outputTo(System.out);
|
||||||
|
|
||||||
|
// Check if int range checks are hoisted
// One predicate at "init" and one at "(limit - 2)" for each scale, 3 and 7.
|
||||||
|
analyzer.stdoutShouldContain("rc_predicate init * 3 <u range");
|
||||||
|
analyzer.stdoutShouldContain("rc_predicate (limit - 2) * 3 <u range");
|
||||||
|
analyzer.stdoutShouldContain("rc_predicate init * 7 <u range");
|
||||||
|
analyzer.stdoutShouldContain("rc_predicate (limit - 2) * 7 <u range");
|
||||||
|
|
||||||
|
// Check if long range checks are hoisted
// One predicate at "init" and one at "(limit - 64)" for each scale, 6 and 15.
|
||||||
|
analyzer.stdoutShouldContain("rc_predicate init * 6 <u range");
|
||||||
|
analyzer.stdoutShouldContain("rc_predicate (limit - 64) * 6 <u range");
|
||||||
|
analyzer.stdoutShouldContain("rc_predicate init * 15 <u range");
|
||||||
|
analyzer.stdoutShouldContain("rc_predicate (limit - 64) * 15 <u range");
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,54 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2022, Arm Limited. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.openjdk.bench.vm.compiler;
|
||||||
|
|
||||||
|
import org.openjdk.jmh.annotations.Benchmark;
|
||||||
|
import org.openjdk.jmh.annotations.Param;
|
||||||
|
import org.openjdk.jmh.annotations.Scope;
|
||||||
|
import org.openjdk.jmh.annotations.State;
|
||||||
|
|
||||||
|
/**
 * JMH benchmark with two copy loops whose array index is the loop induction
 * variable multiplied by a constant (3 or 7).
 *
 * With the default count of 6789 the largest index used is 7 * 6788 = 47516,
 * which is below SIZE.
 */
@State(Scope.Benchmark)
|
||||||
|
public class RangeCheckHoisting {
|
||||||
|
|
||||||
|
// Length of both arrays.
private static final int SIZE = 65536;
|
||||||
|
|
||||||
|
// Loop trip count; a JMH parameter with the single default value 6789.
@Param("6789") private int count;
|
||||||
|
|
||||||
|
// Source (a) and destination (b) arrays; never explicitly filled in this
// class.
private static int[] a = new int[SIZE];
|
||||||
|
private static int[] b = new int[SIZE];
|
||||||
|
|
||||||
|
// Copies a[3 * i] to b[3 * i] for every i in [0, count).
@Benchmark
|
||||||
|
public void ivScaled3() {
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
b[3 * i] = a[3 * i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copies a[7 * i] to b[7 * i] for every i in [0, count).
@Benchmark
|
||||||
|
public void ivScaled7() {
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
b[7 * i] = a[7 * i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user