8289996: Fix array range check hoisting for some scaled loop iv

Co-authored-by: John R Rose <jrose@openjdk.org>
Reviewed-by: roland, kvn, thartmann
This commit is contained in:
Pengfei Li 2022-07-26 13:45:38 +00:00
parent da9cc5c9f4
commit 893909558b
4 changed files with 191 additions and 7 deletions

View File

@ -262,7 +262,7 @@ Node *MulINode::Ideal(PhaseGVN *phase, bool can_reshape) {
// Get low bit; check for being the only bit
Node *res = NULL;
unsigned int bit1 = abs_con & (0-abs_con); // Extract low bit
unsigned int bit1 = submultiple_power_of_2(abs_con);
if (bit1 == abs_con) { // Found a power of 2?
res = new LShiftINode(in(1), phase->intcon(log2i_exact(bit1)));
} else {
@ -270,11 +270,20 @@ Node *MulINode::Ideal(PhaseGVN *phase, bool can_reshape) {
unsigned int bit2 = abs_con - bit1;
bit2 = bit2 & (0 - bit2); // Extract 2nd bit
if (bit2 + bit1 == abs_con) { // Found all bits in con?
if (!phase->C->post_loop_opts_phase()) {
// Defer this because it breaks loop range check hoisting
phase->C->record_for_post_loop_opts_igvn(this);
return MulNode::Ideal(phase, can_reshape);
}
Node *n1 = phase->transform(new LShiftINode(in(1), phase->intcon(log2i_exact(bit1))));
Node *n2 = phase->transform(new LShiftINode(in(1), phase->intcon(log2i_exact(bit2))));
res = new AddINode(n2, n1);
} else if (is_power_of_2(abs_con + 1)) {
// Sleazy: power-of-2 - 1. Next time be generic.
if (!phase->C->post_loop_opts_phase()) {
// Defer this because it breaks loop range check hoisting
phase->C->record_for_post_loop_opts_igvn(this);
return MulNode::Ideal(phase, can_reshape);
}
unsigned int temp = abs_con + 1;
Node *n1 = phase->transform(new LShiftINode(in(1), phase->intcon(log2i_exact(temp))));
res = new SubINode(n1, in(1));
@ -356,21 +365,28 @@ Node *MulLNode::Ideal(PhaseGVN *phase, bool can_reshape) {
// Get low bit; check for being the only bit
Node *res = NULL;
julong bit1 = abs_con & (0-abs_con); // Extract low bit
julong bit1 = submultiple_power_of_2(abs_con);
if (bit1 == abs_con) { // Found a power of 2?
res = new LShiftLNode(in(1), phase->intcon(log2i_exact(bit1)));
} else {
// Check for constant with 2 bits set
julong bit2 = abs_con-bit1;
bit2 = bit2 & (0-bit2); // Extract 2nd bit
if (bit2 + bit1 == abs_con) { // Found all bits in con?
if (!phase->C->post_loop_opts_phase()) {
// Defer this because it breaks loop range check hoisting
phase->C->record_for_post_loop_opts_igvn(this);
return MulNode::Ideal(phase, can_reshape);
}
Node *n1 = phase->transform(new LShiftLNode(in(1), phase->intcon(log2i_exact(bit1))));
Node *n2 = phase->transform(new LShiftLNode(in(1), phase->intcon(log2i_exact(bit2))));
res = new AddLNode(n2, n1);
} else if (is_power_of_2(abs_con+1)) {
// Sleazy: power-of-2 - 1. Next time be generic.
if (!phase->C->post_loop_opts_phase()) {
// Defer this because it breaks loop range check hoisting
phase->C->record_for_post_loop_opts_igvn(this);
return MulNode::Ideal(phase, can_reshape);
}
julong temp = abs_con + 1;
Node *n1 = phase->transform( new LShiftLNode(in(1), phase->intcon(log2i_exact(temp))));
res = new SubLNode(n1, in(1));

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2019, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -119,4 +119,14 @@ inline T next_power_of_2(T value) {
return round_up_power_of_2(value + 1);
}
// Isolate the least-significant set bit of the given value. Numerically this
// is the largest power of two that divides the value evenly.
// Callers holding an unsigned value should use this helper instead of
// spelling out the (value & -value) idiom by hand.
// precondition: value > 0.
template<typename T, ENABLE_IF(std::is_integral<T>::value)>
inline T submultiple_power_of_2(T value) {
  assert(value > 0, "Invalid value");
  // (~value + 1) is the two's-complement negation of value; and-ing it with
  // value clears every bit above the lowest one that is set.
  const T lowest_set_bit = value & (~value + 1);
  return lowest_set_bit;
}
#endif // SHARE_UTILITIES_POWEROFTWO_HPP

View File

@ -0,0 +1,104 @@
/*
* Copyright (c) 2022, Arm Limited. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* @test
* @bug 8289996
* @summary Test range check hoisting for some scaled iv at array index
* @library /test/lib /
* @requires vm.debug & vm.compiler2.enabled
* @modules jdk.incubator.vector
* @compile --enable-preview -source ${jdk.version} TestRangeCheckHoistingScaledIV.java
* @run main/othervm --enable-preview compiler.rangechecks.TestRangeCheckHoistingScaledIV
*/
package compiler.rangechecks;
import java.lang.foreign.MemorySegment;
import java.nio.ByteOrder;
import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.VectorSpecies;
import jdk.test.lib.process.OutputAnalyzer;
import jdk.test.lib.process.ProcessTools;
/**
 * Driver that runs {@link Launcher} in a child JVM with
 * {@code -XX:+TraceLoopPredicate} and checks that the child's stdout contains
 * the expected {@code rc_predicate} lines for every scaled index used by the
 * test loops.
 */
public class TestRangeCheckHoistingScaledIV {

    /**
     * Holds the loops under test and serves as the main class of the child JVM.
     *
     * <p>Declared as a static nested class: it has only static members and is
     * started on its own via its class name, so it needs no reference to an
     * enclosing {@code TestRangeCheckHoistingScaledIV} instance.
     */
    static class Launcher {
        private static final int SIZE = 16000;
        private static final VectorSpecies<Byte> SPECIES = ByteVector.SPECIES_64;
        private static final ByteOrder ORDER = ByteOrder.nativeOrder();
        private static byte[] ta = new byte[SIZE];
        private static byte[] tb = new byte[SIZE];
        private static MemorySegment sa = MemorySegment.ofArray(ta);
        private static MemorySegment sb = MemorySegment.ofArray(tb);
        // Loop limit. NOTE(review): presumably kept non-final so that the limit
        // is not a compile-time constant - confirm before changing.
        private static int count = 789;

        /**
         * Normal array accesses with int range checks.
         * The indices are the induction variable scaled by 3 and by 7.
         */
        public static void scaledIntIV() {
            for (int i = 0; i < count; i += 2) {
                tb[7 * i] = ta[3 * i];
            }
        }

        /**
         * Memory segment accesses with long range checks.
         * The offsets are the induction variable scaled by 6 and by 15.
         */
        public static void scaledLongIV() {
            for (long l = 0; l < count; l += 64) {
                ByteVector v = ByteVector.fromMemorySegment(SPECIES, sa, l * 6, ORDER);
                v.intoMemorySegment(sb, l * 15, ORDER);
            }
        }

        /**
         * Calls both test loops 20000 times each.
         * NOTE(review): the repetition is presumably there to get both methods
         * JIT-compiled in the child JVM - confirm.
         *
         * @param args unused
         */
        public static void main(String[] args) {
            for (int i = 0; i < 20000; i++) {
                scaledIntIV();
                scaledLongIV();
            }
        }
    }

    /**
     * Starts the child JVM, requires it to exit with status 0, echoes its
     * output and asserts that the expected predicate lines were printed.
     *
     * @param args unused
     * @throws Exception if the child process cannot be started or a check fails
     */
    public static void main(String[] args) throws Exception {
        ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(
                "--enable-preview", "--add-modules", "jdk.incubator.vector",
                "-Xbatch", "-XX:+TraceLoopPredicate", Launcher.class.getName());
        OutputAnalyzer analyzer = new OutputAnalyzer(pb.start());
        analyzer.shouldHaveExitValue(0);
        analyzer.outputTo(System.out);

        // Check if int range checks are hoisted
        analyzer.stdoutShouldContain("rc_predicate init * 3 <u range");
        analyzer.stdoutShouldContain("rc_predicate (limit - 2) * 3 <u range");
        analyzer.stdoutShouldContain("rc_predicate init * 7 <u range");
        analyzer.stdoutShouldContain("rc_predicate (limit - 2) * 7 <u range");

        // Check if long range checks are hoisted
        analyzer.stdoutShouldContain("rc_predicate init * 6 <u range");
        analyzer.stdoutShouldContain("rc_predicate (limit - 64) * 6 <u range");
        analyzer.stdoutShouldContain("rc_predicate init * 15 <u range");
        analyzer.stdoutShouldContain("rc_predicate (limit - 64) * 15 <u range");
    }
}

View File

@ -0,0 +1,54 @@
/*
* Copyright (c) 2022, Arm Limited. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package org.openjdk.bench.vm.compiler;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.State;
/**
 * JMH benchmark whose loops copy between two int arrays using an index that
 * is the loop induction variable multiplied by a constant (3 or 7).
 */
@State(Scope.Benchmark)
public class RangeCheckHoisting {
// Length of both arrays.
private static final int SIZE = 65536;
// Loop trip count, injected by JMH; the only configured value is 6789.
// With that value the largest index is 7 * 6788 = 47516, which is below SIZE.
@Param("6789") private int count;
// Source array of the copies.
private static int[] a = new int[SIZE];
// Destination array of the copies.
private static int[] b = new int[SIZE];
/** Copies a[3 * i] to b[3 * i] for every i in [0, count). */
@Benchmark
public void ivScaled3() {
for (int i = 0; i < count; i++) {
b[3 * i] = a[3 * i];
}
}
/** Copies a[7 * i] to b[7 * i] for every i in [0, count). */
@Benchmark
public void ivScaled7() {
for (int i = 0; i < count; i++) {
b[7 * i] = a[7 * i];
}
}
}