8311932: Suboptimal compiled code of nested loop over memory segment

Reviewed-by: thartmann, chagedorn
This commit is contained in:
Roland Westrelin 2023-11-17 07:54:10 +00:00
parent 369bbecc0d
commit 129c4708b4
2 changed files with 80 additions and 4 deletions

View File

@ -855,6 +855,28 @@ bool PhaseIdealLoop::create_loop_nest(IdealLoopTree* loop, Node_List &old_new) {
// not a loop after all // not a loop after all
return false; return false;
} }
if (range_checks.size() > 0) {
// This transformation requires peeling one iteration. Also, if the loop has range checks and they are eliminated by
// Loop Predication, then 2 Hoisted Check Predicates are added for each range check. Finally, transforming a long range
// check requires extra logic to be executed before the loop is entered and for the outer loop. As a result, the
// transformation can't pay off for a small number of iterations: roughly, if the loop runs for 3 iterations, it's
// going to execute as many range checks once transformed with range checks eliminated (1 peeled iteration with
// range checks + 2 predicates per range check) as it would have if left untransformed. It also has to pay for the
// extra logic on loop entry and for the outer loop.
loop->compute_trip_count(this);
if (head->is_CountedLoop() && head->as_CountedLoop()->has_exact_trip_count()) {
if (head->as_CountedLoop()->trip_count() <= 3) {
return false;
}
} else {
loop->compute_profile_trip_cnt(this);
if (!head->is_profile_trip_failed() && head->profile_trip_cnt() <= 3) {
return false;
}
}
}
julong orig_iters = (julong)hi->hi_as_long() - lo->lo_as_long(); julong orig_iters = (julong)hi->hi_as_long() - lo->lo_as_long();
iters_limit = checked_cast<int>(MIN2((julong)iters_limit, orig_iters)); iters_limit = checked_cast<int>(MIN2((julong)iters_limit, orig_iters));

View File

@ -29,7 +29,7 @@ import java.util.Objects;
/* /*
* @test * @test
* @bug 8259609 8276116 * @bug 8259609 8276116 8311932
* @summary C2: optimize long range checks in long counted loops * @summary C2: optimize long range checks in long counted loops
* @library /test/lib / * @library /test/lib /
* @requires vm.compiler2.enabled * @requires vm.compiler2.enabled
@ -38,9 +38,9 @@ import java.util.Objects;
public class TestLongRangeChecks { public class TestLongRangeChecks {
public static void main(String[] args) { public static void main(String[] args) {
TestFramework.runWithFlags("-XX:-UseCountedLoopSafepoints"); TestFramework.runWithFlags("-XX:+TieredCompilation", "-XX:-UseCountedLoopSafepoints", "-XX:LoopUnrollLimit=0");
TestFramework.runWithFlags("-XX:+UseCountedLoopSafepoints", "-XX:LoopStripMiningIter=1"); TestFramework.runWithFlags("-XX:+TieredCompilation", "-XX:+UseCountedLoopSafepoints", "-XX:LoopStripMiningIter=1", "-XX:LoopUnrollLimit=0");
TestFramework.runWithFlags("-XX:+UseCountedLoopSafepoints", "-XX:LoopStripMiningIter=1000"); TestFramework.runWithFlags("-XX:+TieredCompilation", "-XX:+UseCountedLoopSafepoints", "-XX:LoopStripMiningIter=1000", "-XX:LoopUnrollLimit=0");
} }
@ -246,4 +246,58 @@ public class TestLongRangeChecks {
private void testStridePosScaleNegInIntLoop2_runner() { private void testStridePosScaleNegInIntLoop2_runner() {
testStridePosScaleNegInIntLoop2(0, 100, 200, 198); testStridePosScaleNegInIntLoop2(0, 100, 200, 198);
} }
// IR test: a loop with a long induction variable and variable bounds whose body performs a long range check
// through Objects.checkIndex() on scale * i + offset (scale == 1, stride == 1).
// The @IR rules require exactly one LONG_COUNTED_LOOP node and forbid any COUNTED_LOOP or LOOP node in the
// compiled method.
// NOTE(review): presumably this asserts that the long loop was left untransformed because it is short - confirm.
@Test
@IR(counts = { IRNode.LONG_COUNTED_LOOP, "1" })
@IR(failOn = { IRNode.COUNTED_LOOP, IRNode.LOOP })
public static void testStridePosScalePosShortLoop(long start, long stop, long length, long offset) {
final long scale = 1;
final long stride = 1;
// Loop runs for too few iterations. Transforming it wouldn't pay off.
for (long i = start; i < stop; i += stride) {
Objects.checkIndex(scale * i + offset, length);
}
}
// Runner for testStridePosScalePosShortLoop: start = 0, stop = 2, length = 2, offset = 0, so the loop executes
// two iterations and checks indices 0 and 1, both of which are within the length of 2.
@Run(test = "testStridePosScalePosShortLoop")
private void testStridePosScalePosShortLoop_runner() {
testStridePosScalePosShortLoop(0, 2, 2, 0);
}
// IR test: a loop with an int induction variable and variable bounds whose body performs a long range check
// through Objects.checkIndex() on scale * i + offset (scale == 2, stride == 1).
// The @IR rules require exactly one COUNTED_LOOP node and forbid any LOOP node in the compiled method.
// NOTE(review): presumably this asserts that no loop nest was created for the short int loop - confirm.
@Test
@IR(counts = { IRNode.COUNTED_LOOP, "1" })
@IR(failOn = { IRNode.LOOP })
public static void testStridePosScalePosInIntLoopShortLoop1(int start, int stop, long length, long offset) {
final long scale = 2;
final int stride = 1;
// Same but with int loop
for (int i = start; i < stop; i += stride) {
Objects.checkIndex(scale * i + offset, length);
}
}
// Runner for testStridePosScalePosInIntLoopShortLoop1: start = 0, stop = 2, length = 4, offset = 0, so the loop
// executes two iterations and checks indices 0 and 2, both of which are within the length of 4.
@Run(test = "testStridePosScalePosInIntLoopShortLoop1")
private void testStridePosScalePosInIntLoopShortLoop1_runner() {
testStridePosScalePosInIntLoopShortLoop1(0, 2, 4, 0);
}
// IR test: a loop with an int induction variable and constant bounds (0 to 3, stride 1) whose body performs a
// long range check through Objects.checkIndex() on scale * i + offset (scale == 2).
// The @IR rules require exactly one COUNTED_LOOP node and forbid any LOOP node in the compiled method.
// NOTE(review): presumably targets the case where the trip count is known exactly at compile time - confirm.
@Test
@IR(counts = { IRNode.COUNTED_LOOP, "1" })
@IR(failOn = { IRNode.LOOP })
public static void testStridePosScalePosInIntLoopShortLoop2(long length, long offset) {
final long scale = 2;
final int stride = 1;
// Int loop again, but with constant bounds: exactly 3 iterations (i = 0, 1, 2)
for (int i = 0; i < 3; i += stride) {
Objects.checkIndex(scale * i + offset, length);
}
}
// Runner for testStridePosScalePosInIntLoopShortLoop2: length = 6, offset = 0, so the three iterations check
// indices 0, 2 and 4, all of which are within the length of 6.
@Run(test = "testStridePosScalePosInIntLoopShortLoop2")
private void testStridePosScalePosInIntLoopShortLoop2_runner() {
testStridePosScalePosInIntLoopShortLoop2(6, 0);
}
} }