8332920: C2: Partial Peeling is wrongly applied for CmpU with negative limit

Reviewed-by: kvn, thartmann, epeter
2024-06-11 11:32:12 +00:00 · 2024-06-11 11:32:12 +00:00 · ef101f1bf2
commit ef101f1bf2
parent 28437459fb
2 changed files with 493 additions and 38 deletions
--- a/src/hotspot/share/opto/loopopts.cpp
+++ b/src/hotspot/share/opto/loopopts.cpp
@ -2987,52 +2987,101 @@ RegionNode* PhaseIdealLoop::insert_region_before_proj(ProjNode* proj) {
  return reg;
 }

-//------------------------------ insert_cmpi_loop_exit -------------------------------------
-// Clone a signed compare loop exit from an unsigned compare and
-// insert it before the unsigned cmp on the stay-in-loop path.
-// All new nodes inserted in the dominator tree between the original
-// if and it's projections.  The original if test is replaced with
-// a constant to force the stay-in-loop path.
+// Idea
+// ----
+// Partial Peeling tries to rotate the loop in such a way that it can later be turned into a counted loop. Counted loops
+// require a signed loop exit test. When calling this method, we've only found a suitable unsigned test to partial peel
+// with. Therefore, we try to split off a signed loop exit test from the unsigned test such that it can be used as new
+// loop exit while keeping the unsigned test unchanged and preserving the same behavior as if we've used the unsigned
+// test alone instead:
 //
-// This is done to make sure that the original if and it's projections
-// still dominate the same set of control nodes, that the ctrl() relation
-// from data nodes to them is preserved, and that their loop nesting is
-// preserved.
+// Before Partial Peeling:
+//   Loop:
+//     <peeled section>
+//     Split off signed loop exit test
+//     <-- CUT HERE -->
+//     Unchanged unsigned loop exit test
+//     <rest of unpeeled section>
+//     goto Loop
 //
-// before
-//          if(i <u limit)    unsigned compare loop exit
+// After Partial Peeling:
+//   <cloned peeled section>
+//   Cloned split off signed loop exit test
+//   Loop:
+//     Unchanged unsigned loop exit test
+//     <rest of unpeeled section>
+//     <peeled section>
+//     Split off signed loop exit test
+//     goto Loop
+//
+// Details
+// -------
+// Before:
+//          if (i <u limit)    Unsigned loop exit condition
 //         /       |
 //        v        v
 //   exit-proj   stay-in-loop-proj
 //
-// after
-//          if(stay-in-loop-const)  original if
-//         /       |
-//        /        v
-//       /  if(i <  limit)    new signed test
+// Split off a signed loop exit test (i.e. with CmpI) from an unsigned loop exit test (i.e. with CmpU) and insert it
+// before the CmpU on the stay-in-loop path and keep both tests:
+//
+//          if (i <u limit)    Signed loop exit test
+//        /        |
+//       /  if (i <u limit)    Unsigned loop exit test
 //      /  /       |
-//     /  /        v
-//    /  /  if(i <u limit)    new cloned unsigned test
-//   /  /   /      |
-//   v  v  v       |
-//    region       |
-//        |        |
-//      dum-if     |
-//     /  |        |
-// ether  |        |
-//        v        v
+//     v  v        v
+//  exit-region  stay-in-loop-proj
+//
+// Implementation
+// --------------
+// We need to make sure that the new signed loop exit test is properly inserted into the graph such that the unsigned
+// loop exit test still dominates the same set of control nodes, the ctrl() relation from data nodes to both loop
+// exit tests is preserved, and their loop nesting is correct.
+//
+// To achieve that, we clone the unsigned loop exit test completely (leave it unchanged), insert the signed loop exit
+// test above it and kill the original unsigned loop exit test by setting it's condition to a constant
+// (i.e. stay-in-loop-const in graph below) such that IGVN can fold it later:
+//
+//           if (stay-in-loop-const)  Killed original unsigned loop exit test
+//          /       |
+//         /        v
+//        /  if (i <  limit)          Split off signed loop exit test
+//       /  /       |
+//      /  /        v
+//     /  /  if (i <u limit)          Cloned unsigned loop exit test
+//    /  /   /      |
+//   v  v  v        |
+//  exit-region     |
+//        |         |
+//    dummy-if      |
+//     /  |         |
+// dead   |         |
+//        v         v
 //   exit-proj   stay-in-loop-proj
 //
-IfNode* PhaseIdealLoop::insert_cmpi_loop_exit(IfNode* if_cmpu, IdealLoopTree *loop) {
+// Note: The dummy-if is inserted to create a region to merge the loop exits between the original to be killed unsigned
+//       loop exit test and its exit projection while keeping the exit projection (also see insert_region_before_proj()).
+//
+// Requirements
+// ------------
+// Note that we can only split off a signed loop exit test from the unsigned loop exit test when the behavior is exactly
+// the same as before with only a single unsigned test. This is only possible if certain requirements are met.
+// Otherwise, we need to bail out (see comments in the code below).
+IfNode* PhaseIdealLoop::insert_cmpi_loop_exit(IfNode* if_cmpu, IdealLoopTree* loop) {
  const bool Signed   = true;
  const bool Unsigned = false;

  BoolNode* bol = if_cmpu->in(1)->as_Bool();
-  if (bol->_test._test != BoolTest::lt) return nullptr;
+  if (bol->_test._test != BoolTest::lt) {
+    return nullptr;
+  }
  CmpNode* cmpu = bol->in(1)->as_Cmp();
-  if (cmpu->Opcode() != Op_CmpU) return nullptr;
+  assert(cmpu->Opcode() == Op_CmpU, "must be unsigned comparison");
+
  int stride = stride_of_possible_iv(if_cmpu);
-  if (stride == 0) return nullptr;
+  if (stride == 0) {
+    return nullptr;
+  }

  Node* lp_proj = stay_in_loop(if_cmpu, loop);
  guarantee(lp_proj != nullptr, "null loop node");
@ -3044,14 +3093,93 @@ IfNode* PhaseIdealLoop::insert_cmpi_loop_exit(IfNode* if_cmpu, IdealLoopTree *lo
    // We therefore can't add a single exit condition.
    return nullptr;
  }
-  // The loop exit condition is !(i <u limit) ==> (i < 0 || i >= limit).
-  // Split out the exit condition (i < 0) for stride < 0 or (i >= limit) for stride > 0.
-  Node* limit = nullptr;
+  // The unsigned loop exit condition is
+  //   !(i <u  limit)
+  // =   i >=u limit
+  //
+  // First, we note that for any x for which
+  //   0 <= x <= INT_MAX
+  // we can convert x to an unsigned int and still get the same guarantee:
+  //   0 <=  (uint) x <=  INT_MAX = (uint) INT_MAX
+  //   0 <=u (uint) x <=u INT_MAX = (uint) INT_MAX   (LEMMA)
+  //
+  // With that in mind, if
+  //   limit >= 0             (COND)
+  // then the unsigned loop exit condition
+  //   i >=u limit            (ULE)
+  // is equivalent to
+  //   i < 0 || i >= limit    (SLE-full)
+  // because either i is negative and therefore always greater than MAX_INT when converting to unsigned
+  //   (uint) i >=u MAX_INT >= limit >= 0
+  // or otherwise
+  //   i >= limit >= 0
+  // holds due to (LEMMA).
+  //
+  // For completeness, a counterexample with limit < 0:
+  // Assume i = -3 and limit = -2:
+  //   i  < 0
+  //   -2 < 0
+  // is true and thus also "i < 0 || i >= limit". But
+  //   i  >=u limit
+  //   -3 >=u -2
+  // is false.
+  Node* limit = cmpu->in(2);
+  const TypeInt* type_limit = _igvn.type(limit)->is_int();
+  if (type_limit->_lo < 0) {
+    return nullptr;
+  }
+
+  // We prove below that we can extract a single signed loop exit condition from (SLE-full), depending on the stride:
+  //   stride < 0:
+  //     i < 0        (SLE = SLE-negative)
+  //   stride > 0:
+  //     i >= limit   (SLE = SLE-positive)
+  // such that we have the following graph before Partial Peeling with stride > 0 (similar for stride < 0):
+  //
+  // Loop:
+  //   <peeled section>
+  //   i >= limit    (SLE-positive)
+  //   <-- CUT HERE -->
+  //   i >=u limit   (ULE)
+  //   <rest of unpeeled section>
+  //   goto Loop
+  //
+  // We exit the loop if:
+  //   (SLE) is true OR (ULE) is true
+  // However, if (SLE) is true then (ULE) also needs to be true to ensure the exact same behavior. Otherwise, we wrongly
+  // exit a loop that should not have been exited if we did not apply Partial Peeling. More formally, we need to ensure:
+  //   (SLE) IMPLIES (ULE)
+  // This indeed holds when (COND) is given:
+  // - stride > 0:
+  //       i >=  limit             // (SLE = SLE-positive)
+  //       i >=  limit >= 0        // (COND)
+  //       i >=u limit >= 0        // (LEMMA)
+  //     which is the unsigned loop exit condition (ULE).
+  // - stride < 0:
+  //       i        <  0           // (SLE = SLE-negative)
+  //       (uint) i >u MAX_INT     // (NEG) all negative values are greater than MAX_INT when converted to unsigned
+  //       MAX_INT >= limit >= 0   // (COND)
+  //       MAX_INT >=u limit >= 0  // (LEMMA)
+  //     and thus from (NEG) and (LEMMA):
+  //       i >=u limit
+  //     which is the unsigned loop exit condition (ULE).
+  //
+  //
+  // After Partial Peeling, we have the following structure for stride > 0 (similar for stride < 0):
+  //   <cloned peeled section>
+  //   i >= limit (SLE-positive)
+  //   Loop:
+  //     i >=u limit (ULE)
+  //     <rest of unpeeled section>
+  //     <peeled section>
+  //     i >= limit (SLE-positive)
+  //     goto Loop
+  Node* rhs_cmpi;
  if (stride > 0) {
-    limit = cmpu->in(2);
+    rhs_cmpi = limit; // For i >= limit
  } else {
-    limit = _igvn.makecon(TypeInt::ZERO);
-    set_ctrl(limit, C->root());
+    rhs_cmpi = _igvn.makecon(TypeInt::ZERO); // For i < 0
+    set_ctrl(rhs_cmpi, C->root());
  }
  // Create a new region on the exit path
  RegionNode* reg = insert_region_before_proj(lp_exit);
@ -3059,7 +3187,7 @@ IfNode* PhaseIdealLoop::insert_cmpi_loop_exit(IfNode* if_cmpu, IdealLoopTree *lo

  // Clone the if-cmpu-true-false using a signed compare
  BoolTest::mask rel_i = stride > 0 ? bol->_test._test : BoolTest::ge;
-  ProjNode* cmpi_exit = insert_if_before_proj(cmpu->in(1), Signed, rel_i, limit, lp_continue);
+  ProjNode* cmpi_exit = insert_if_before_proj(cmpu->in(1), Signed, rel_i, rhs_cmpi, lp_continue);
  reg->add_req(cmpi_exit);

  // Clone the if-cmpu-true-false
--- a/test/hotspot/jtreg/compiler/loopopts/TestPartialPeelAtUnsignedTestsNegativeLimit.java
+++ b/test/hotspot/jtreg/compiler/loopopts/TestPartialPeelAtUnsignedTestsNegativeLimit.java
@ -0,0 +1,327 @@
+/*
+ * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test id=Xbatch
+ * @bug 8332920
+ * @summary Tests partial peeling at unsigned tests with limit being negative in exit tests "i >u limit".
+ * @run main/othervm -Xbatch -XX:-TieredCompilation
+ *                   -XX:CompileOnly=*TestPartialPeel*::original*,*TestPartialPeel*::test*
+ *                   compiler.loopopts.TestPartialPeelAtUnsignedTestsNegativeLimit
+ */
+
+/*
+ * @test id=Xcomp-run-inline
+ * @bug 8332920
+ * @summary Tests partial peeling at unsigned tests with limit being negative in exit tests "i >u limit".
+ * @run main/othervm -Xcomp -XX:-TieredCompilation
+ *                   -XX:CompileOnly=*TestPartialPeel*::original*,*TestPartialPeel*::run*,*TestPartialPeel*::test*
+ *                   -XX:CompileCommand=inline,*TestPartialPeelAtUnsignedTestsNegativeLimit::test*
+ *                   -XX:CompileCommand=dontinline,*TestPartialPeelAtUnsignedTestsNegativeLimit::check
+ *                   compiler.loopopts.TestPartialPeelAtUnsignedTestsNegativeLimit
+ */
+
+/*
+ * @test id=Xcomp-compile-test
+ * @bug 8332920
+ * @summary Tests partial peeling at unsigned tests with limit being negative in exit tests "i >u limit".
+ * @run main/othervm -Xcomp -XX:-TieredCompilation -XX:CompileOnly=*TestPartialPeel*::original*,*TestPartialPeel*::test*
+ *                   compiler.loopopts.TestPartialPeelAtUnsignedTestsNegativeLimit
+ */
+
+/*
+ * @test id=vanilla
+ * @bug 8332920
+ * @requires vm.flavor == "server" & (vm.opt.TieredStopAtLevel == null | vm.opt.TieredStopAtLevel == 4)
+ * @summary Tests partial peeling at unsigned tests with limit being negative in exit tests "i >u limit".
+ *          Only run this test with C2 since it is time-consuming and only tests a C2 issue.
+ * @run main compiler.loopopts.TestPartialPeelAtUnsignedTestsNegativeLimit
+ */
+
+package compiler.loopopts;
+
+import java.util.Random;
+
+import static java.lang.Integer.*;
+
+public class TestPartialPeelAtUnsignedTestsNegativeLimit {
+    static int iFld = 10000;
+    static int iterations = 0;
+    static int iFld2;
+    static boolean flag;
+    final static Random RANDOM = new Random();
+
+    public static void main(String[] args) {
+        compareUnsigned(3, 3); // Load Integer class for -Xcomp
+        for (int i = 0; i < 2; i++) {
+            if (!originalTest()) {
+                throw new RuntimeException("originalTest() failed");
+            }
+        }
+
+        for (int i = 0; i < 2000; i++) {
+            // For profiling
+            iFld = -1;
+            originalTestVariation1();
+
+            // Actual run
+            iFld = MAX_VALUE - 100_000;
+            if (!originalTestVariation1()) {
+                throw new RuntimeException("originalTestVariation1() failed");
+            }
+        }
+
+        for (int i = 0; i < 2000; ++i) {
+            // For profiling
+            iFld = MAX_VALUE;
+            originalTestVariation2();
+
+            // Actual run
+            iFld = MIN_VALUE + 100000;
+            if (!originalTestVariation2()) {
+                throw new RuntimeException("originalTestVariation2() failed");
+            }
+        }
+
+        runWhileLTIncr();
+        runWhileLTDecr();
+    }
+
+    // Originally reported simplified regression test with 2 variations (see below).
+    public static boolean originalTest() {
+        for (int i = MAX_VALUE - 50_000; compareUnsigned(i, -1) < 0; i++) {
+            if (compareUnsigned(MIN_VALUE, i) < 0) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    public static boolean originalTestVariation1() {
+        int a = 0;
+        for (int i = iFld; compareUnsigned(i, -1) < 0; ++i) { // i <u -1
+
+            if (i >= Integer.MIN_VALUE + 1 && i <= 100) { // Transformed to unsigned test.
+                return true;
+            }
+            a *= 23;
+        }
+        return false;
+    }
+
+    public static boolean originalTestVariation2() {
+        int a = 0;
+        for (int i = iFld; compareUnsigned(i, -1000) < 0; i--) { // i <u -1
+            if (compareUnsigned(MAX_VALUE - 20, i) > 0) {
+                return true;
+            }
+            a = i;
+        }
+        System.out.println(a);
+        return false;
+    }
+
+
+    public static void testWhileLTIncr(int init, int limit) {
+        int i = init;
+        while (true) {
+            // <Peeled Section>
+
+            // Found as loop head in ciTypeFlow, but both paths inside loop -> head not cloned.
+            // As a result, this head has the safepoint as backedge instead of the loop exit test
+            // and we cannot create a counted loop (yet). We first need to partial peel.
+            if (flag) {
+            }
+
+            iFld2++;
+
+            // Loop exit test i >=u limit (i.e. "while (i <u limit)") to partial peel with.
+            // insert_cmpi_loop_exit() changes this exit condition into a signed and an unsigned test:
+            //   i >= limit && i >=u limit
+            // where the signed condition can be used as proper loop exit condition for a counted loop
+            // (we cannot use an unsigned counted loop exit condition).
+            //
+            // After Partial Peeling, we have:
+            //   if (i >= limit) goto Exit
+            // Loop:
+            //   if (i >=u limit) goto Exit
+            //   ...
+            //   i++;
+            //   if (i >= limit) goto Exit
+            //   goto Loop
+            // Exit:
+            //   ...
+            //
+            // If init = MAX_VALUE and limit = MIN_VALUE:
+            //   i >= limit
+            //   MAX_VALUE >= MIN_VALUE
+            // which is true where
+            //   i >=u limit
+            //   MAX_VALUE >=u MIN_VALUE
+            //   MAX_VALUE >=u (uint)(MAX_INT + 1)
+            // is false and we wrongly never enter the loop even though we should have.
+            // This results in a wrong execution.
+            if (compareUnsigned(i, limit) >= 0) {
+                return;
+            }
+            // <-- Partial Peeling CUT -->
+            // Safepoint
+            // <Unpeeled Section>
+            iterations++;
+            i++;
+        }
+    }
+
+    // Same as testWhileLTIncr() but with decrement instead.
+    public static void testWhileLTDecr(int init, int limit) {
+        int i = init;
+        while (true) {
+            if (flag) {
+            }
+
+            // Loop exit test.
+            if (compareUnsigned(i, limit) >= 0) { // While (i <u limit)
+                return;
+            }
+
+            iterations++;
+            i--;
+        }
+    }
+
+    public static void runWhileLTIncr() {
+        // Currently works:
+        testWhileLTIncr(MAX_VALUE, -1);
+        check(MIN_VALUE); // MAX_VALUE + 1 iterations
+        testWhileLTIncr(-1, 1);
+        check(0);
+        testWhileLTIncr(0, 0);
+        check(0);
+        checkIncrWithRandom(0, 0); // Sanity check this method.
+        flag = !flag; // Change profiling
+        testWhileLTIncr(MAX_VALUE - 2000, MAX_VALUE);
+        check(2000);
+        testWhileLTIncr(MAX_VALUE - 1990, MAX_VALUE);
+        check(1990);
+        testWhileLTIncr(MAX_VALUE - 1, MAX_VALUE);
+        check(1);
+        testWhileLTIncr(MIN_VALUE, MIN_VALUE + 2000);
+        check(2000);
+        testWhileLTIncr(MIN_VALUE, MIN_VALUE + 1990);
+        check(1990);
+        testWhileLTIncr(MIN_VALUE, MIN_VALUE + 1);
+        check(1);
+
+        flag = !flag;
+        // Overflow currently does not work with negative limit and is fixed with patch:
+        testWhileLTIncr(MAX_VALUE, MIN_VALUE);
+        check(1);
+        testWhileLTIncr(MAX_VALUE - 2000, MIN_VALUE);
+        check(2001);
+        testWhileLTIncr(MAX_VALUE, MIN_VALUE + 2000);
+        check(2001);
+        testWhileLTIncr(MAX_VALUE - 2000, MIN_VALUE + 2000);
+        check(4001);
+
+        // Random values
+        int init = RANDOM.nextInt(0, MAX_VALUE);
+        int limit = RANDOM.nextInt(MIN_VALUE, 0);
+        testWhileLTIncr(init, limit);
+        checkIncrWithRandom(init, limit);
+    }
+
+    public static void runWhileLTDecr() {
+        // Currently works:
+        testWhileLTDecr(1, -1);
+        check(2);
+        testWhileLTDecr(-1, 1);
+        check(0);
+        testWhileLTDecr(0, 0);
+        check(0);
+        checkDecrWithRandom(0, 0); // Sanity check this method.
+        flag = !flag;
+        testWhileLTDecr(MAX_VALUE, MIN_VALUE);
+        check(MIN_VALUE); // MAX_VALUE + 1 iterations
+        testWhileLTDecr(MAX_VALUE, -1);
+        check(MIN_VALUE); // MAX_VALUE + 1 iterations
+        testWhileLTDecr(MAX_VALUE, MIN_VALUE);
+        check(MIN_VALUE); // MAX_VALUE + 1 iterations
+        testWhileLTDecr(MIN_VALUE, 0);
+        check(0);
+        testWhileLTDecr(MIN_VALUE, 1);
+        check(0);
+        flag = !flag;
+
+        // Underflow currently does not work with negative limit and is fixed with patch:
+        testWhileLTDecr(MIN_VALUE, -1);
+        check(MIN_VALUE + 1); // MAX_VALUE + 2 iterations
+        testWhileLTDecr(MIN_VALUE, -2000);
+        check(MIN_VALUE + 1); // MAX_VALUE + 2 iterations
+        testWhileLTDecr(MIN_VALUE, MIN_VALUE + 1);
+        check(MIN_VALUE + 1); // MAX_VALUE + 2 iterations
+        testWhileLTDecr(MIN_VALUE + 2000, -1);
+        check(MIN_VALUE + 2001); // MAX_VALUE + 2002 iterations
+        testWhileLTDecr(MIN_VALUE + 2000, -2000);
+        check(MIN_VALUE + 2001); // MAX_VALUE + 2002 iterations
+        testWhileLTDecr(MIN_VALUE + 2000, MIN_VALUE + 2001);
+        check(MIN_VALUE + 2001); // MAX_VALUE + 2002 iterations
+
+        // Random values
+        int r1 = RANDOM.nextInt(MIN_VALUE, 0);
+        int r2 = RANDOM.nextInt(MIN_VALUE, 0);
+        int init = Math.min(r1, r2);
+        int limit = Math.max(r1, r2);
+        testWhileLTDecr(init, limit);
+        checkDecrWithRandom(init, limit);
+    }
+
+    static void check(int expectedIterations) {
+        if (expectedIterations != iterations) {
+            throw new RuntimeException("Expected " + expectedIterations + " iterations but only got " + iterations);
+        }
+        iterations = 0; // Reset
+    }
+
+    static void checkIncrWithRandom(long init, long limit) {
+        long expectedIterations = ((long)(MAX_VALUE) - init) + (limit - (long)MIN_VALUE) + 1;
+        if ((int)expectedIterations != iterations) {
+            String error = "Expected %d iterations but only got %d, init: %d, limit: %d"
+                            .formatted(expectedIterations, iterations, init, limit);
+            throw new RuntimeException(error);
+        }
+        iterations = 0; // Reset
+    }
+
+    static void checkDecrWithRandom(long init, long limit) {
+        long expectedIterations = init + MIN_VALUE + MAX_VALUE + 2;
+        if (init == limit) {
+            expectedIterations = 0;
+        }
+        if ((int)expectedIterations != iterations) {
+            String error = "Expected %d iterations but only got %d, init: %d, limit: %d"
+                    .formatted(expectedIterations, iterations, init, limit);
+            throw new RuntimeException(error);
+        }
+        iterations = 0; // Reset
+    }
+}