8256730: Code that uses Objects.checkIndex() range checks doesn't optimize well

Reviewed-by: vlivanov, thartmann
2020-12-10 08:09:08 +00:00 · 2020-12-10 08:09:08 +00:00 · d93293f31b
commit d93293f31b
parent 869dcb6f50
6 changed files with 175 additions and 71 deletions
--- a/src/hotspot/share/opto/castnode.cpp
+++ b/src/hotspot/share/opto/castnode.cpp
@ -240,12 +240,50 @@ const Type* CastIINode::Value(PhaseGVN* phase) const {
  return res;
 }

+static Node* find_or_make_CastII(PhaseIterGVN* igvn, Node* parent, Node* control,  // Returns a CastII of 'parent' with the given control and type,
+                                 const TypeInt* type) {  // handing back the node reported by igvn->hash_find_insert() when there is one.
+  Node* n = new CastIINode(parent, type);  // candidate node
+  n->set_req(0, control);  // input 0 is set to the caller-supplied control before the hash lookup
+  Node* existing = igvn->hash_find_insert(n);  // a non-NULL result is used in place of the candidate
+  if (existing != NULL) {
+    n->destruct(igvn);  // the candidate is not needed: discard it
+    return existing;
+  }
+  return igvn->register_new_node_with_optimizer(n);  // nothing found: register the candidate with igvn and return the result
+}
+
 Node *CastIINode::Ideal(PhaseGVN *phase, bool can_reshape) {
  Node* progress = ConstraintCastNode::Ideal(phase, can_reshape);
  if (progress != NULL) {
    return progress;
  }

+  PhaseIterGVN *igvn = phase->is_IterGVN();
+  const TypeInt* this_type = this->type()->is_int();
+  Node* z = in(1);
+  const TypeInteger* rx = NULL;
+  const TypeInteger* ry = NULL;
+  // Similar to ConvI2LNode::Ideal() for the same reasons
+  if (!_range_check_dependency && Compile::push_thru_add(phase, z, this_type, rx, ry, T_INT)) {
+    if (igvn == NULL) {
+      // Postpone this optimization to iterative GVN, where we can handle deep
+      // AddI chains without an exponential number of recursive Ideal() calls.
+      phase->record_for_igvn(this);
+      return NULL;
+    }
+    int op = z->Opcode();
+    Node* x = z->in(1);
+    Node* y = z->in(2);
+
+    Node* cx = find_or_make_CastII(igvn, x, in(0), rx->is_int());
+    Node* cy = find_or_make_CastII(igvn, y, in(0), ry->is_int());
+    switch (op) {
+      case Op_AddI:  return new AddINode(cx, cy);
+      case Op_SubI:  return new SubINode(cx, cy);
+      default:       ShouldNotReachHere();
+    }
+  }
+
  // Similar to ConvI2LNode::Ideal() for the same reasons
  // Do not narrow the type of range check dependent CastIINodes to
  // avoid corruption of the graph if a CastII is replaced by TOP but
--- a/src/hotspot/share/opto/compile.hpp
+++ b/src/hotspot/share/opto/compile.hpp
@ -80,6 +80,7 @@ class JVMState;
 class Type;
 class TypeData;
 class TypeInt;
+class TypeInteger;
 class TypePtr;
 class TypeOopPtr;
 class TypeFunc;
@ -1182,6 +1183,10 @@ class Compile : public Phase {
  void set_exception_backedge() { _exception_backedge = true; }
  bool has_exception_backedge() const { return _exception_backedge; }
 #endif
+
+  static bool
+  push_thru_add(PhaseGVN* phase, Node* z, const TypeInteger* tz, const TypeInteger*& rx, const TypeInteger*& ry,
+                BasicType bt);
 };

 #endif // SHARE_OPTO_COMPILE_HPP
--- a/src/hotspot/share/opto/convertnode.cpp
+++ b/src/hotspot/share/opto/convertnode.cpp
@ -252,13 +252,13 @@ const Type* ConvI2LNode::Value(PhaseGVN* phase) const {
  return tl;
 }

-#ifdef _LP64
 static inline bool long_ranges_overlap(jlong lo1, jlong hi1,
                                       jlong lo2, jlong hi2) {
  // Two ranges overlap iff one range's low point falls in the other range.
  return (lo2 <= lo1 && lo1 <= hi2) || (lo1 <= lo2 && lo2 <= hi1);
 }

+#ifdef _LP64
 // If there is an existing ConvI2L node with the given parent and type, return
 // it. Otherwise, create and return a new one. Both reusing existing ConvI2L
 // nodes and postponing the idealization of new ones are needed to avoid an
@ -275,6 +275,80 @@ static Node* find_or_make_convI2L(PhaseIterGVN* igvn, Node* parent,
 }
 #endif

+// Shared by ConvI2LNode::Ideal() and CastIINode::Ideal(). If z is an AddI/SubI whose value is known to lie in tz, set
+// rx/ry to narrowed types (built with basic type bt) for z->in(1)/z->in(2) and return true. Return false, leaving rx/ry
+// untouched, if z is another node, an input is TOP, a wrapped sum could fall in tz, or a narrowed range is empty.
+bool Compile::push_thru_add(PhaseGVN* phase, Node* z, const TypeInteger* tz, const TypeInteger*& rx, const TypeInteger*& ry,
+                            BasicType bt) {
+  int op = z->Opcode();
+  if (op == Op_AddI || op == Op_SubI) {
+    Node* x = z->in(1);
+    Node* y = z->in(2);
+    assert (x != z && y != z, "dead loop in Compile::push_thru_add");
+    if (phase->type(x) == Type::TOP || phase->type(y) == Type::TOP) {
+      return false;
+    }
+    const TypeInt*  tx = phase->type(x)->is_int();
+    const TypeInt*  ty = phase->type(y)->is_int();

+    jlong xlo = tx->_lo;
+    jlong xhi = tx->_hi;
+    jlong ylo = ty->_lo;
+    jlong yhi = ty->_hi;
+    jlong zlo = tz->lo_as_long();
+    jlong zhi = tz->hi_as_long();
+    jlong vbit = CONST64(1) << BitsPerInt;
+    int widen =  MAX2(tx->_widen, ty->_widen);
+    if (op == Op_SubI) {
+      jlong ylo0 = ylo;
+      ylo = -yhi;
+      yhi = -ylo0;
+    }
+    // See if x+y can cause positive overflow into z+2**32
+    if (long_ranges_overlap(xlo+ylo, xhi+yhi, zlo+vbit, zhi+vbit)) {
+      return false;
+    }
+    // See if x+y can cause negative overflow into z-2**32
+    if (long_ranges_overlap(xlo+ylo, xhi+yhi, zlo-vbit, zhi-vbit)) {
+      return false;
+    }
+    // Now it's always safe to assume x+y does not overflow.
+    // This is true even if some pairs x,y might cause overflow, as long
+    // as that overflow value cannot fall into [zlo,zhi].

+    // Confident that the arithmetic is "as if infinite precision",
+    // we can now use z's range to put constraints on those of x and y.
+    // The "natural" range of x [xlo,xhi] can perhaps be narrowed to a
+    // more "restricted" range by intersecting [xlo,xhi] with the
+    // range obtained by subtracting y's range from the asserted range
+    // of z (the type tz supplied by the caller).  Here's the interval arithmetic algebra:
+    //    x == z-y == [zlo,zhi]-[ylo,yhi] == [zlo,zhi]+[-yhi,-ylo]
+    //    => x in [zlo-yhi, zhi-ylo]
+    //    => x in [zlo-yhi, zhi-ylo] INTERSECT [xlo,xhi]
+    //    => x in [xlo MAX zlo-yhi, xhi MIN zhi-ylo]
+    jlong rxlo = MAX2(xlo, zlo - yhi);
+    jlong rxhi = MIN2(xhi, zhi - ylo);
+    // And similarly, x changing place with y:
+    jlong rylo = MAX2(ylo, zlo - xhi);
+    jlong ryhi = MIN2(yhi, zhi - xlo);
+    if (rxlo > rxhi || rylo > ryhi) {
+      return false;  // x or y is dying; don't mess w/ it
+    }
+    if (op == Op_SubI) {
+      jlong rylo0 = rylo;
+      rylo = -ryhi;
+      ryhi = -rylo0;
+    }
+    assert(rxlo == (int)rxlo && rxhi == (int)rxhi, "x should not overflow");
+    assert(rylo == (int)rylo && ryhi == (int)ryhi, "y should not overflow");
+    rx = TypeInteger::make(rxlo, rxhi, widen, bt);
+    ry = TypeInteger::make(rylo, ryhi, widen, bt);
+    return true;
+  }
+  return false;
+}
+
+
 //------------------------------Ideal------------------------------------------
 Node *ConvI2LNode::Ideal(PhaseGVN *phase, bool can_reshape) {
  PhaseIterGVN *igvn = phase->is_IterGVN();
@ -348,74 +422,21 @@ Node *ConvI2LNode::Ideal(PhaseGVN *phase, bool can_reshape) {
  // Addressing arithmetic will not absorb it as part of a 64-bit AddL.

  Node* z = in(1);
-  int op = z->Opcode();
-  if (op == Op_AddI || op == Op_SubI) {
+  const TypeInteger* rx = NULL;
+  const TypeInteger* ry = NULL;
+  if (Compile::push_thru_add(phase, z, this_type, rx, ry, T_LONG)) {
    if (igvn == NULL) {
      // Postpone this optimization to iterative GVN, where we can handle deep
      // AddI chains without an exponential number of recursive Ideal() calls.
      phase->record_for_igvn(this);
      return this_changed;
    }
+    int op = z->Opcode();
    Node* x = z->in(1);
    Node* y = z->in(2);
-    assert (x != z && y != z, "dead loop in ConvI2LNode::Ideal");
-    if (phase->type(x) == Type::TOP)  return this_changed;
-    if (phase->type(y) == Type::TOP)  return this_changed;
-    const TypeInt*  tx = phase->type(x)->is_int();
-    const TypeInt*  ty = phase->type(y)->is_int();
-    const TypeLong* tz = this_type;
-    jlong xlo = tx->_lo;
-    jlong xhi = tx->_hi;
-    jlong ylo = ty->_lo;
-    jlong yhi = ty->_hi;
-    jlong zlo = tz->_lo;
-    jlong zhi = tz->_hi;
-    jlong vbit = CONST64(1) << BitsPerInt;
-    int widen =  MAX2(tx->_widen, ty->_widen);
-    if (op == Op_SubI) {
-      jlong ylo0 = ylo;
-      ylo = -yhi;
-      yhi = -ylo0;
-    }
-    // See if x+y can cause positive overflow into z+2**32
-    if (long_ranges_overlap(xlo+ylo, xhi+yhi, zlo+vbit, zhi+vbit)) {
-      return this_changed;
-    }
-    // See if x+y can cause negative overflow into z-2**32
-    if (long_ranges_overlap(xlo+ylo, xhi+yhi, zlo-vbit, zhi-vbit)) {
-      return this_changed;
-    }
-    // Now it's always safe to assume x+y does not overflow.
-    // This is true even if some pairs x,y might cause overflow, as long
-    // as that overflow value cannot fall into [zlo,zhi].

-    // Confident that the arithmetic is "as if infinite precision",
-    // we can now use z's range to put constraints on those of x and y.
-    // The "natural" range of x [xlo,xhi] can perhaps be narrowed to a
-    // more "restricted" range by intersecting [xlo,xhi] with the
-    // range obtained by subtracting y's range from the asserted range
-    // of the I2L conversion.  Here's the interval arithmetic algebra:
-    //    x == z-y == [zlo,zhi]-[ylo,yhi] == [zlo,zhi]+[-yhi,-ylo]
-    //    => x in [zlo-yhi, zhi-ylo]
-    //    => x in [zlo-yhi, zhi-ylo] INTERSECT [xlo,xhi]
-    //    => x in [xlo MAX zlo-yhi, xhi MIN zhi-ylo]
-    jlong rxlo = MAX2(xlo, zlo - yhi);
-    jlong rxhi = MIN2(xhi, zhi - ylo);
-    // And similarly, x changing place with y:
-    jlong rylo = MAX2(ylo, zlo - xhi);
-    jlong ryhi = MIN2(yhi, zhi - xlo);
-    if (rxlo > rxhi || rylo > ryhi) {
-      return this_changed;  // x or y is dying; don't mess w/ it
-    }
-    if (op == Op_SubI) {
-      jlong rylo0 = rylo;
-      rylo = -ryhi;
-      ryhi = -rylo0;
-    }
-    assert(rxlo == (int)rxlo && rxhi == (int)rxhi, "x should not overflow");
-    assert(rylo == (int)rylo && ryhi == (int)ryhi, "y should not overflow");
-    Node* cx = find_or_make_convI2L(igvn, x, TypeLong::make(rxlo, rxhi, widen));
-    Node* cy = find_or_make_convI2L(igvn, y, TypeLong::make(rylo, ryhi, widen));
+    Node* cx = find_or_make_convI2L(igvn, x, rx->is_long());
+    Node* cy = find_or_make_convI2L(igvn, y, ry->is_long());
    switch (op) {
      case Op_AddI:  return new AddLNode(cx, cy);
      case Op_SubI:  return new SubLNode(cx, cy);
--- a/src/hotspot/share/opto/loopTransform.cpp
+++ b/src/hotspot/share/opto/loopTransform.cpp
@ -2428,6 +2428,7 @@ void PhaseIdealLoop::add_constraint(jlong stride_con, jlong scale_con, Node* off
 //------------------------------is_scaled_iv---------------------------------
 // Return true if exp is a constant times an induction var
 bool PhaseIdealLoop::is_scaled_iv(Node* exp, Node* iv, int* p_scale) {
+  exp = exp->uncast();
  if (exp == iv) {
    if (p_scale != NULL) {
      *p_scale = 1;
@ -2436,20 +2437,20 @@ bool PhaseIdealLoop::is_scaled_iv(Node* exp, Node* iv, int* p_scale) {
  }
  int opc = exp->Opcode();
  if (opc == Op_MulI) {
-    if (exp->in(1) == iv && exp->in(2)->is_Con()) {
+    if (exp->in(1)->uncast() == iv && exp->in(2)->is_Con()) {
      if (p_scale != NULL) {
        *p_scale = exp->in(2)->get_int();
      }
      return true;
    }
-    if (exp->in(2) == iv && exp->in(1)->is_Con()) {
+    if (exp->in(2)->uncast() == iv && exp->in(1)->is_Con()) {
      if (p_scale != NULL) {
        *p_scale = exp->in(1)->get_int();
      }
      return true;
    }
  } else if (opc == Op_LShiftI) {
-    if (exp->in(1) == iv && exp->in(2)->is_Con()) {
+    if (exp->in(1)->uncast() == iv && exp->in(2)->is_Con()) {
      if (p_scale != NULL) {
        *p_scale = 1 << exp->in(2)->get_int();
      }
@ -2470,6 +2471,7 @@ bool PhaseIdealLoop::is_scaled_iv_plus_offset(Node* exp, Node* iv, int* p_scale,
    }
    return true;
  }
+  exp = exp->uncast();
  int opc = exp->Opcode();
  if (opc == Op_AddI) {
    if (is_scaled_iv(exp->in(1), iv, p_scale)) {
--- a/src/hotspot/share/opto/loopopts.cpp
+++ b/src/hotspot/share/opto/loopopts.cpp
@ -56,8 +56,8 @@ Node* PhaseIdealLoop::split_thru_phi(Node* n, Node* region, int policy) {
  // Splitting range check CastIIs through a loop induction Phi can
  // cause new Phis to be created that are left unrelated to the loop
  // induction Phi and prevent optimizations (vectorization)
-  if (n->Opcode() == Op_CastII && n->as_CastII()->has_range_check() &&
-      region->is_CountedLoop() && n->in(1) == region->as_CountedLoop()->phi()) {
+  if (n->Opcode() == Op_CastII && region->is_CountedLoop() &&
+      n->in(1) == region->as_CountedLoop()->phi()) {
    return NULL;
  }

@ -1116,7 +1116,7 @@ static bool merge_point_safe(Node* region) {
        Node* m = n->fast_out(j);
        if (m->Opcode() == Op_ConvI2L)
          return false;
-        if (m->is_CastII() && m->isa_CastII()->has_range_check()) {
+        if (m->is_CastII()) {
          return false;
        }
      }
--- a/test/hotspot/jtreg/compiler/conversions/TestMoveConvI2LOrCastIIThruAddIs.java
+++ b/test/hotspot/jtreg/compiler/conversions/TestMoveConvI2LOrCastIIThruAddIs.java
@ -23,12 +23,13 @@

 package compiler.conversions;

+import java.util.Objects;
 import java.util.Random;
 import jdk.test.lib.Asserts;

 /*
 * @test
- * @bug 8254317
+ * @bug 8254317 8256730
 * @requires vm.compiler2.enabled
 * @summary Exercises the optimization that moves integer-to-long conversions
 *          upwards through different shapes of integer addition
@ -40,14 +41,18 @@ import jdk.test.lib.Asserts;
 *          the explosion earlier.
 * @library /test/lib /
 * @run main/othervm
- *      compiler.conversions.TestMoveConvI2LThroughAddIs functional
+ *      compiler.conversions.TestMoveConvI2LOrCastIIThruAddIs functional
 * @run main/othervm/timeout=30 -Xbatch
- *      compiler.conversions.TestMoveConvI2LThroughAddIs stress1
+ *      compiler.conversions.TestMoveConvI2LOrCastIIThruAddIs stress1
 * @run main/othervm/timeout=30 -Xbatch
- *      compiler.conversions.TestMoveConvI2LThroughAddIs stress2
+ *      compiler.conversions.TestMoveConvI2LOrCastIIThruAddIs stress2
+ * @run main/othervm/timeout=30 -Xbatch
+ *      compiler.conversions.TestMoveConvI2LOrCastIIThruAddIs stress3
+ * @run main/othervm/timeout=30 -Xbatch
+ *      compiler.conversions.TestMoveConvI2LOrCastIIThruAddIs stress4
 */

-public class TestMoveConvI2LThroughAddIs {
+public class TestMoveConvI2LOrCastIIThruAddIs {

    // Number of repetitions of each test. Should be sufficiently large for the
    // method under test to be compiled with C2.
@ -126,6 +131,28 @@ public class TestMoveConvI2LThroughAddIs {
        return d;
    }

+    // Same as testStress1 for CastII: a chain of 28 dependent int doublings between two Objects.checkIndex() range checks
+    static long testStress3(int a) {
+        Objects.checkIndex(a, 2);  // throws unless 0 <= a < 2; the returned index is not used here
+        for (int i = 0; i < 28; i++) {
+            a = a + a;  // each iteration adds the previous value to itself
+        }
+        return Objects.checkIndex(a, 2);  // range-check the final value; the int result is widened to the long return type
+    }
+
+    // Same as testStress2 for CastII: b and c feed each other, so the additions form a DAG rather than a single chain
+    static long testStress4(int a) {
+        a = Objects.checkIndex(a, 2);  // throws unless 0 <= a < 2; continue with the returned index
+        int b = a;
+        int c = a + a;
+        for (int i = 0; i < 20; i++) {
+            b = b + c;  // every new value is built from both running values...
+            c = b + c;  // ...including the b that was just updated above
+        }
+        int d = b + c;
+        return Objects.checkIndex(d, 2);  // range-check the final sum; the int result is widened to the long return type
+    }
+
    public static void main(String[] args) {
        // We use a random number generator to avoid constant propagation in C2
        // and produce a variable ("a" in the different tests) with a finite,
@ -157,6 +184,17 @@ public class TestMoveConvI2LThroughAddIs {
                                 cnd ? 701408733L : 1402817466L);
            }
            break;
+        case "stress3":
+            for (int i = 0; i < N; i++) {
+                testStress3(0);
+            }
+            break;
+        case "stress4":
+            // DAG-shaped stress test.
+            for (int i = 0; i < N; i++) {
+                testStress4(0);
+            }
+            break;
        default:
            System.out.println("invalid mode");
        }