8276116: C2: optimize long range checks in int counted loops

Reviewed-by: kvn
2021-12-08 21:33:33 +00:00 · 2021-12-08 21:33:33 +00:00 · b3faecf739
commit b3faecf739
parent fe2ae8e38b
8 changed files with 610 additions and 100 deletions
--- a/src/hotspot/cpu/x86/x86_32.ad
+++ b/src/hotspot/cpu/x86/x86_32.ad
@ -13130,6 +13130,24 @@ instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_lo
  ins_pipe( pipe_cmov_reg_long );
 %}

+instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
+  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); // long conditional move, register source, keyed on a cmpOpU condition over flagsReg_ulong_LTGE flags
+  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); // requires cmov support; only lt and ge tests
+  ins_cost(400); // the expand block below forwards all four operands unchanged to cmovLL_reg_LTGE
+  expand %{
+    cmovLL_reg_LTGE(cmp, flags, dst, src);
+  %}
+%}
+
+instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
+  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); // long conditional move whose source is a LoadL from memory, keyed on a cmpOpU condition over flagsReg_ulong_LTGE flags
+  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); // requires cmov support; only lt and ge tests
+  ins_cost(500); // the expand block below forwards all four operands unchanged to cmovLL_mem_LTGE
+  expand %{
+    cmovLL_mem_LTGE(cmp, flags, dst, src);
+  %}
+%}
+
 // Compare 2 longs and CMOVE ints.
 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
--- a/src/hotspot/share/opto/compile.cpp
+++ b/src/hotspot/share/opto/compile.cpp
@ -3463,7 +3463,7 @@ void Compile::final_graph_reshaping_main_switch(Node* n, Final_Reshape_Counts& f
    }
    break;
  case Op_Loop:
-    assert(!n->as_Loop()->is_transformed_long_inner_loop() || _loop_opts_cnt == 0, "should have been turned into a counted loop");
+    assert(!n->as_Loop()->is_loop_nest_inner_loop() || _loop_opts_cnt == 0, "should have been turned into a counted loop");
  case Op_CountedLoop:
  case Op_LongCountedLoop:
  case Op_OuterStripMinedLoop:
--- a/src/hotspot/share/opto/loopTransform.cpp
+++ b/src/hotspot/share/opto/loopTransform.cpp
@ -1065,7 +1065,7 @@ void IdealLoopTree::policy_unroll_slp_analysis(CountedLoopNode *cl, PhaseIdealLo
 // When TRUE, the estimated node budget is also requested.
 //
 // We will actually perform iteration-splitting, a more powerful form of RCE.
-bool IdealLoopTree::policy_range_check(PhaseIdealLoop* phase, bool provisional) const {
+bool IdealLoopTree::policy_range_check(PhaseIdealLoop* phase, bool provisional, BasicType bt) const {
  if (!provisional && !RangeCheckElimination) return false;

  // If nodes are depleted, some transform has miscalculated its needs.
@ -1087,7 +1087,7 @@ bool IdealLoopTree::policy_range_check(PhaseIdealLoop* phase, bool provisional)

  BaseCountedLoopNode* cl = _head->as_BaseCountedLoop();
  Node *trip_counter = cl->phi();
-  BasicType bt = cl->bt();
+  assert(!cl->is_LongCountedLoop() || bt == T_LONG, "only long range checks in long counted loops");

  // Check loop body for tests of trip-counter plus loop-invariant vs
  // loop-invariant.
@ -1135,7 +1135,7 @@ bool IdealLoopTree::policy_range_check(PhaseIdealLoop* phase, bool provisional)
          }
        }

-        if (!phase->is_scaled_iv_plus_offset(rc_exp, trip_counter, NULL, NULL)) {
+        if (!phase->is_scaled_iv_plus_offset(rc_exp, trip_counter, NULL, NULL, bt)) {
          continue;
        }
      }
@ -1145,7 +1145,9 @@ bool IdealLoopTree::policy_range_check(PhaseIdealLoop* phase, bool provisional)
      if (is_loop_exit(iff)) {
        // Found valid reason to split iterations (if there is room).
        // NOTE: Usually a gross overestimate.
-        return provisional || phase->may_require_nodes(est_loop_clone_sz(2));
+        // Long range checks cause the loop to be transformed into a loop nest, which only causes a fixed number of
+        // nodes to be added
+        return provisional || bt == T_LONG || phase->may_require_nodes(est_loop_clone_sz(2));
      }
    } // End of is IF
  }
@ -2508,34 +2510,52 @@ void PhaseIdealLoop::add_constraint(jlong stride_con, jlong scale_con, Node* off
  }
 }

+bool PhaseIdealLoop::is_iv(Node* exp, Node* iv, BasicType bt) { // true if exp is iv itself or, for T_LONG with an int-typed iv, ConvI2L(iv)
+  if (exp == iv) {
+    return true;
+  }
+  // With bt == T_LONG and an int-typed iv, also accept exp being a ConvI2L whose input is iv
+  if (bt == T_LONG && iv->bottom_type()->isa_int() && exp->Opcode() == Op_ConvI2L && exp->in(1) == iv) {
+    return true;
+  }
+  return false;
+}
+
 //------------------------------is_scaled_iv---------------------------------
 // Return true if exp is a constant times an induction var
-bool PhaseIdealLoop::is_scaled_iv(Node* exp, Node* iv, jlong* p_scale, BasicType bt) {
+bool PhaseIdealLoop::is_scaled_iv(Node* exp, Node* iv, jlong* p_scale, BasicType bt, bool* converted) {
  exp = exp->uncast();
  assert(bt == T_INT || bt == T_LONG, "unexpected int type");
-  if (exp == iv) {
+  if (is_iv(exp, iv, bt)) {
    if (p_scale != NULL) {
      *p_scale = 1;
    }
    return true;
  }
+  if (bt == T_LONG && iv->bottom_type()->isa_int() && exp->Opcode() == Op_ConvI2L) {
+    exp = exp->in(1);
+    bt = T_INT;
+    if (converted != NULL) {
+      *converted = true;
+    }
+  }
  int opc = exp->Opcode();
  // Can't use is_Mul() here as it's true for AndI and AndL
  if (opc == Op_Mul(bt)) {
-    if (exp->in(1)->uncast() == iv && exp->in(2)->is_Con()) {
+    if (is_iv(exp->in(1)->uncast(), iv, bt) && exp->in(2)->is_Con()) {
      if (p_scale != NULL) {
        *p_scale = exp->in(2)->get_integer_as_long(bt);
      }
      return true;
    }
-    if (exp->in(2)->uncast() == iv && exp->in(1)->is_Con()) {
+    if (is_iv(exp->in(2)->uncast(), iv, bt) && exp->in(1)->is_Con()) {
      if (p_scale != NULL) {
        *p_scale = exp->in(1)->get_integer_as_long(bt);
      }
      return true;
    }
  } else if (opc == Op_LShift(bt)) {
-    if (exp->in(1)->uncast() == iv && exp->in(2)->is_Con()) {
+    if (is_iv(exp->in(1)->uncast(), iv, bt) && exp->in(2)->is_Con()) {
      if (p_scale != NULL) {
        jint shift_amount = exp->in(2)->get_int();
        if (bt == T_INT) {
@ -2552,9 +2572,9 @@ bool PhaseIdealLoop::is_scaled_iv(Node* exp, Node* iv, jlong* p_scale, BasicType

 //-----------------------------is_scaled_iv_plus_offset------------------------------
 // Return true if exp is a simple induction variable expression: k1*iv + (invar + k2)
-bool PhaseIdealLoop::is_scaled_iv_plus_offset(Node* exp, Node* iv, jlong* p_scale, Node** p_offset, BasicType bt, int depth) {
+bool PhaseIdealLoop::is_scaled_iv_plus_offset(Node* exp, Node* iv, jlong* p_scale, Node** p_offset, BasicType bt, bool* converted, int depth) {
  assert(bt == T_INT || bt == T_LONG, "unexpected int type");
-  if (is_scaled_iv(exp, iv, p_scale, bt)) {
+  if (is_scaled_iv(exp, iv, p_scale, bt, converted)) {
    if (p_offset != NULL) {
      Node *zero = _igvn.integercon(0, bt);
      set_ctrl(zero, C->root());
@ -2565,13 +2585,13 @@ bool PhaseIdealLoop::is_scaled_iv_plus_offset(Node* exp, Node* iv, jlong* p_scal
  exp = exp->uncast();
  int opc = exp->Opcode();
  if (opc == Op_Add(bt)) {
-    if (is_scaled_iv(exp->in(1), iv, p_scale, bt)) {
+    if (is_scaled_iv(exp->in(1), iv, p_scale, bt, converted)) {
      if (p_offset != NULL) {
        *p_offset = exp->in(2);
      }
      return true;
    }
-    if (is_scaled_iv(exp->in(2), iv, p_scale, bt)) {
+    if (is_scaled_iv(exp->in(2), iv, p_scale, bt, converted)) {
      if (p_offset != NULL) {
        *p_offset = exp->in(1);
      }
@ -2581,7 +2601,7 @@ bool PhaseIdealLoop::is_scaled_iv_plus_offset(Node* exp, Node* iv, jlong* p_scal
      Node* offset2 = NULL;
      if (depth < 2 &&
          is_scaled_iv_plus_offset(exp->in(1), iv, p_scale,
-                                   p_offset != NULL ? &offset2 : NULL, bt, depth+1)) {
+                                   p_offset != NULL ? &offset2 : NULL, bt, converted, depth+1)) {
        if (p_offset != NULL) {
          Node *ctrl_off2 = get_ctrl(offset2);
          Node* offset = AddNode::make(offset2, exp->in(2), bt);
@ -2592,7 +2612,7 @@ bool PhaseIdealLoop::is_scaled_iv_plus_offset(Node* exp, Node* iv, jlong* p_scal
      }
    }
  } else if (opc == Op_Sub(bt)) {
-    if (is_scaled_iv(exp->in(1), iv, p_scale, bt)) {
+    if (is_scaled_iv(exp->in(1), iv, p_scale, bt, converted)) {
      if (p_offset != NULL) {
        Node *zero = _igvn.integercon(0, bt);
        set_ctrl(zero, C->root());
@ -2603,7 +2623,7 @@ bool PhaseIdealLoop::is_scaled_iv_plus_offset(Node* exp, Node* iv, jlong* p_scal
      }
      return true;
    }
-    if (is_scaled_iv(exp->in(2), iv, p_scale, bt)) {
+    if (is_scaled_iv(exp->in(2), iv, p_scale, bt, converted)) {
      if (p_offset != NULL) {
        // We can't handle a scale of min_jint (or min_jlong) here as -1 * min_jint = min_jint
        if (*p_scale == min_signed_integer(bt)) {
@ -3432,6 +3452,8 @@ bool IdealLoopTree::iteration_split_impl(PhaseIdealLoop *phase, Node_List &old_n
    } else if (policy_unswitching(phase)) {
      phase->do_unswitching(this, old_new);
      return false; // need to recalculate idom data
+    } else if (_head->is_LongCountedLoop()) {
+      phase->create_loop_nest(this, old_new);
    }
    return true;
  }
@ -3475,7 +3497,8 @@ bool IdealLoopTree::iteration_split_impl(PhaseIdealLoop *phase, Node_List &old_n
  // unrolling), plus any needed for RCE purposes.

  bool should_unroll = policy_unroll(phase);
-  bool should_rce    = policy_range_check(phase, false);
+  bool should_rce    = policy_range_check(phase, false, T_INT);
+  bool should_rce_long = policy_range_check(phase, false, T_LONG);

  // If not RCE'ing (iteration splitting), then we do not need a pre-loop.
  // We may still need to peel an initial iteration but we will not
@ -3490,6 +3513,9 @@ bool IdealLoopTree::iteration_split_impl(PhaseIdealLoop *phase, Node_List &old_n
  // peeling.
  if (should_rce || should_unroll) {
    if (cl->is_normal_loop()) { // Convert to 'pre/main/post' loops
+      if (should_rce_long && phase->create_loop_nest(this, old_new)) {
+        return true;
+      }
      uint estimate = est_loop_clone_sz(3);
      if (!phase->may_require_nodes(estimate)) {
        return false;
@ -3531,6 +3557,9 @@ bool IdealLoopTree::iteration_split_impl(PhaseIdealLoop *phase, Node_List &old_n
        phase->do_peeling(this, old_new);
      }
    }
+    if (should_rce_long) {
+      phase->create_loop_nest(this, old_new);
+    }
  }
  return true;
 }
--- a/src/hotspot/share/opto/loopnode.cpp
+++ b/src/hotspot/share/opto/loopnode.cpp
@ -525,10 +525,16 @@ static bool condition_stride_ok(BoolTest::mask bt, jlong stride_con) {
  return true;
 }

-Node* PhaseIdealLoop::long_loop_replace_long_iv(Node* iv_to_replace, Node* inner_iv, Node* outer_phi, Node* inner_head) {
-  Node* iv_as_long = new ConvI2LNode(inner_iv, TypeLong::INT);
-  register_new_node(iv_as_long, inner_head);
-  Node* iv_replacement = new AddLNode(outer_phi, iv_as_long);
+Node* PhaseIdealLoop::loop_nest_replace_iv(Node* iv_to_replace, Node* inner_iv, Node* outer_phi, Node* inner_head,
+                                           BasicType bt) {
+  Node* iv_as_long;
+  if (bt == T_LONG) {
+    iv_as_long = new ConvI2LNode(inner_iv, TypeLong::INT);
+    register_new_node(iv_as_long, inner_head);
+  } else {
+    iv_as_long = inner_iv;
+  }
+  Node* iv_replacement = AddNode::make(outer_phi, iv_as_long, bt);
  register_new_node(iv_replacement, inner_head);
  for (DUIterator_Last imin, i = iv_to_replace->last_outs(imin); i >= imin;) {
    Node* u = iv_to_replace->last_out(i);
@ -762,25 +768,36 @@ SafePointNode* PhaseIdealLoop::find_safepoint(Node* back_control, Node* x, Ideal
 //     continue;
 //   else break;
 // }
-bool PhaseIdealLoop::transform_long_counted_loop(IdealLoopTree* loop, Node_List &old_new) {
+//
+// The same logic is used to transform an int counted loop that contains long range checks into a loop nest of 2 int
+// loops with long range checks transformed to int range checks in the inner loop.
+bool PhaseIdealLoop::create_loop_nest(IdealLoopTree* loop, Node_List &old_new) {
  Node* x = loop->_head;
  // Only for inner loops
-  if (loop->_child != NULL || !x->is_LongCountedLoop() || x->as_Loop()->is_transformed_long_outer_loop()) {
+  if (loop->_child != NULL || !x->is_BaseCountedLoop() || x->as_Loop()->is_loop_nest_outer_loop()) {
    return false;
  }

-  check_long_counted_loop(loop, x);
+  if (x->is_CountedLoop() && !x->as_CountedLoop()->is_main_loop() && !x->as_CountedLoop()->is_normal_loop()) {
+    return false;
+  }

-  LongCountedLoopNode* head = x->as_LongCountedLoop();
+  BaseCountedLoopNode* head = x->as_BaseCountedLoop();
+  BasicType bt = x->as_BaseCountedLoop()->bt();
+
+  check_counted_loop_shape(loop, x, bt);

 #ifndef PRODUCT
-  Atomic::inc(&_long_loop_candidates);
+  if (bt == T_LONG) {
+    Atomic::inc(&_long_loop_candidates);
+  }
 #endif

  jlong stride_con = head->stride_con();
  assert(stride_con != 0, "missed some peephole opt");
  // We can't iterate for more than max int at a time.
  if (stride_con != (jint)stride_con) {
+    assert(bt == T_LONG, "only for long loops");
    return false;
  }
  // The number of iterations for the integer count loop: guarantee no
@ -788,7 +805,7 @@ bool PhaseIdealLoop::transform_long_counted_loop(IdealLoopTree* loop, Node_List
  // loop limit check if the exit test is <= or >=.
  int iters_limit = max_jint - ABS(stride_con) - 1;
 #ifdef ASSERT
-  if (StressLongCountedLoop > 0) {
+  if (bt == T_LONG && StressLongCountedLoop > 0) {
    iters_limit = iters_limit / StressLongCountedLoop;
  }
 #endif
@ -814,24 +831,38 @@ bool PhaseIdealLoop::transform_long_counted_loop(IdealLoopTree* loop, Node_List
  }

  // May not have gone thru igvn yet so don't use _igvn.type(phi) (PhaseIdealLoop::is_counted_loop() sets the iv phi's type)
-  const TypeLong* phi_t = phi->bottom_type()->is_long();
-  assert(phi_t->_hi >= phi_t->_lo, "dead phi?");
-  iters_limit = (int)MIN2((julong)iters_limit, (julong)(phi_t->_hi - phi_t->_lo));
+  const TypeInteger* phi_t = phi->bottom_type()->is_integer(bt);
+  assert(phi_t->hi_as_long() >= phi_t->lo_as_long(), "dead phi?");
+  iters_limit = checked_cast<int>(MIN2((julong)iters_limit, (julong)(phi_t->hi_as_long() - phi_t->lo_as_long())));

-  LongCountedLoopEndNode* exit_test = head->loopexit();
-  BoolTest::mask bt = exit_test->test_trip();
-
-  // We need a safepoint to insert empty predicates for the inner loop.
-  SafePointNode* safepoint = find_safepoint(back_control, x, loop);
+  IfNode* exit_test = head->loopexit();
+  BoolTest::mask mask = exit_test->as_BaseCountedLoopEnd()->test_trip();
+  Node* cmp = exit_test->as_BaseCountedLoopEnd()->cmp_node();

  assert(back_control->Opcode() == Op_IfTrue, "wrong projection for back edge");
-  Node* exit_branch = exit_test->proj_out(false);
-  Node* entry_control = x->in(LoopNode::EntryControl);
-  Node* cmp = exit_test->cmp_node();

  Node_List range_checks;
  iters_limit = extract_long_range_checks(loop, stride_con, iters_limit, phi, range_checks);

+  if (bt == T_INT) {
+    // The only purpose of creating a loop nest is to handle long range checks. If there are none, do not proceed further.
+    if (range_checks.size() == 0) {
+      return false;
+    }
+  }
+
+  // We need a safepoint to insert empty predicates for the inner loop.
+  SafePointNode* safepoint;
+  if (bt == T_INT && head->as_CountedLoop()->is_strip_mined()) {
+    // Loop is strip mined: use the safepoint of the outer strip mined loop
+    strip_mined_nest_back_to_counted_loop(loop, head, back_control, exit_test, safepoint);
+  } else {
+    safepoint = find_safepoint(back_control, x, loop);
+  }
+
+  Node* exit_branch = exit_test->proj_out(false);
+  Node* entry_control = head->in(LoopNode::EntryControl);
+
  // Clone the control flow of the loop to build an outer loop
  Node* outer_back_branch = back_control->clone();
  Node* outer_exit_test = new IfNode(exit_test->in(0), exit_test->in(1), exit_test->_prob, exit_test->_fcnt);
@ -867,19 +898,24 @@ bool PhaseIdealLoop::transform_long_counted_loop(IdealLoopTree* loop, Node_List

  Node* inner_iters_max = NULL;
  if (stride_con > 0) {
-    inner_iters_max = MaxNode::max_diff_with_zero(limit, outer_phi, TypeLong::LONG, _igvn);
+    inner_iters_max = MaxNode::max_diff_with_zero(limit, outer_phi, TypeInteger::bottom(bt), _igvn);
  } else {
-    inner_iters_max = MaxNode::max_diff_with_zero(outer_phi, limit, TypeLong::LONG, _igvn);
+    inner_iters_max = MaxNode::max_diff_with_zero(outer_phi, limit, TypeInteger::bottom(bt), _igvn);
  }

-  Node* inner_iters_limit = _igvn.longcon(iters_limit);
+  Node* inner_iters_limit = _igvn.integercon(iters_limit, bt);
  // inner_iters_max may not fit in a signed integer (iterating from
  // Long.MIN_VALUE to Long.MAX_VALUE for instance). Use an unsigned
  // min.
-  Node* inner_iters_actual = MaxNode::unsigned_min(inner_iters_max, inner_iters_limit, TypeLong::make(0, iters_limit, Type::WidenMin), _igvn);
+  Node* inner_iters_actual = MaxNode::unsigned_min(inner_iters_max, inner_iters_limit, TypeInteger::make(0, iters_limit, Type::WidenMin, bt), _igvn);

-  Node* inner_iters_actual_int = new ConvL2INode(inner_iters_actual);
-  _igvn.register_new_node_with_optimizer(inner_iters_actual_int);
+  Node* inner_iters_actual_int;
+  if (bt == T_LONG) {
+    inner_iters_actual_int = new ConvL2INode(inner_iters_actual);
+    _igvn.register_new_node_with_optimizer(inner_iters_actual_int);
+  } else {
+    inner_iters_actual_int = inner_iters_actual;
+  }

  Node* int_zero = _igvn.intcon(0);
  set_ctrl(int_zero, C->root());
@ -919,11 +955,11 @@ bool PhaseIdealLoop::transform_long_counted_loop(IdealLoopTree* loop, Node_List

  // Replace inner loop long iv phi as inner loop int iv phi + outer
  // loop iv phi
-  Node* iv_add = long_loop_replace_long_iv(phi, inner_phi, outer_phi, head);
+  Node* iv_add = loop_nest_replace_iv(phi, inner_phi, outer_phi, head, bt);

  // Replace inner loop long iv incr with inner loop int incr + outer
  // loop iv phi
-  long_loop_replace_long_iv(incr, inner_incr, outer_phi, head);
+  loop_nest_replace_iv(incr, inner_incr, outer_phi, head, bt);

  set_subtree_ctrl(inner_iters_actual_int, body_populated);

@ -983,6 +1019,11 @@ bool PhaseIdealLoop::transform_long_counted_loop(IdealLoopTree* loop, Node_List
  //     exit_branch: break;  //in(0) := outer_exit_test
  // }

+  if (bt == T_INT) {
+    outer_phi = new ConvI2LNode(outer_phi);
+    register_new_node(outer_phi, outer_head);
+  }
+
  transform_long_range_checks(checked_cast<int>(stride_con), range_checks, outer_phi, inner_iters_actual_int,
                              inner_phi, iv_add, inner_head);
  // Peel one iteration of the loop and use the safepoint at the end
@ -1009,15 +1050,79 @@ bool PhaseIdealLoop::transform_long_counted_loop(IdealLoopTree* loop, Node_List
  }

 #ifndef PRODUCT
-  Atomic::inc(&_long_loop_nests);
+  if (bt == T_LONG) {
+    Atomic::inc(&_long_loop_nests);
+  }
 #endif

-  inner_head->mark_transformed_long_inner_loop();
-  outer_head->mark_transformed_long_outer_loop();
+  inner_head->mark_loop_nest_inner_loop();
+  outer_head->mark_loop_nest_outer_loop();

  return true;
 }

+// Convert the strip mined loop nest back to a single loop with the safepoint right before the loop exit test
+void PhaseIdealLoop::strip_mined_nest_back_to_counted_loop(IdealLoopTree* loop, const BaseCountedLoopNode* head,
+                                                           Node* back_control, IfNode*& exit_test,
+                                                           SafePointNode*& safepoint) { // exit_test and safepoint are outputs: both are assigned below
+  CountedLoopNode* cl = head->as_CountedLoop();
+  cl->verify_strip_mined(1);
+  safepoint = cl->outer_safepoint(); // hand the outer strip mined loop's safepoint back to the caller
+  CountedLoopEndNode* cle = cl->loopexit();
+  OuterStripMinedLoopNode* outer_head = cl->outer_loop();
+  OuterStripMinedLoopEndNode* outer_end = cl->outer_loop_end();
+
+  cl->clear_strip_mined(); // cl is no longer flagged as strip mined
+  // Bypass the outer loop head: cl takes over its entry control, the outer head's entry is set to top
+  _igvn.replace_input_of(cl, LoopNode::EntryControl, outer_head->in(LoopNode::EntryControl));
+  _igvn.replace_input_of(outer_head, LoopNode::EntryControl, C->top());
+  set_idom(cl, cl->in(LoopNode::EntryControl), dom_depth(cl));
+  // Move the exit condition from the inner loop end (cle) to the outer loop end; cle's condition input becomes the int constant 0
+  Node* exit_bol = cle->in(1);
+  Node *zero = _igvn.intcon(0);
+  set_ctrl(zero, C->root());
+  _igvn.replace_input_of(cle, 1, zero);
+
+  _igvn.replace_input_of(outer_end, 1, exit_bol);
+  // Cut the outer head's back edge projection (input set to top) and hang back_control off outer_end instead
+  assert(outer_head->in(LoopNode::LoopBackControl)->in(0) == outer_end, "");
+  _igvn.replace_input_of(outer_head->in(LoopNode::LoopBackControl), 0, C->top());
+  _igvn.replace_input_of(back_control, 0, outer_end);
+  set_idom(back_control, outer_end, dom_depth(outer_end) + 1);
+  // Starting from the safepoint, add the data inputs controlled in the outer strip mined loop to this loop's body
+  Unique_Node_List wq;
+  wq.push(safepoint);
+
+  IdealLoopTree* outer_loop_ilt = get_loop(outer_head);
+
+  for (uint i = 0; i < wq.size(); i++) {
+    Node* n = wq.at(i);
+    for (uint j = 0; j < n->req(); ++j) {
+      Node* in = n->in(j);
+      if (in == NULL || in->is_CFG()) { // only follow non-CFG inputs
+        continue;
+      }
+      if (get_loop(get_ctrl(in)) != outer_loop_ilt) { // skip inputs whose control is not in the outer strip mined loop
+        continue;
+      }
+      assert(!loop->_body.contains(in), "");
+      loop->_body.push(in);
+      wq.push(in); // keep walking through this input's own inputs
+    }
+  }
+  // The outer loop end, the safepoint and the safepoint's control input are assigned to this loop and added to its body
+  set_loop(outer_end, loop);
+  loop->_body.push(outer_end);
+  set_loop(safepoint, loop);
+  loop->_body.push(safepoint);
+  set_loop(safepoint->in(0), loop);
+  loop->_body.push(safepoint->in(0));
+  // From here on the caller's exit test is the former outer loop end
+  exit_test = outer_end;
+  // NOTE(review): presumably a top tail marks the outer strip mined loop's IdealLoopTree as dead - confirm
+  outer_loop_ilt->_tail = C->top();
+}
+
 int PhaseIdealLoop::extract_long_range_checks(const IdealLoopTree* loop, jlong stride_con, int iters_limit, PhiNode* phi,
                                              Node_List& range_checks) {
  if (stride_con < 0) { // only for stride_con > 0 && scale > 0 for now
@ -1164,7 +1269,8 @@ void PhaseIdealLoop::transform_long_range_checks(int stride_con, const Node_List
      // could be shared and have already been taken care of
      continue;
    }
-    bool ok = is_scaled_iv_plus_offset(rc_cmp->in(1), iv_add, &scale, &offset, T_LONG);
+    bool converted = false;
+    bool ok = is_scaled_iv_plus_offset(rc_cmp->in(1), iv_add, &scale, &offset, T_LONG, &converted);
    assert(ok, "inconsistent: was tested before");
    Node* range = rc_cmp->in(2);
    Node* c = rc->in(0);
@ -1173,7 +1279,44 @@ void PhaseIdealLoop::transform_long_range_checks(int stride_con, const Node_List
    Node* R = range;
    Node* K = _igvn.longcon(scale);
    set_ctrl(K, this->C->root());
+
    Node* L = offset;
+
+    if (converted) {
+      // This converts:
+      // i*K + L <u64 R
+      // with K an int into:
+      // i*(long)K + L <u64 unsigned_min((long)max_jint + L + 1, R)
+      // to protect against an overflow of i*K
+      //
+      // Because if i*K overflows, there are K,L where:
+      // i*K + L <u64 R is false
+      // when
+      // i*(long)K is > (long)max_jint and < R
+      // and so i*(long)K + L <u64 R is true
+      // As a consequence simply converting:
+      // i*K + L <u64 R to i*(long)K + L <u64 R could cause incorrect execution
+      //
+      // It's always true that:
+      // i*K <u64 (long)max_jint + 1
+      // which implies i*K + L <u64 (long)max_jint + 1 + L
+      // As a consequence:
+      // i*(long)K + L <u64 unsigned_min((long)max_jint + L + 1, R)
+      // is always false in case of overflow of i*K
+      //
+      // Note, there are K,L where i*K overflows and
+      // i*K + L <u64 R is true, but
+      // i*(long)K + L <u64 unsigned_min((long)max_jint + L + 1, R) is false
+      // So this transformation could cause spurious deoptimizations and failed range check elimination
+      // (but not incorrect execution) for unlikely corner cases with overflow
+      Node* max_jint_plus_one_long = _igvn.longcon((jlong)max_jint + 1);
+      set_ctrl(max_jint_plus_one_long, C->root());
+      Node* max_range = new AddLNode(max_jint_plus_one_long, L);
+      register_new_node(max_range, entry_control);
+      R = MaxNode::unsigned_min(R, max_range, TypeLong::POS, _igvn);
+      set_subtree_ctrl(R, true);
+    }
+
    Node* C = outer_phi;
    Node* Z_2 = new ConvI2LNode(inner_iters_actual_int, TypeLong::LONG);
    register_new_node(Z_2, entry_control);
@ -1251,8 +1394,8 @@ Node* PhaseIdealLoop::clamp(Node* R, Node* L, Node* H) {
  return max;
 }

-LoopNode* PhaseIdealLoop::create_inner_head(IdealLoopTree* loop, LongCountedLoopNode* head,
-                                            LongCountedLoopEndNode* exit_test) {
+LoopNode* PhaseIdealLoop::create_inner_head(IdealLoopTree* loop, BaseCountedLoopNode* head,
+                                            IfNode* exit_test) {
  LoopNode* new_inner_head = new LoopNode(head->in(1), head->in(2));
  IfNode* new_inner_exit = new IfNode(exit_test->in(0), exit_test->in(1), exit_test->_prob, exit_test->_fcnt);
  _igvn.register_new_node_with_optimizer(new_inner_head);
@ -1272,21 +1415,21 @@ LoopNode* PhaseIdealLoop::create_inner_head(IdealLoopTree* loop, LongCountedLoop
 }

 #ifdef ASSERT
-void PhaseIdealLoop::check_long_counted_loop(IdealLoopTree* loop, Node* x) {
+void PhaseIdealLoop::check_counted_loop_shape(IdealLoopTree* loop, Node* x, BasicType bt) {
  Node* back_control = loop_exit_control(x, loop);
  assert(back_control != NULL, "no back control");

-  BoolTest::mask bt = BoolTest::illegal;
+  BoolTest::mask mask = BoolTest::illegal;
  float cl_prob = 0;
  Node* incr = NULL;
  Node* limit = NULL;

-  Node* cmp = loop_exit_test(back_control, loop, incr, limit, bt, cl_prob);
-  assert(cmp != NULL && cmp->Opcode() == Op_CmpL, "no exit test");
+  Node* cmp = loop_exit_test(back_control, loop, incr, limit, mask, cl_prob);
+  assert(cmp != NULL && cmp->Opcode() == Op_Cmp(bt), "no exit test");

  Node* phi_incr = NULL;
  incr = loop_iv_incr(incr, x, loop, phi_incr);
-  assert(incr != NULL && incr->Opcode() == Op_AddL, "no incr");
+  assert(incr != NULL && incr->Opcode() == Op_Add(bt), "no incr");

  Node* xphi = NULL;
  Node* stride = loop_iv_stride(incr, loop, xphi);
@ -1297,11 +1440,11 @@ void PhaseIdealLoop::check_long_counted_loop(IdealLoopTree* loop, Node* x) {

  assert(phi != NULL && phi->in(LoopNode::LoopBackControl) == incr, "No phi");

-  jlong stride_con = stride->get_long();
+  jlong stride_con = stride->get_integer_as_long(bt);

-  assert(condition_stride_ok(bt, stride_con), "illegal condition");
+  assert(condition_stride_ok(mask, stride_con), "illegal condition");

-  assert(bt != BoolTest::ne, "unexpected condition");
+  assert(mask != BoolTest::ne, "unexpected condition");
  assert(phi_incr == NULL, "bad loop shape");
  assert(cmp->in(1) == incr, "bad exit test shape");

@ -1630,7 +1773,7 @@ bool PhaseIdealLoop::is_counted_loop(Node* x, IdealLoopTree*&loop, BasicType iv_
    if (sov < 0) {
      return false;  // Bailout: integer overflow is certain.
    }
-    assert(!x->as_Loop()->is_transformed_long_inner_loop(), "long loop was transformed");
+    assert(!x->as_Loop()->is_loop_nest_inner_loop(), "loop was transformed");
    // Generate loop's limit check.
    // Loop limit check predicate should be near the loop.
    ProjNode *limit_check_proj = find_predicate_insertion_point(init_control, Deoptimization::Reason_loop_limit_check);
@ -1720,7 +1863,7 @@ bool PhaseIdealLoop::is_counted_loop(Node* x, IdealLoopTree*&loop, BasicType iv_

 #ifdef ASSERT
  if (iv_bt == T_INT &&
-      !x->as_Loop()->is_transformed_long_inner_loop() &&
+      !x->as_Loop()->is_loop_nest_inner_loop() &&
      StressLongCountedLoop > 0 &&
      trunc1 == NULL &&
      convert_to_long_loop(cmp, phi, loop)) {
@ -1925,12 +2068,12 @@ bool PhaseIdealLoop::is_counted_loop(Node* x, IdealLoopTree*&loop, BasicType iv_
  }

 #ifndef PRODUCT
-  if (x->as_Loop()->is_transformed_long_inner_loop()) {
+  if (x->as_Loop()->is_loop_nest_inner_loop() && iv_bt == T_LONG) {
    Atomic::inc(&_long_loop_counted_loops);
  }
 #endif
-  if (iv_bt == T_LONG && x->as_Loop()->is_transformed_long_outer_loop()) {
-    l->mark_transformed_long_outer_loop();
+  if (iv_bt == T_LONG && x->as_Loop()->is_loop_nest_outer_loop()) {
+    l->mark_loop_nest_outer_loop();
  }

  return true;
@ -2405,11 +2548,6 @@ int CountedLoopNode::stride_con() const {
  return cle != NULL ? cle->stride_con() : 0;
 }

-jlong LongCountedLoopNode::stride_con() const {
-  LongCountedLoopEndNode* cle = loopexit_or_null();
-  return cle != NULL ? cle->stride_con() : 0;
-}
-
 BaseCountedLoopNode* BaseCountedLoopNode::make(Node* entry, Node* backedge, BasicType bt) {
  if (bt == T_INT) {
    return new CountedLoopNode(entry, backedge);
@ -3542,7 +3680,7 @@ void IdealLoopTree::counted_loop( PhaseIdealLoop *phase ) {
             phase->is_counted_loop(_head, loop, T_LONG)) {
    remove_safepoints(phase, true);
  } else {
-    assert(!_head->is_Loop() || !_head->as_Loop()->is_transformed_long_inner_loop(), "transformation to counted loop should not fail");
+    assert(!_head->is_Loop() || !_head->as_Loop()->is_loop_nest_inner_loop(), "transformation to counted loop should not fail");
    if (_parent != NULL && !_irreducible) {
      // Not a counted loop. Keep one safepoint.
      bool keep_one_sfpt = true;
@ -4214,7 +4352,9 @@ void PhaseIdealLoop::build_and_optimize(LoopOptsMode mode) {
      // Because RCE opportunities can be masked by split_thru_phi,
      // look for RCE candidates and inhibit split_thru_phi
      // on just their loop-phi's for this pass of loop opts
-      if (SplitIfBlocks && do_split_ifs && lpt->policy_range_check(this, true)) {
+      if (SplitIfBlocks && do_split_ifs &&
+          (lpt->policy_range_check(this, true, T_LONG) ||
+           (head->is_CountedLoop() && lpt->policy_range_check(this, true, T_INT)))) {
        lpt->_rce_candidate = 1; // = true
      }
    }
@ -4266,13 +4406,6 @@ void PhaseIdealLoop::build_and_optimize(LoopOptsMode mode) {
  // Do verify graph edges in any case
  NOT_PRODUCT( C->verify_graph_edges(); );

-  if (C->has_loops() && !C->major_progress()) {
-    for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) {
-      IdealLoopTree *lpt = iter.current();
-      transform_long_counted_loop(lpt, worklist);
-    }
-  }
-
  if (!do_split_ifs) {
    // We saw major progress in Split-If to get here.  We forced a
    // pass with unrolling and not split-if, however more split-if's
--- a/src/hotspot/share/opto/loopnode.hpp
+++ b/src/hotspot/share/opto/loopnode.hpp
@ -77,8 +77,8 @@ protected:
         StripMined          = 1<<15,
         SubwordLoop         = 1<<16,
         ProfileTripFailed   = 1<<17,
-         TransformedLongInnerLoop = 1<<18,
-         TransformedLongOuterLoop = 1<<19};
+         LoopNestInnerLoop = 1 << 18,
+         LoopNestLongOuterLoop = 1 << 19};
  char _unswitch_count;
  enum { _unswitch_max=3 };
  char _postloop_flags;
@ -103,8 +103,8 @@ public:
  bool is_strip_mined() const { return _loop_flags & StripMined; }
  bool is_profile_trip_failed() const { return _loop_flags & ProfileTripFailed; }
  bool is_subword_loop() const { return _loop_flags & SubwordLoop; }
-  bool is_transformed_long_inner_loop() const { return _loop_flags & TransformedLongInnerLoop; }
-  bool is_transformed_long_outer_loop() const { return _loop_flags & TransformedLongOuterLoop; }
+  bool is_loop_nest_inner_loop() const { return _loop_flags & LoopNestInnerLoop; }
+  bool is_loop_nest_outer_loop() const { return _loop_flags & LoopNestLongOuterLoop; }

  void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; }
  void mark_has_reductions() { _loop_flags |= HasReductions; }
@ -119,8 +119,8 @@ public:
  void clear_strip_mined() { _loop_flags &= ~StripMined; }
  void mark_profile_trip_failed() { _loop_flags |= ProfileTripFailed; }
  void mark_subword_loop() { _loop_flags |= SubwordLoop; }
-  void mark_transformed_long_inner_loop() { _loop_flags |= TransformedLongInnerLoop; }
-  void mark_transformed_long_outer_loop() { _loop_flags |= TransformedLongOuterLoop; }
+  void mark_loop_nest_inner_loop() { _loop_flags |= LoopNestInnerLoop; }
+  void mark_loop_nest_outer_loop() { _loop_flags |= LoopNestLongOuterLoop; }

  int unswitch_max() { return _unswitch_max; }
  int unswitch_count() { return _unswitch_count; }
@ -216,6 +216,8 @@ public:

  virtual BasicType bt() const = 0;

+  jlong stride_con() const;
+
  static BaseCountedLoopNode* make(Node* entry, Node* backedge, BasicType bt);
 };

@ -364,7 +366,6 @@ public:

  LongCountedLoopEndNode* loopexit_or_null() const { return (LongCountedLoopEndNode*) BaseCountedLoopNode::loopexit_or_null(); }
  LongCountedLoopEndNode* loopexit() const { return (LongCountedLoopEndNode*) BaseCountedLoopNode::loopexit(); }
-  jlong   stride_con() const;
 };


@ -512,6 +513,12 @@ inline Node* BaseCountedLoopNode::phi() const {
  return cle != NULL ? cle->phi() : NULL;
 }

+inline jlong BaseCountedLoopNode::stride_con() const {  // stride constant of this loop, taken from its loop-exit node
+  BaseCountedLoopEndNode* cle = loopexit_or_null();  // may be NULL, hence the check below
+  return cle != NULL ? cle->stride_con() : 0;  // falls back to 0 when no loop-exit node is found
+}
+
+
 //------------------------------LoopLimitNode-----------------------------
 // Counted Loop limit node which represents exact final iterator value:
 // trip_count = (limit - init_trip + stride - 1)/stride
@ -710,7 +717,7 @@ public:
  // Return TRUE or FALSE if the loop should be range-check-eliminated.
  // Gather a list of IF tests that are dominated by iteration splitting;
  // also gather the end of the first split and the start of the 2nd split.
-  bool policy_range_check(PhaseIdealLoop* phase, bool provisional) const;
+  bool policy_range_check(PhaseIdealLoop* phase, bool provisional, BasicType bt) const;

  // Return TRUE if "iff" is a range check.
  bool is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, Invariance& invar DEBUG_ONLY(COMMA ProjNode *predicate_proj)) const;
@ -1146,8 +1153,8 @@ public:

  bool is_counted_loop(Node* x, IdealLoopTree*&loop, BasicType iv_bt);

-  Node* long_loop_replace_long_iv(Node* iv_to_replace, Node* inner_iv, Node* outer_phi, Node* inner_head);
-  bool transform_long_counted_loop(IdealLoopTree* loop, Node_List &old_new);
+  Node* loop_nest_replace_iv(Node* iv_to_replace, Node* inner_iv, Node* outer_phi, Node* inner_head, BasicType bt);
+  bool create_loop_nest(IdealLoopTree* loop, Node_List &old_new);
 #ifdef ASSERT
  bool convert_to_long_loop(Node* cmp, Node* phi, IdealLoopTree* loop);
 #endif
@ -1248,10 +1255,12 @@ public:
  void mark_reductions( IdealLoopTree *loop );

  // Return true if exp is a constant times an induction var
-  bool is_scaled_iv(Node* exp, Node* iv, jlong* p_scale, BasicType bt);
+  bool is_scaled_iv(Node* exp, Node* iv, jlong* p_scale, BasicType bt, bool* converted);
+
+  bool is_iv(Node* exp, Node* iv, BasicType bt);

  // Return true if exp is a scaled induction var plus (or minus) constant
-  bool is_scaled_iv_plus_offset(Node* exp, Node* iv, jlong* p_scale, Node** p_offset, BasicType bt, int depth = 0);
+  bool is_scaled_iv_plus_offset(Node* exp, Node* iv, jlong* p_scale, Node** p_offset, BasicType bt, bool* converted = NULL, int depth = 0);
  bool is_scaled_iv_plus_offset(Node* exp, Node* iv, int* p_scale, Node** p_offset) {
    jlong long_scale;
    if (is_scaled_iv_plus_offset(exp, iv, &long_scale, p_offset, T_INT)) {
@ -1610,9 +1619,9 @@ public:

  void rpo(Node* start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list) const;

-  void check_long_counted_loop(IdealLoopTree* loop, Node* x) NOT_DEBUG_RETURN;
+  void check_counted_loop_shape(IdealLoopTree* loop, Node* x, BasicType bt) NOT_DEBUG_RETURN;

-  LoopNode* create_inner_head(IdealLoopTree* loop, LongCountedLoopNode* head, LongCountedLoopEndNode* exit_test);
+  LoopNode* create_inner_head(IdealLoopTree* loop, BaseCountedLoopNode* head, IfNode* exit_test);


  int extract_long_range_checks(const IdealLoopTree* loop, jlong stride_con, int iters_limit, PhiNode* phi,
@ -1635,6 +1644,9 @@ public:
  Node* clamp(Node* R, Node* L, Node* H);

  bool safe_for_if_replacement(const Node* dom) const;
+
+  void strip_mined_nest_back_to_counted_loop(IdealLoopTree* loop, const BaseCountedLoopNode* head, Node* back_control,
+                                             IfNode*&exit_test, SafePointNode*&safepoint);
 };


--- a/src/hotspot/share/opto/loopopts.cpp
+++ b/src/hotspot/share/opto/loopopts.cpp
@ -1053,7 +1053,7 @@ Node *PhaseIdealLoop::split_if_with_blocks_pre( Node *n ) {

  // Do not clone the trip counter through on a CountedLoop
  // (messes up the canonical shape).
-  if (((n_blk->is_CountedLoop() || (n_blk->is_Loop() && n_blk->as_Loop()->is_transformed_long_inner_loop())) && n->Opcode() == Op_AddI) ||
+  if (((n_blk->is_CountedLoop() || (n_blk->is_Loop() && n_blk->as_Loop()->is_loop_nest_inner_loop())) && n->Opcode() == Op_AddI) ||
      (n_blk->is_LongCountedLoop() && n->Opcode() == Op_AddL)) {
    return n;
  }
--- a/test/hotspot/jtreg/compiler/c2/irTests/TestLongRangeChecks.java
+++ b/test/hotspot/jtreg/compiler/c2/irTests/TestLongRangeChecks.java
@ -28,7 +28,7 @@ import java.util.Objects;

 /*
 * @test
- * @bug 8259609
+ * @bug 8259609 8276116
 * @summary C2: optimize long range checks in long counted loops
 * @library /test/lib /
 * @run driver compiler.c2.irTests.TestLongRangeChecks
@ -41,8 +41,8 @@ public class TestLongRangeChecks {


    @Test
-    @IR(counts = { IRNode.LOOP, "1"})
-    @IR(failOn = { IRNode.COUNTEDLOOP})
+    @IR(counts = { IRNode.LOOP, "1" })
+    @IR(failOn = { IRNode.COUNTEDLOOP })
    public static void testStridePosScalePos(long start, long stop, long length, long offset) {
        final long scale = 1;
        final long stride = 1;
@ -60,4 +60,40 @@ public class TestLongRangeChecks {
    private void testStridePosScalePos_runner() {
        testStridePosScalePos(0, 100, 100, 0);
    }
+
+    @Test
+    @IR(counts = { IRNode.LOOP, "1" }) // a single Loop node is expected in the compiled IR
+    @IR(failOn = { IRNode.COUNTEDLOOP }) // and no CountedLoop node may remain
+    public static void testStridePosScalePosInIntLoop1(int start, int stop, long length, long offset) {
+        final long scale = 2;
+        final int stride = 1;
+
+        // Same shape as testStridePosScalePos, but with an int loop variable; scale is a long, so scale * i is a long multiply
+        for (int i = start; i < stop; i += stride) {
+            Objects.checkIndex(scale * i + offset, length);
+        }
+    }
+
+    @Run(test = "testStridePosScalePosInIntLoop1")
+    private void testStridePosScalePosInIntLoop1_runner() {
+        testStridePosScalePosInIntLoop1(0, 100, 200, 0); // indices are 2 * i for i in [0, 100): at most 198, below length 200
+    }
+
+    @Test
+    @IR(counts = { IRNode.LOOP, "1" }) // a single Loop node is expected in the compiled IR
+    @IR(failOn = { IRNode.COUNTEDLOOP }) // and no CountedLoop node may remain
+    public static void testStridePosScalePosInIntLoop2(int start, int stop, long length, long offset) {
+        final int scale = 2;
+        final int stride = 1;
+
+        // Same as testStridePosScalePosInIntLoop1, but scale is an int: scale * i is an int multiply that is widened only for the addition of offset
+        for (int i = start; i < stop; i += stride) {
+            Objects.checkIndex(scale * i + offset, length);
+        }
+    }
+
+    @Run(test = "testStridePosScalePosInIntLoop2")
+    private void testStridePosScalePosInIntLoop2_runner() {
+        testStridePosScalePosInIntLoop2(0, 100, 200, 0); // indices are 2 * i for i in [0, 100): at most 198, below length 200
+    }
 }
--- a/test/hotspot/jtreg/compiler/rangechecks/TestLongRangeCheck.java
+++ b/test/hotspot/jtreg/compiler/rangechecks/TestLongRangeCheck.java
@ -23,7 +23,7 @@

 /**
 * @test
- * @bug 8259609
+ * @bug 8259609 8276116
 * @summary C2: optimize long range checks in long counted loops
 * @requires vm.compiler2.enabled
 * @requires vm.compMode != "Xcomp"
@ -32,7 +32,7 @@
 * @build sun.hotspot.WhiteBox
 * @run driver jdk.test.lib.helpers.ClassFileInstaller sun.hotspot.WhiteBox
 *
- * @run main/othervm -ea -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:-BackgroundCompilation TestLongRangeCheck
+ * @run main/othervm -ea -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:-BackgroundCompilation -XX:-UseOnStackReplacement TestLongRangeCheck
 *
 */

@ -193,6 +193,115 @@ public class TestLongRangeCheck {
            m.invoke(null, 0, 100, Long.MAX_VALUE, Long.MAX_VALUE - 50, 0, 50);
            assertIsCompiled(m);
        }
+
+        test("testStridePosScalePosInIntLoop", 0, 100, 100, 0);
+
+        test("testStrideNegScaleNegInIntLoop", 0, 100, 100, 100);
+
+        test("testStrideNegScalePosInIntLoop", 0, 100, 100, 0);
+
+        test("testStridePosScaleNegInIntLoop", 0, 100, 100, 99);
+
+        test("testStridePosScalePosNotOneInIntLoop", 0, 100, 1090, 0);
+
+        test("testStrideNegScaleNegNotOneInIntLoop", 0, 100, 1090, 1100);
+
+        test("testStrideNegScalePosNotOneInIntLoop", 0, 100, 1090, 0);
+
+        test("testStridePosScaleNegNotOneInIntLoop", 0, 100, 1090, 1089);
+
+        v = ((long)Integer.MAX_VALUE / 10000) * 9999;
+
+        test("testStridePosNotOneScalePosInIntLoop", -v, v, v * 4, 2 * v);
+
+        test("testStrideNegNotOneScaleNegInIntLoop", -v, v, v * 4, 2 * v);
+
+        test("testStrideNegNotOneScalePosInIntLoop", -v, v, v * 4, 2 * v);
+
+        test("testStridePosNotOneScaleNegInIntLoop", -v, v, v * 4, 2 * v - 1);
+
+        // offset causes overflow: a huge offset must still produce IndexOutOfBoundsException from compiled code
+        {
+            Method m = newClassLoader().loadClass("TestLongRangeCheck").getDeclaredMethod("testStridePosScalePosInIntLoop", long.class, long.class, long.class, long.class);
+            m.invoke(null, 0, 100, 100, 0); // warm-up with in-bounds indices 0..99
+            compile(m);
+
+            m.invoke(null, 0, 100, 100, 0); // same in-bounds arguments: must not deoptimize
+            assertIsCompiled(m);
+            try {
+                m.invoke(null, 0, 100, 100, Long.MAX_VALUE - 50); // first index is already Long.MAX_VALUE - 50, beyond length 100
+                throw new RuntimeException("should have thrown");
+            } catch(InvocationTargetException e) {
+                if (!(e.getCause() instanceof IndexOutOfBoundsException)) { // reflection wraps the exception thrown by the test method
+                    throw new RuntimeException("unexpected exception");
+                }
+            }
+            assertIsNotCompiled(m);
+        }
+        // no spurious deopt if the range check doesn't fail because not executed (int loop variant)
+        {
+            Method m = newClassLoader().loadClass("TestLongRangeCheck").getDeclaredMethod("testStridePosScalePosConditionalInIntLoop", long.class, long.class, long.class, long.class, long.class, long.class);
+            m.invoke(null, 0, 100, 100, 0, 0, 100); // warm-up: the guarded check runs on every iteration
+            compile(m);
+
+            m.invoke(null, 0, 100, 100, -50, 50, 100); // check only runs for i in [50, 100): indices 0..49 are in bounds
+            assertIsCompiled(m);
+        }
+        {
+            Method m = newClassLoader().loadClass("TestLongRangeCheck").getDeclaredMethod("testStridePosScalePosConditionalInIntLoop", long.class, long.class, long.class, long.class, long.class, long.class);
+            m.invoke(null, 0, 100, 100, 0, 0, 100); // warm-up: the guarded check runs on every iteration
+            compile(m);
+
+            m.invoke(null, 0, 100, Long.MAX_VALUE, Long.MAX_VALUE - 50, 0, 50); // check only runs for i in [0, 50): indices stay below length Long.MAX_VALUE
+            assertIsCompiled(m);
+        }
+
+        test("testStridePosScalePosNotOneInIntLoop2", 0, 100, 1090, 0);
+
+        test("testStrideNegScaleNegNotOneInIntLoop2", 0, 100, 1090, 1100);
+
+        test("testStrideNegScalePosNotOneInIntLoop2", 0, 100, 1090, 0);
+
+        test("testStridePosScaleNegNotOneInIntLoop2", 0, 100, 1090, 1089);
+
+        {
+            Method m = newClassLoader().loadClass("TestLongRangeCheck").getDeclaredMethod("testStridePosScalePosInIntLoopOverflow", long.class, long.class, long.class, long.class);
+            long stride = 1 << 14; // mirrors the stride constant of testStridePosScalePosInIntLoopOverflow
+            long scale = 1 << 15; // mirrors the scale constant of testStridePosScalePosInIntLoopOverflow
+            long offset = stride * scale * 4; // 2^31
+            long length = offset + stride * scale * 3 + 1; // one more than the largest index reached without int overflow
+            long stop = stride * 5; // five iterations: i = 0, stride, ..., 4 * stride
+
+            m.invoke(null, 0, stop, length, offset);
+            compile(m);
+
+            m.invoke(null, 0, stop, length, offset);
+            // deoptimizes even though no range check fails: in the last iteration the int product scale * i wraps to Integer.MIN_VALUE, so adding offset (2^31) gives index 0
+        }
+        {
+            Method m = newClassLoader().loadClass("TestLongRangeCheck").getDeclaredMethod("testStridePosScalePosInIntLoopOverflow", long.class, long.class, long.class, long.class);
+            long stride = 1 << 14; // mirrors the stride constant of testStridePosScalePosInIntLoopOverflow
+            long scale = 1 << 15; // mirrors the scale constant of testStridePosScalePosInIntLoopOverflow
+            long offset = stride * scale * 4; // 2^31
+            long length = offset + stride * scale * 3 + 1;
+            long stop = stride * 5; // five iterations: i = 0, stride, ..., 4 * stride
+
+            m.invoke(null, 0, stop, length, offset); // warm-up with arguments for which every index is in bounds
+            compile(m);
+
+            offset = 0; // without the 2^31 offset the wrapped product of the last iteration is a negative index
+            stop = stride * 5; // same value as before
+
+            try {
+                m.invoke(null, 0, stop, length, offset);
+                throw new RuntimeException("should have thrown");
+            } catch(InvocationTargetException e) {
+                if (!(e.getCause() instanceof IndexOutOfBoundsException)) { // reflection wraps the exception thrown by the test method
+                    throw new RuntimeException("unexpected exception");
+                }
+            }
+            assertIsNotCompiled(m);
+        }
    }

    public static void testStridePosScalePos(long start, long stop, long length, long offset) {
@ -301,4 +410,177 @@ public class TestLongRangeCheck {
            }
        }
    }
+
+    private static void checkInputs(long... inputs) { // rejects any argument that does not fit in an int
+        for (int i = 0; i < inputs.length; i++) {
+            if ((long)((int)inputs[i]) != inputs[i]) { // narrowing to int and widening back must give the same value
+                throw new RuntimeException("bad arguments");
+            }
+        }
+    }
+
+    public static void testStridePosScalePosInIntLoop(long start, long stop, long length, long offset) { // ascending int loop, long scale of 1
+        checkInputs(start, stop); // both bounds are cast to int below
+        final long scale = 1;
+        final int stride = 1;
+        for (int i = (int)start; i < (int)stop; i += stride) {
+            Preconditions.checkIndex(scale * i + offset, length, null); // long arithmetic: i is widened because scale is a long
+        }
+    }
+
+    public static void testStrideNegScaleNegInIntLoop(long start, long stop, long length, long offset) { // descending int loop, long scale of -1
+        checkInputs(start, stop); // both bounds are cast to int below
+        final long scale = -1;
+        final int stride = 1;
+        for (int i = (int)stop; i > (int)start; i -= stride) { // visits stop down to start + 1
+            Preconditions.checkIndex(scale * i + offset, length, null); // long arithmetic: i is widened because scale is a long
+        }
+    }
+
+    public static void testStrideNegScalePosInIntLoop(long start, long stop, long length, long offset) { // descending int loop, long scale of 1
+        checkInputs(start, stop); // both bounds are cast to int below
+        final long scale = 1;
+        final int stride = 1;
+        for (int i = (int)(stop-1); i >= (int)start; i -= stride) { // visits stop - 1 down to start
+            Preconditions.checkIndex(scale * i + offset, length, null); // long arithmetic: i is widened because scale is a long
+        }
+    }
+
+    public static void testStridePosScaleNegInIntLoop(long start, long stop, long length, long offset) { // ascending int loop, long scale of -1
+        checkInputs(start, stop); // both bounds are cast to int below
+        final long scale = -1;
+        final int stride = 1;
+        for (int i = (int)start; i < (int)stop; i += stride) {
+            Preconditions.checkIndex(scale * i + offset, length, null); // long arithmetic: i is widened because scale is a long
+        }
+    }
+
+    public static void testStridePosScalePosNotOneInIntLoop(long start, long stop, long length, long offset) { // ascending int loop, long scale of 11
+        checkInputs(start, stop); // both bounds are cast to int below
+        final long scale = 11;
+        final int stride = 1;
+        for (int i = (int)start; i < (int)stop; i += stride) {
+            Preconditions.checkIndex(scale * i + offset, length, null); // long arithmetic: i is widened because scale is a long
+        }
+    }
+
+    public static void testStrideNegScaleNegNotOneInIntLoop(long start, long stop, long length, long offset) { // descending int loop, long scale of -11
+        checkInputs(start, stop); // both bounds are cast to int below
+        final long scale = -11;
+        final int stride = 1;
+        for (int i = (int)stop; i > (int)start; i -= stride) { // visits stop down to start + 1
+            Preconditions.checkIndex(scale * i + offset, length, null); // long arithmetic: i is widened because scale is a long
+        }
+    }
+
+    public static void testStrideNegScalePosNotOneInIntLoop(long start, long stop, long length, long offset) { // descending int loop, long scale of 11
+        checkInputs(start, stop); // both bounds are cast to int below
+        final long scale = 11;
+        final int stride = 1;
+        for (int i = (int)(stop-1); i >= (int)start; i -= stride) { // visits stop - 1 down to start
+            Preconditions.checkIndex(scale * i + offset, length, null); // long arithmetic: i is widened because scale is a long
+        }
+    }
+
+    public static void testStridePosScaleNegNotOneInIntLoop(long start, long stop, long length, long offset) { // ascending int loop, long scale of -11
+        checkInputs(start, stop); // both bounds are cast to int below
+        final long scale = -11;
+        final int stride = 1;
+        for (int i = (int)start; i < (int)stop; i += stride) {
+            Preconditions.checkIndex(scale * i + offset, length, null); // long arithmetic: i is widened because scale is a long
+        }
+    }
+
+    public static void testStridePosNotOneScalePosInIntLoop(long start, long stop, long length, long offset) { // ascending int loop with a large stride, long scale of 2
+        checkInputs(start, stop); // both bounds are cast to int below
+        final long scale = 2;
+        final int stride = Integer.MAX_VALUE / 10000; // main() derives its loop bounds from this same quotient
+        for (int i = (int)start; i < (int)stop; i += stride) {
+            Preconditions.checkIndex(scale * i + offset, length, null); // long arithmetic: i is widened because scale is a long
+        }
+    }
+
+    public static void testStrideNegNotOneScaleNegInIntLoop(long start, long stop, long length, long offset) { // descending int loop with a large stride, long scale of -2
+        checkInputs(start, stop); // both bounds are cast to int below
+        final long scale = -2;
+        final int stride = Integer.MAX_VALUE / 10000; // main() derives its loop bounds from this same quotient
+        for (int i = (int)stop; i > (int)start; i -= stride) { // starts at stop and stays above start
+            Preconditions.checkIndex(scale * i + offset, length, null); // long arithmetic: i is widened because scale is a long
+        }
+    }
+
+    public static void testStrideNegNotOneScalePosInIntLoop(long start, long stop, long length, long offset) { // descending int loop with a large stride, long scale of 2
+        checkInputs(start, stop); // both bounds are cast to int below
+        final long scale = 2;
+        final int stride = Integer.MAX_VALUE / 10000; // main() derives its loop bounds from this same quotient
+        for (int i = (int)(stop-1); i >= (int)start; i -= stride) { // starts at stop - 1 and does not go below start
+            Preconditions.checkIndex(scale * i + offset, length, null); // long arithmetic: i is widened because scale is a long
+        }
+    }
+
+    public static void testStridePosNotOneScaleNegInIntLoop(long start, long stop, long length, long offset) { // ascending int loop with a large stride, long scale of -2
+        checkInputs(start, stop); // both bounds are cast to int below
+        final long scale = -2;
+        final int stride = Integer.MAX_VALUE / 10000; // main() derives its loop bounds from this same quotient
+        for (int i = (int)start; i < (int)stop; i += stride) {
+            Preconditions.checkIndex(scale * i + offset, length, null); // long arithmetic: i is widened because scale is a long
+        }
+    }
+
+    public static void testStridePosScalePosConditionalInIntLoop(long start, long stop, long length, long offset, long start2, long stop2) { // ascending int loop whose range check only runs for i in [start2, stop2)
+        checkInputs(start, stop, start2, stop2); // all four bounds are cast to int below
+        Preconditions.checkIndex(0, length, null); // throws up front unless length > 0
+        final long scale = 1;
+        final int stride = 1;
+        for (int i = (int)start; i < (int)stop; i += stride) {
+            if (i >= (int)start2 && i < (int)stop2) { // outside this window the range check is not executed
+                Preconditions.checkIndex(scale * i + offset, length, null); // long arithmetic: i is widened because scale is a long
+            }
+        }
+    }
+
+    public static void testStridePosScalePosNotOneInIntLoop2(long start, long stop, long length, long offset) { // ascending int loop, int scale of 11
+        checkInputs(start, stop); // both bounds are cast to int below
+        final int scale = 11;
+        final int stride = 1;
+        for (int i = (int)start; i < (int)stop; i += stride) {
+            Preconditions.checkIndex(scale * i + offset, length, null); // scale * i is an int multiply; it is widened only for the addition of offset
+        }
+    }
+
+    public static void testStrideNegScaleNegNotOneInIntLoop2(long start, long stop, long length, long offset) { // descending int loop, int scale of -11
+        checkInputs(start, stop); // both bounds are cast to int below
+        final int scale = -11;
+        final int stride = 1;
+        for (int i = (int)stop; i > (int)start; i -= stride) { // visits stop down to start + 1
+            Preconditions.checkIndex(scale * i + offset, length, null); // scale * i is an int multiply; it is widened only for the addition of offset
+        }
+    }
+
+    public static void testStrideNegScalePosNotOneInIntLoop2(long start, long stop, long length, long offset) { // descending int loop, int scale of 11
+        checkInputs(start, stop); // both bounds are cast to int below
+        final int scale = 11;
+        final int stride = 1;
+        for (int i = (int)(stop-1); i >= (int)start; i -= stride) { // visits stop - 1 down to start
+            Preconditions.checkIndex(scale * i + offset, length, null); // scale * i is an int multiply; it is widened only for the addition of offset
+        }
+    }
+
+    public static void testStridePosScaleNegNotOneInIntLoop2(long start, long stop, long length, long offset) { // ascending int loop, int scale of -11
+        checkInputs(start, stop); // both bounds are cast to int below
+        final int scale = -11;
+        final int stride = 1;
+        for (int i = (int)start; i < (int)stop; i += stride) {
+            Preconditions.checkIndex(scale * i + offset, length, null); // scale * i is an int multiply; it is widened only for the addition of offset
+        }
+    }
+
+    public static void testStridePosScalePosInIntLoopOverflow(long start, long stop, long length, long offset) { // ascending int loop whose int product scale * i can wrap around
+        checkInputs(start, stop); // both bounds are cast to int below
+        final int scale = 1 << 15;
+        final int stride = 1 << 14;
+        for (int i = (int)start; i < (int)stop; i += stride) {
+            Preconditions.checkIndex(scale * i + offset, length, null); // scale * i is an int multiply: it wraps once i reaches 1 << 16, before being widened for the addition
+        }
+    }
 }