8339733: C2: some nodes can have incorrect control after do_range_check()

Reviewed-by: chagedorn, thartmann
2024-09-12 07:19:54 +00:00 · 2024-09-12 07:19:54 +00:00 · 315abdf8c8
commit 315abdf8c8
parent ac3f92b411
2 changed files with 86 additions and 23 deletions
--- a/src/hotspot/share/opto/loopTransform.cpp
+++ b/src/hotspot/share/opto/loopTransform.cpp
@ -1683,8 +1683,8 @@ void PhaseIdealLoop::insert_pre_post_loops(IdealLoopTree *loop, Node_List &old_n
  // use by range check elimination.
  Node *pre_opaq  = new Opaque1Node(C, pre_limit, limit);

-  register_new_node(pre_limit, pre_head->in(0));
-  register_new_node(pre_opaq , pre_head->in(0));
+  register_new_node(pre_limit, pre_head->in(LoopNode::EntryControl));
+  register_new_node(pre_opaq , pre_head->in(LoopNode::EntryControl));

  // Since no other users of pre-loop compare, I can hack limit directly
  assert(cmp_end->outcnt() == 1, "no other users");
@ -2790,6 +2790,7 @@ void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
  // Find the main loop limit; we will trim it's iterations
  // to not ever trip end tests
  Node *main_limit = cl->limit();
+  Node* main_limit_ctrl = get_ctrl(main_limit);

  // Check graph shape. Cannot optimize a loop if zero-trip
  // Opaque1 node is optimized away and then another round
@ -2822,9 +2823,15 @@ void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
  }
  Opaque1Node *pre_opaq = (Opaque1Node*)pre_opaq1;
  Node *pre_limit = pre_opaq->in(1);
+  Node* pre_limit_ctrl = get_ctrl(pre_limit);

  // Where do we put new limit calculations
-  Node *pre_ctrl = pre_end->loopnode()->in(LoopNode::EntryControl);
+  Node* pre_ctrl = pre_end->loopnode()->in(LoopNode::EntryControl);
+  // Range check elimination optimizes out conditions whose parameters are loop invariant in the main loop. They usually
+  // have control above the pre loop, but there's no guarantee that they do. There's no guarantee either that the pre
+  // loop limit has control that's out of loop (a previous round of range check elimination could have set a limit that's
+  // not loop invariant).
+  Node* new_limit_ctrl = dominated_node(pre_ctrl, pre_limit_ctrl);

  // Ensure the original loop limit is available from the
  // pre-loop Opaque1 node.
@ -2884,14 +2891,14 @@ void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
      Node *limit  = cmp->in(2);
      int scale_con= 1;        // Assume trip counter not scaled

-      Node *limit_c = get_ctrl(limit);
-      if (loop->is_member(get_loop(limit_c))) {
+      Node* limit_ctrl = get_ctrl(limit);
+      if (loop->is_member(get_loop(limit_ctrl))) {
        // Compare might have operands swapped; commute them
        b_test = b_test.commute();
        rc_exp = cmp->in(2);
        limit  = cmp->in(1);
-        limit_c = get_ctrl(limit);
-        if (loop->is_member(get_loop(limit_c))) {
+        limit_ctrl = get_ctrl(limit);
+        if (loop->is_member(get_loop(limit_ctrl))) {
          continue;             // Both inputs are loop varying; cannot RCE
        }
      }
@ -2900,11 +2907,11 @@ void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
      // 'limit' maybe pinned below the zero trip test (probably from a
      // previous round of rce), in which case, it can't be used in the
      // zero trip test expression which must occur before the zero test's if.
-      if (is_dominator(ctrl, limit_c)) {
+      if (is_dominator(ctrl, limit_ctrl)) {
        continue;  // Don't rce this check but continue looking for other candidates.
      }

-      assert(is_dominator(compute_early_ctrl(limit, limit_c), pre_end), "node pinned on loop exit test?");
+      assert(is_dominator(compute_early_ctrl(limit, limit_ctrl), pre_end), "node pinned on loop exit test?");

      // Check for scaled induction variable plus an offset
      Node *offset = nullptr;
@ -2913,19 +2920,26 @@ void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
        continue;
      }

-      Node *offset_c = get_ctrl(offset);
-      if (loop->is_member(get_loop(offset_c))) {
+      Node* offset_ctrl = get_ctrl(offset);
+      if (loop->is_member(get_loop(offset_ctrl))) {
        continue;               // Offset is not really loop invariant
      }
      // Here we know 'offset' is loop invariant.

      // As above for the 'limit', the 'offset' maybe pinned below the
      // zero trip test.
-      if (is_dominator(ctrl, offset_c)) {
+      if (is_dominator(ctrl, offset_ctrl)) {
        continue; // Don't rce this check but continue looking for other candidates.
      }

-      assert(is_dominator(compute_early_ctrl(offset, offset_c), pre_end), "node pinned on loop exit test?");
+      // offset and limit can have control set below the pre loop when they are not loop invariant in the pre loop.
+      // Update their control (and the control of inputs as needed) to be above pre_end
+      offset_ctrl = ensure_node_and_inputs_are_above_pre_end(pre_end, offset);
+      limit_ctrl = ensure_node_and_inputs_are_above_pre_end(pre_end, limit);
+
+      // offset and limit could have control below new_limit_ctrl if they are not loop invariant in the pre loop.
+      new_limit_ctrl = dominated_node(new_limit_ctrl, offset_ctrl, limit_ctrl);
+
 #ifdef ASSERT
      if (TraceRangeLimitCheck) {
        tty->print_cr("RC bool node%s", flip ? " flipped:" : ":");
@ -2945,16 +2959,16 @@ void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
      jlong lscale_con = scale_con;
      Node* int_offset = offset;
      offset = new ConvI2LNode(offset);
-      register_new_node(offset, pre_ctrl);
+      register_new_node(offset, new_limit_ctrl);
      Node* int_limit = limit;
      limit = new ConvI2LNode(limit);
-      register_new_node(limit, pre_ctrl);
+      register_new_node(limit, new_limit_ctrl);

      // Adjust pre and main loop limits to guard the correct iteration set
      if (cmp->Opcode() == Op_CmpU) { // Unsigned compare is really 2 tests
        if (b_test._test == BoolTest::lt) { // Range checks always use lt
          // The underflow and overflow limits: 0 <= scale*I+offset < limit
-          add_constraint(stride_con, lscale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit);
+          add_constraint(stride_con, lscale_con, offset, zero, limit, new_limit_ctrl, &pre_limit, &main_limit);
          Node* init = cl->init_trip();
          Node* opaque_init = new OpaqueLoopInitNode(C, init);
          register_new_node(opaque_init, loop_entry);
@ -3013,22 +3027,22 @@ void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
          // Convert (I*scale+offset) >= Limit to (I*(-scale)+(-offset)) <= -Limit
          lscale_con = -lscale_con;
          offset = new SubLNode(zero, offset);
-          register_new_node(offset, pre_ctrl);
+          register_new_node(offset, new_limit_ctrl);
          limit  = new SubLNode(zero, limit);
-          register_new_node(limit, pre_ctrl);
+          register_new_node(limit, new_limit_ctrl);
          // Fall into LE case
        case BoolTest::le:
          if (b_test._test != BoolTest::gt) {
            // Convert X <= Y to X < Y+1
            limit = new AddLNode(limit, one);
-            register_new_node(limit, pre_ctrl);
+            register_new_node(limit, new_limit_ctrl);
          }
          // Fall into LT case
        case BoolTest::lt:
          // The underflow and overflow limits: MIN_INT <= scale*I+offset < limit
          // Note: (MIN_INT+1 == -MAX_INT) is used instead of MIN_INT here
          // to avoid problem with scale == -1: MIN_INT/(-1) == MIN_INT.
-          add_constraint(stride_con, lscale_con, offset, mini, limit, pre_ctrl, &pre_limit, &main_limit);
+          add_constraint(stride_con, lscale_con, offset, mini, limit, new_limit_ctrl, &pre_limit, &main_limit);
          break;
        default:
          if (PrintOpto) {
@ -3072,8 +3086,14 @@ void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
    // Computed pre-loop limit can be outside of loop iterations range.
    pre_limit = (stride_con > 0) ? (Node*)new MinINode(pre_limit, orig_limit)
                                 : (Node*)new MaxINode(pre_limit, orig_limit);
-    register_new_node(pre_limit, pre_ctrl);
+    register_new_node(pre_limit, new_limit_ctrl);
  }
+  // new pre_limit can push Bool/Cmp/Opaque nodes down (when one of the eliminated condition has parameters that are not
+  // loop invariant in the pre loop.
+  set_ctrl(pre_opaq, new_limit_ctrl);
+  set_ctrl(pre_end->cmp_node(), new_limit_ctrl);
+  set_ctrl(pre_end->in(1), new_limit_ctrl);
+
  _igvn.replace_input_of(pre_opaq, 1, pre_limit);

  // Note:: we are making the main loop limit no longer precise;
@ -3093,7 +3113,7 @@ void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
    register_new_node(main_cmp, main_cle->in(0));
    _igvn.replace_input_of(main_bol, 1, main_cmp);
  }
-  assert(main_limit == cl->limit() || get_ctrl(main_limit) == pre_ctrl, "wrong control for added limit");
+  assert(main_limit == cl->limit() || get_ctrl(main_limit) == new_limit_ctrl, "wrong control for added limit");
  const TypeInt* orig_limit_t = _igvn.type(orig_limit)->is_int();
  bool upward = cl->stride_con() > 0;
  // The new loop limit is <= (for an upward loop) >= (for a downward loop) than the orig limit.
@ -3101,7 +3121,7 @@ void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
  // may be too pessimistic. A CastII here guarantees it's not lost.
  main_limit = new CastIINode(pre_ctrl, main_limit, TypeInt::make(upward ? min_jint : orig_limit_t->_lo,
                                                        upward ? orig_limit_t->_hi : max_jint, Type::WidenMax));
-  register_new_node(main_limit, pre_ctrl);
+  register_new_node(main_limit, new_limit_ctrl);
  // Hack the now-private loop bounds
  _igvn.replace_input_of(main_cmp, 2, main_limit);
  if (abs_stride_is_one) {
@ -3112,6 +3132,37 @@ void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
  // The OpaqueNode is unshared by design
  assert(opqzm->outcnt() == 1, "cannot hack shared node");
  _igvn.replace_input_of(opqzm, 1, main_limit);
+  // new main_limit can push Bool/Cmp nodes down (when one of the eliminated condition has parameters that are not loop
+  // invariant in the pre loop.
+  set_ctrl(opqzm, new_limit_ctrl);
+  set_ctrl(iffm->in(1)->in(1), new_limit_ctrl);
+  set_ctrl(iffm->in(1), new_limit_ctrl);
+}
+
+// Adjust control for node and its inputs (and inputs of its inputs) to be above the pre end
+Node* PhaseIdealLoop::ensure_node_and_inputs_are_above_pre_end(CountedLoopEndNode* pre_end, Node* node) {
+  Node* control = get_ctrl(node);
+  assert(is_dominator(compute_early_ctrl(node, control), pre_end), "node pinned on loop exit test?");
+
+  if (is_dominator(control, pre_end)) {
+    return control;
+  }
+  control = pre_end->in(0);
+  ResourceMark rm;
+  Unique_Node_List wq;
+  wq.push(node);
+  for (uint i = 0; i < wq.size(); i++) {
+    Node* n = wq.at(i);
+    assert(is_dominator(compute_early_ctrl(n, get_ctrl(n)), pre_end), "node pinned on loop exit test?");
+    set_ctrl(n, control);
+    for (uint j = 0; j < n->req(); j++) {
+      Node* in = n->in(j);
+      if (in != nullptr && has_ctrl(in) && !is_dominator(get_ctrl(in), pre_end)) {
+        wq.push(in);
+      }
+    }
+  }
+  return control;
 }

 bool IdealLoopTree::compute_has_range_checks() const {
--- a/src/hotspot/share/opto/loopnode.hpp
+++ b/src/hotspot/share/opto/loopnode.hpp
@ -1180,6 +1180,16 @@ public:
  }
  Node *dom_lca_internal( Node *n1, Node *n2 ) const;

+  Node* dominated_node(Node* c1, Node* c2) {
+    assert(is_dominator(c1, c2) || is_dominator(c2, c1), "nodes must be related");
+    return is_dominator(c1, c2) ? c2 : c1;
+  }
+
+  // Return control node that's dominated by the 2 others
+  Node* dominated_node(Node* c1, Node* c2, Node* c3) {
+    return dominated_node(c1, dominated_node(c2, c3));
+  }
+
  // Build and verify the loop tree without modifying the graph.  This
  // is useful to verify that all inputs properly dominate their uses.
  static void verify(PhaseIterGVN& igvn) {
@ -1780,6 +1790,8 @@ public:
  bool can_move_to_inner_loop(Node* n, LoopNode* n_loop, Node* x);

  void pin_array_access_nodes_dependent_on(Node* ctrl);
+
+  Node* ensure_node_and_inputs_are_above_pre_end(CountedLoopEndNode* pre_end, Node* node);
 };