8303466: C2: failed: malformed control flow. Limit type made precise with MaxL/MinL

Reviewed-by: roland, kvn, chagedorn, thartmann
2023-04-26 05:42:26 +00:00 · 2023-04-26 05:42:26 +00:00 · cc894d849a
commit cc894d849a
parent ed1ebd242a
9 changed files with 386 additions and 91 deletions
--- a/src/hotspot/share/opto/addnode.cpp
+++ b/src/hotspot/share/opto/addnode.cpp
@ -1258,6 +1258,138 @@ const Type *MinINode::add_ring( const Type *t0, const Type *t1 ) const {
  return TypeInt::make( MIN2(r0->_lo,r1->_lo), MIN2(r0->_hi,r1->_hi), MAX2(r0->_widen,r1->_widen) );
 }

+// Collapse the "addition with overflow-protection" pattern, and the symmetrical
+// "subtraction with underflow-protection" pattern. These are created during the
+// unrolling, when we have to adjust the limit by subtracting the stride, but want
+// to protect against underflow: MaxL(SubL(limit, stride), min_jint).
+// If we have more than one of those in a sequence:
+//
+//   x  con2
+//   |  |
+//   AddL  clamp2
+//     |    |
+//    Max/MinL con1
+//          |  |
+//          AddL  clamp1
+//            |    |
+//           Max/MinL (n)
+//
+// We want to collapse it to:
+//
+//   x  con1  con2
+//   |    |    |
+//   |   AddLNode (new_con)
+//   |    |
+//  AddLNode  clamp1
+//        |    |
+//       Max/MinL (n)
+//
+// Note: we assume that SubL was already replaced by an AddL, and that the stride
+// has its sign flipped: SubL(limit, stride) -> AddL(limit, -stride).
+Node* fold_subI_no_underflow_pattern(Node* n, PhaseGVN* phase) {
+  assert(n->Opcode() == Op_MaxL || n->Opcode() == Op_MinL, "sanity");
+  // Check that the two clamps have the correct values.
+  jlong clamp = (n->Opcode() == Op_MaxL) ? min_jint : max_jint;
+  auto is_clamp = [&](Node* c) {
+    const TypeLong* t = phase->type(c)->isa_long();
+    return t != nullptr && t->is_con() &&
+           t->get_con() == clamp;
+  };
+  // Check that the constants are negative if MaxL, and positive if MinL.
+  auto is_sub_con = [&](Node* c) {
+    const TypeLong* t = phase->type(c)->isa_long();
+    return t != nullptr && t->is_con() &&
+           t->get_con() < max_jint && t->get_con() > min_jint &&
+           (t->get_con() < 0) == (n->Opcode() == Op_MaxL);
+  };
+  // Verify the graph level by level:
+  Node* add1   = n->in(1);
+  Node* clamp1 = n->in(2);
+  if (add1->Opcode() == Op_AddL && is_clamp(clamp1)) {
+    Node* max2 = add1->in(1);
+    Node* con1 = add1->in(2);
+    if (max2->Opcode() == n->Opcode() && is_sub_con(con1)) {
+      Node* add2   = max2->in(1);
+      Node* clamp2 = max2->in(2);
+      if (add2->Opcode() == Op_AddL && is_clamp(clamp2)) {
+        Node* x    = add2->in(1);
+        Node* con2 = add2->in(2);
+        if (is_sub_con(con2)) {
+          Node* new_con = phase->transform(new AddLNode(con1, con2));
+          Node* new_sub = phase->transform(new AddLNode(x, new_con));
+          n->set_req_X(1, new_sub, phase);
+          return n;
+        }
+      }
+    }
+  }
+  return nullptr;
+}
+
+const Type* MaxLNode::add_ring(const Type* t0, const Type* t1) const {
+  const TypeLong* r0 = t0->is_long();
+  const TypeLong* r1 = t1->is_long();
+
+  return TypeLong::make(MAX2(r0->_lo, r1->_lo), MAX2(r0->_hi, r1->_hi), MAX2(r0->_widen, r1->_widen));
+}
+
+Node* MaxLNode::Identity(PhaseGVN* phase) {
+  const TypeLong* t1 = phase->type(in(1))->is_long();
+  const TypeLong* t2 = phase->type(in(2))->is_long();
+
+  // Can we determine maximum statically?
+  if (t1->_lo >= t2->_hi) {
+    return in(1);
+  } else if (t2->_lo >= t1->_hi) {
+    return in(2);
+  }
+
+  return MaxNode::Identity(phase);
+}
+
+Node* MaxLNode::Ideal(PhaseGVN* phase, bool can_reshape) {
+  Node* n = AddNode::Ideal(phase, can_reshape);
+  if (n != nullptr) {
+    return n;
+  }
+  if (can_reshape) {
+    return fold_subI_no_underflow_pattern(this, phase);
+  }
+  return nullptr;
+}
+
+const Type* MinLNode::add_ring(const Type* t0, const Type* t1) const {
+  const TypeLong* r0 = t0->is_long();
+  const TypeLong* r1 = t1->is_long();
+
+  return TypeLong::make(MIN2(r0->_lo, r1->_lo), MIN2(r0->_hi, r1->_hi), MIN2(r0->_widen, r1->_widen));
+}
+
+Node* MinLNode::Identity(PhaseGVN* phase) {
+  const TypeLong* t1 = phase->type(in(1))->is_long();
+  const TypeLong* t2 = phase->type(in(2))->is_long();
+
+  // Can we determine minimum statically?
+  if (t1->_lo >= t2->_hi) {
+    return in(2);
+  } else if (t2->_lo >= t1->_hi) {
+    return in(1);
+  }
+
+  return MaxNode::Identity(phase);
+}
+
+Node* MinLNode::Ideal(PhaseGVN* phase, bool can_reshape) {
+  Node* n = AddNode::Ideal(phase, can_reshape);
+  if (n != nullptr) {
+    return n;
+  }
+  if (can_reshape) {
+    return fold_subI_no_underflow_pattern(this, phase);
+  }
+  return nullptr;
+}
+
 //------------------------------add_ring---------------------------------------
 const Type *MinFNode::add_ring( const Type *t0, const Type *t1 ) const {
  const TypeF *r0 = t0->is_float_constant();
--- a/src/hotspot/share/opto/addnode.hpp
+++ b/src/hotspot/share/opto/addnode.hpp
@ -322,28 +322,38 @@ public:
 // MAXimum of 2 longs.
 class MaxLNode : public MaxNode {
 public:
-  MaxLNode(Node *in1, Node *in2) : MaxNode(in1, in2) {}
+  MaxLNode(Compile* C, Node* in1, Node* in2) : MaxNode(in1, in2) {
+    init_flags(Flag_is_macro);
+    C->add_macro_node(this);
+  }
  virtual int Opcode() const;
-  virtual const Type *add_ring(const Type*, const Type*) const { return TypeLong::LONG; }
-  virtual const Type *add_id() const { return TypeLong::make(min_jlong); }
-  virtual const Type *bottom_type() const { return TypeLong::LONG; }
+  virtual const Type* add_ring(const Type* t0, const Type* t1) const;
+  virtual const Type* add_id() const { return TypeLong::make(min_jlong); }
+  virtual const Type* bottom_type() const { return TypeLong::LONG; }
  virtual uint ideal_reg() const { return Op_RegL; }
  int max_opcode() const { return Op_MaxL; }
  int min_opcode() const { return Op_MinL; }
+  virtual Node* Identity(PhaseGVN* phase);
+  virtual Node* Ideal(PhaseGVN *phase, bool can_reshape);
 };

 //------------------------------MinLNode---------------------------------------
 // MINimum of 2 longs.
 class MinLNode : public MaxNode {
 public:
-  MinLNode(Node *in1, Node *in2) : MaxNode(in1, in2) {}
+  MinLNode(Compile* C, Node* in1, Node* in2) : MaxNode(in1, in2) {
+    init_flags(Flag_is_macro);
+    C->add_macro_node(this);
+  }
  virtual int Opcode() const;
-  virtual const Type *add_ring(const Type*, const Type*) const { return TypeLong::LONG; }
-  virtual const Type *add_id() const { return TypeLong::make(max_jlong); }
-  virtual const Type *bottom_type() const { return TypeLong::LONG; }
+  virtual const Type* add_ring(const Type* t0, const Type* t1) const;
+  virtual const Type* add_id() const { return TypeLong::make(max_jlong); }
+  virtual const Type* bottom_type() const { return TypeLong::LONG; }
  virtual uint ideal_reg() const { return Op_RegL; }
  int max_opcode() const { return Op_MaxL; }
  int min_opcode() const { return Op_MinL; }
+  virtual Node* Identity(PhaseGVN* phase);
+  virtual Node* Ideal(PhaseGVN* phase, bool can_reshape);
 };

 //------------------------------MaxFNode---------------------------------------
--- a/src/hotspot/share/opto/convertnode.cpp
+++ b/src/hotspot/share/opto/convertnode.cpp
@ -322,6 +322,20 @@ const Type* ConvI2LNode::Value(PhaseGVN* phase) const {
  return this_type;
 }

+Node* ConvI2LNode::Identity(PhaseGVN* phase) {
+  // If type is in "int" sub-range, we can
+  // convert I2L(L2I(x)) => x
+  // since the conversions have no effect.
+  if (in(1)->Opcode() == Op_ConvL2I) {
+    Node* x = in(1)->in(1);
+    const TypeLong* t = phase->type(x)->isa_long();
+    if (t != nullptr && t->_lo >= min_jint && t->_hi <= max_jint) {
+      return x;
+    }
+  }
+  return this;
+}
+
 #ifdef ASSERT
 static inline bool long_ranges_overlap(jlong lo1, jlong hi1,
                                       jlong lo2, jlong hi2) {
--- a/src/hotspot/share/opto/convertnode.hpp
+++ b/src/hotspot/share/opto/convertnode.hpp
@ -190,6 +190,7 @@ class ConvI2LNode : public TypeNode {
  virtual int Opcode() const;
  virtual const Type* Value(PhaseGVN* phase) const;
  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+  virtual Node* Identity(PhaseGVN* phase);
  virtual uint  ideal_reg() const { return Op_RegL; }
 };

--- a/src/hotspot/share/opto/loopTransform.cpp
+++ b/src/hotspot/share/opto/loopTransform.cpp
@ -2288,74 +2288,32 @@ void PhaseIdealLoop::do_unroll(IdealLoopTree *loop, Node_List &old_new, bool adj
      new_limit = _igvn.intcon(limit->get_int() - stride_con);
      set_ctrl(new_limit, C->root());
    } else {
-      // Limit is not constant.
-      assert(loop_head->unrolled_count() != 1 || has_ctrl(opaq), "should have opaque for first unroll");
-      if ((stride_con > 0 && (java_subtract(limit_type->_lo, stride_con) < limit_type->_lo)) ||
-          (stride_con < 0 && (java_subtract(limit_type->_hi, stride_con) > limit_type->_hi))) {
-        // No underflow.
-        new_limit = new SubINode(limit, stride);
+      // Limit is not constant. Int subtraction could lead to underflow.
+      // (1) Convert to long.
+      Node* limit_l = new ConvI2LNode(limit);
+      register_new_node(limit_l, get_ctrl(limit));
+      Node* stride_l = _igvn.longcon(stride_con);
+      set_ctrl(stride_l, C->root());
+
+      // (2) Subtract: compute in long, to prevent underflow.
+      Node* new_limit_l = new SubLNode(limit_l, stride_l);
+      register_new_node(new_limit_l, ctrl);
+
+      // (3) Clamp to int range, in case we had subtraction underflow.
+      Node* underflow_clamp_l = _igvn.longcon((stride_con > 0) ? min_jint : max_jint);
+      set_ctrl(underflow_clamp_l, C->root());
+      Node* new_limit_no_underflow_l = nullptr;
+      if (stride_con > 0) {
+        // limit = MaxL(limit - stride, min_jint)
+        new_limit_no_underflow_l = new MaxLNode(C, new_limit_l, underflow_clamp_l);
      } else {
-        // (limit - stride) may underflow.
-        // Clamp the adjustment value with MININT or MAXINT:
-        //
-        //   new_limit = limit-stride
-        //   if (stride > 0)
-        //     new_limit = (limit < new_limit) ? MININT : new_limit;
-        //   else
-        //     new_limit = (limit > new_limit) ? MAXINT : new_limit;
-        //
-        BoolTest::mask bt = loop_end->test_trip();
-        assert(bt == BoolTest::lt || bt == BoolTest::gt, "canonical test is expected");
-        Node* underflow_clamp = _igvn.intcon((stride_con > 0) ? min_jint : max_jint);
-        set_ctrl(underflow_clamp, C->root());
-        Node* limit_before_underflow = nullptr;
-        Node* prev_limit = nullptr;
-        Node* bol = limit->is_CMove() ? limit->in(CMoveNode::Condition) : nullptr;
-        if (loop_head->unrolled_count() > 1 &&
-            limit->is_CMove() && limit->Opcode() == Op_CMoveI &&
-            limit->in(CMoveNode::IfTrue) == underflow_clamp &&
-            bol->as_Bool()->_test._test == bt &&
-            bol->in(1)->Opcode() == Op_CmpI &&
-            bol->in(1)->in(2) == limit->in(CMoveNode::IfFalse)) {
-          // Loop was unrolled before, and had an unrolling protection CMoveI.
-          // Use inputs to previous CMoveI for the new one:
-          prev_limit = limit->in(CMoveNode::IfFalse); // unpack previous limit with underflow
-          limit_before_underflow = bol->in(1)->in(1); // CMoveI -> Bool -> CmpI -> limit_before_underflow
-        } else {
-          // Loop was not unrolled before, or the limit did not underflow in a previous unrolling.
-          prev_limit = limit;
-          limit_before_underflow = limit;
-        }
-        //                           prev_limit   stride
-        //                                |         |
-        // limit_before_underflow  new_limit_with_underflow (SubI)
-        //                   |        |           |
-        //                 underflow_cmp          |
-        //                   |                    |
-        //                 underflow_bool [lt/gt] |
-        //                   |                    |
-        //                   +----+  +------------+
-        //                        |  |
-        //                        |  | underflow_clamp (min_jint/max_jint)
-        //                        |  |  |
-        //                       CMoveINode ([min_jint..hi] / [lo..max_jing])
-        //
-        assert(limit_before_underflow != nullptr && prev_limit != nullptr, "must find them");
-        Node* new_limit_with_underflow = new SubINode(prev_limit, stride);
-        register_new_node(new_limit_with_underflow, ctrl);
-        // We must compare with limit_before_underflow, prev_limit may already have underflowed.
-        Node* underflow_cmp = new CmpINode(limit_before_underflow, new_limit_with_underflow);
-        register_new_node(underflow_cmp, ctrl);
-        Node* underflow_bool = new BoolNode(underflow_cmp, bt);
-        register_new_node(underflow_bool, ctrl);
-        // Prevent type from becoming too pessimistic due to type underflow. The new limit
-        // may be arbitrarily decreased by unrolling, but still in [min_jint..hi] / [lo..max_jint]
-        const TypeInt* limit_before_underflow_t = _igvn.type(limit_before_underflow)->is_int();
-        const TypeInt* no_underflow_t = TypeInt::make(stride_con > 0 ? min_jint : limit_before_underflow_t->_lo,
-                                                      stride_con > 0 ? limit_before_underflow_t->_hi : max_jint,
-                                                      Type::WidenMax);
-        new_limit = new CMoveINode(underflow_bool, new_limit_with_underflow, underflow_clamp, no_underflow_t);
+        // limit = MinL(limit - stride, max_jint)
+        new_limit_no_underflow_l = new MinLNode(C, new_limit_l, underflow_clamp_l);
      }
+      register_new_node(new_limit_no_underflow_l, ctrl);
+
+      // (4) Convert back to int.
+      new_limit = new ConvL2INode(new_limit_no_underflow_l);
      register_new_node(new_limit, ctrl);
    }

@ -2564,6 +2522,9 @@ void PhaseIdealLoop::mark_reductions(IdealLoopTree *loop) {
 //------------------------------adjust_limit-----------------------------------
 // Helper function that computes new loop limit as (rc_limit-offset)/scale
 Node* PhaseIdealLoop::adjust_limit(bool is_positive_stride, Node* scale, Node* offset, Node* rc_limit, Node* old_limit, Node* pre_ctrl, bool round) {
+  Node* old_limit_long = new ConvI2LNode(old_limit);
+  register_new_node(old_limit_long, pre_ctrl);
+
  Node* sub = new SubLNode(rc_limit, offset);
  register_new_node(sub, pre_ctrl);
  Node* limit = new DivLNode(nullptr, sub, scale);
@ -2589,27 +2550,19 @@ Node* PhaseIdealLoop::adjust_limit(bool is_positive_stride, Node* scale, Node* o
  //   - integer underflow of limit: MAXL chooses old_limit (>= MIN_INT > limit)
  // INT() is finally converting the limit back to an integer value.

-  // We use CMove nodes to implement long versions of min/max (MINL/MAXL).
-  // We use helper methods for inner MINL/MAXL which return CMoveL nodes to keep a long value for the outer MINL/MAXL comparison:
-  Node* inner_result_long;
+  Node* inner_result_long = nullptr;
+  Node* outer_result_long = nullptr;
  if (is_positive_stride) {
-    inner_result_long = MaxNode::signed_max(limit, _igvn.longcon(min_jint), TypeLong::LONG, _igvn);
+    inner_result_long = new MaxLNode(C, limit, _igvn.longcon(min_jint));
+    outer_result_long = new MinLNode(C, inner_result_long, old_limit_long);
  } else {
-    inner_result_long = MaxNode::signed_min(limit, _igvn.longcon(max_jint), TypeLong::LONG, _igvn);
+    inner_result_long = new MinLNode(C, limit, _igvn.longcon(max_jint));
+    outer_result_long = new MaxLNode(C, inner_result_long, old_limit_long);
  }
-  set_subtree_ctrl(inner_result_long, false);
+  register_new_node(inner_result_long, pre_ctrl);
+  register_new_node(outer_result_long, pre_ctrl);

-  // Outer MINL/MAXL:
-  // The comparison is done with long values but the result is the converted back to int by using CmovI.
-  Node* old_limit_long = new ConvI2LNode(old_limit);
-  register_new_node(old_limit_long, pre_ctrl);
-  Node* cmp = new CmpLNode(old_limit_long, limit);
-  register_new_node(cmp, pre_ctrl);
-  Node* bol = new BoolNode(cmp, is_positive_stride ? BoolTest::gt : BoolTest::lt);
-  register_new_node(bol, pre_ctrl);
-  Node* inner_result_int = new ConvL2INode(inner_result_long); // Could under-/overflow but that's fine as comparison was done with CmpL
-  register_new_node(inner_result_int, pre_ctrl);
-  limit = new CMoveINode(bol, old_limit, inner_result_int, TypeInt::INT);
+  limit = new ConvL2INode(outer_result_long);
  register_new_node(limit, pre_ctrl);
  return limit;
 }
--- a/src/hotspot/share/opto/macro.cpp
+++ b/src/hotspot/share/opto/macro.cpp
@ -2373,6 +2373,8 @@ void PhaseMacroExpand::eliminate_macro_nodes() {
        assert(n->Opcode() == Op_LoopLimit ||
               n->Opcode() == Op_Opaque3   ||
               n->Opcode() == Op_Opaque4   ||
+               n->Opcode() == Op_MaxL      ||
+               n->Opcode() == Op_MinL      ||
               BarrierSet::barrier_set()->barrier_set_c2()->is_gc_barrier_node(n),
               "unknown node type in macro list");
      }
@ -2457,6 +2459,18 @@ bool PhaseMacroExpand::expand_macro_nodes() {
        n->as_OuterStripMinedLoop()->adjust_strip_mined_loop(&_igvn);
        C->remove_macro_node(n);
        success = true;
+      } else if (n->Opcode() == Op_MaxL) {
+        // Since MaxL and MinL are not implemented in the backend, we expand them to
+        // a CMoveL construct now. At least until here, the type could be computed
+        // precisely. CMoveL is not so smart, but we can give it at least the best
+        // type we know abouot n now.
+        Node* repl = MaxNode::signed_max(n->in(1), n->in(2), _igvn.type(n), _igvn);
+        _igvn.replace_node(n, repl);
+        success = true;
+      } else if (n->Opcode() == Op_MinL) {
+        Node* repl = MaxNode::signed_min(n->in(1), n->in(2), _igvn.type(n), _igvn);
+        _igvn.replace_node(n, repl);
+        success = true;
      }
      assert(!success || (C->macro_count() == (old_macro_count - 1)), "elimination must have deleted one node from macro list");
      progress = progress || success;
--- a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java
+++ b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java
@ -654,6 +654,11 @@ public class IRNode {
        beforeMatchingNameRegex(MAX_I, "MaxI");
    }

+    public static final String MAX_L = PREFIX + "MAX_L" + POSTFIX;
+    static {
+        beforeMatchingNameRegex(MAX_L, "MaxL");
+    }
+
    public static final String MAX_V = PREFIX + "MAX_V" + POSTFIX;
    static {
        beforeMatchingNameRegex(MAX_V, "MaxV");
@ -679,6 +684,11 @@ public class IRNode {
        beforeMatchingNameRegex(MIN_I, "MinI");
    }

+    public static final String MIN_L = PREFIX + "MIN_L" + POSTFIX;
+    static {
+        beforeMatchingNameRegex(MIN_L, "MinL");
+    }
+
    public static final String MIN_V = PREFIX + "MIN_V" + POSTFIX;
    static {
        beforeMatchingNameRegex(MIN_V, "MinV");
--- a/test/hotspot/jtreg/compiler/loopopts/TestLoopLimitSubtractionsCollapse.java
+++ b/test/hotspot/jtreg/compiler/loopopts/TestLoopLimitSubtractionsCollapse.java
@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8303466
+ * @summary Verify that AddL->MaxL->AddL->MaxL chains of unroll limit adjustments collapse.
+ *          If it did not collapse, we would have about 10 MaxL/MinL. With the collapse, it
+ *          is now one or two.
+ * @library /test/lib /
+ * @requires vm.compiler2.enabled
+ * @run driver compiler.loopopts.TestLoopLimitSubtractionsCollapse
+ */
+
+package compiler.loopopts;
+import compiler.lib.ir_framework.*;
+
+public class TestLoopLimitSubtractionsCollapse {
+    static int START = 0;
+    static int FINISH = 512;
+    static int RANGE = 512;
+
+    static byte[] data1 = new byte[RANGE];
+    static byte[] data2 = new byte[RANGE];
+
+    public static void main(String[] args) {
+        TestFramework.run();
+    }
+
+    @Test
+    @Warmup(0)
+    @IR(counts = {IRNode.MAX_L, "> 0", IRNode.MAX_L, "<= 2"},
+        phase = CompilePhase.PHASEIDEALLOOP_ITERATIONS)
+    public static void test1() {
+        for (int j = START; j < FINISH; j++) {
+            data1[j] = (byte)(data1[j] * 11);
+        }
+    }
+
+    @Test
+    @Warmup(0)
+    @IR(counts = {IRNode.MIN_L, "> 0", IRNode.MIN_L, "<= 2"},
+        phase = CompilePhase.PHASEIDEALLOOP_ITERATIONS)
+    public static void test2() {
+        for (int j = FINISH-1; j >= START; j--) {
+            data2[j] = (byte)(data2[j] * 11);
+        }
+    }
+}
--- a/test/hotspot/jtreg/compiler/loopopts/TestUnrollLimitPreciseType.java
+++ b/test/hotspot/jtreg/compiler/loopopts/TestUnrollLimitPreciseType.java
@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test id=test1
+ * @bug 8298935
+ * @summary CMoveI for underflow protection of the limit did not compute a type that was precise enough.
+ *          This lead to dead data but zero-trip-guard control did not die -> "malformed control flow".
+ * @requires vm.compiler2.enabled
+ * @run main/othervm
+ *      -XX:CompileCommand=compileonly,compiler.loopopts.TestUnrollLimitPreciseType::test1
+ *      -XX:CompileCommand=dontinline,compiler.loopopts.TestUnrollLimitPreciseType::*
+ *      -XX:MaxVectorSize=64
+ *      -Xcomp
+ *      -XX:+UnlockExperimentalVMOptions -XX:PerMethodSpecTrapLimit=0 -XX:PerMethodTrapLimit=0
+ *      compiler.loopopts.TestUnrollLimitPreciseType test1
+ */
+
+/*
+ * @test id=test2
+ * @bug 8298935
+ * @summary CMoveI for underflow protection of the limit did not compute a type that was precise enough.
+ *          This lead to dead data but zero-trip-guard control did not die -> "malformed control flow".
+ * @requires vm.compiler2.enabled
+ * @run main/othervm
+ *      -XX:CompileCommand=compileonly,compiler.loopopts.TestUnrollLimitPreciseType::*
+ *      -Xcomp
+ *      compiler.loopopts.TestUnrollLimitPreciseType test2
+ */
+
+
+package compiler.loopopts;
+
+public class TestUnrollLimitPreciseType {
+    static final int RANGE = 512;
+
+    public static void main(String args[]) {
+        if (args.length != 1) {
+            throw new RuntimeException("Need exactly one argument.");
+        }
+        if (args[0].equals("test1")) {
+            byte[] data = new byte[RANGE];
+            test1(data);
+        } else if (args[0].equals("test2")) {
+            test2();
+        } else {
+            throw new RuntimeException("Do not have: " + args[0]);
+        }
+    }
+
+    public static void test1(byte[] data) {
+        // Did not fully analyze this. But it is also unrolled, SuperWorded,
+        // and further unrolled with vectorlized post loop.
+        // Only seems to reproduce with avx512, and not with avx2.
+        for (int j = 192; j < RANGE; j++) {
+            data[j - 192] = (byte)(data[j] * 11);
+        }
+    }
+
+    static void test2() {
+        // Loop is SuperWord'ed.
+        // We unroll more afterwards, and so add vectorized post loop.
+        // But it turns out that the vectorized post loop is never entered.
+        // This lead to assert, because the zero-trip-guard did not collaspse,
+        // but the CastII with the trip count did die.
+        // Only seems to reproduce with avx512, and not with avx2.
+        double dArr[][] = new double[100][100];
+        for (int i = 2, j = 2; j < 68; j++) {
+            dArr[i][j] = 8;
+        }
+    }
+}