From cc894d849aa5f730d5a806acfc7a237cf5170af1 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Wed, 26 Apr 2023 05:42:26 +0000 Subject: [PATCH] 8303466: C2: failed: malformed control flow. Limit type made precise with MaxL/MinL Reviewed-by: roland, kvn, chagedorn, thartmann --- src/hotspot/share/opto/addnode.cpp | 132 ++++++++++++++++++ src/hotspot/share/opto/addnode.hpp | 26 ++-- src/hotspot/share/opto/convertnode.cpp | 14 ++ src/hotspot/share/opto/convertnode.hpp | 1 + src/hotspot/share/opto/loopTransform.cpp | 119 +++++----------- src/hotspot/share/opto/macro.cpp | 14 ++ .../compiler/lib/ir_framework/IRNode.java | 10 ++ .../TestLoopLimitSubtractionsCollapse.java | 69 +++++++++ .../loopopts/TestUnrollLimitPreciseType.java | 92 ++++++++++++ 9 files changed, 386 insertions(+), 91 deletions(-) create mode 100644 test/hotspot/jtreg/compiler/loopopts/TestLoopLimitSubtractionsCollapse.java create mode 100644 test/hotspot/jtreg/compiler/loopopts/TestUnrollLimitPreciseType.java diff --git a/src/hotspot/share/opto/addnode.cpp b/src/hotspot/share/opto/addnode.cpp index f4931085fa4..3c56f5bd770 100644 --- a/src/hotspot/share/opto/addnode.cpp +++ b/src/hotspot/share/opto/addnode.cpp @@ -1258,6 +1258,138 @@ const Type *MinINode::add_ring( const Type *t0, const Type *t1 ) const { return TypeInt::make( MIN2(r0->_lo,r1->_lo), MIN2(r0->_hi,r1->_hi), MAX2(r0->_widen,r1->_widen) ); } +// Collapse the "addition with overflow-protection" pattern, and the symmetrical +// "subtraction with underflow-protection" pattern. These are created during the +// unrolling, when we have to adjust the limit by subtracting the stride, but want +// to protect against underflow: MaxL(SubL(limit, stride), min_jint). 
+// If we have more than one of those in a sequence: +// +// x con2 +// | | +// AddL clamp2 +// | | +// Max/MinL con1 +// | | +// AddL clamp1 +// | | +// Max/MinL (n) +// +// We want to collapse it to: +// +// x con1 con2 +// | | | +// | AddLNode (new_con) +// | | +// AddLNode clamp1 +// | | +// Max/MinL (n) +// +// Note: we assume that SubL was already replaced by an AddL, and that the stride +// has its sign flipped: SubL(limit, stride) -> AddL(limit, -stride). +Node* fold_subI_no_underflow_pattern(Node* n, PhaseGVN* phase) { + assert(n->Opcode() == Op_MaxL || n->Opcode() == Op_MinL, "sanity"); + // Check that the two clamps have the correct values. + jlong clamp = (n->Opcode() == Op_MaxL) ? min_jint : max_jint; + auto is_clamp = [&](Node* c) { + const TypeLong* t = phase->type(c)->isa_long(); + return t != nullptr && t->is_con() && + t->get_con() == clamp; + }; + // Check that the constants are negative if MaxL, and positive if MinL. + auto is_sub_con = [&](Node* c) { + const TypeLong* t = phase->type(c)->isa_long(); + return t != nullptr && t->is_con() && + t->get_con() < max_jint && t->get_con() > min_jint && + (t->get_con() < 0) == (n->Opcode() == Op_MaxL); + }; + // Verify the graph level by level: + Node* add1 = n->in(1); + Node* clamp1 = n->in(2); + if (add1->Opcode() == Op_AddL && is_clamp(clamp1)) { + Node* max2 = add1->in(1); + Node* con1 = add1->in(2); + if (max2->Opcode() == n->Opcode() && is_sub_con(con1)) { + Node* add2 = max2->in(1); + Node* clamp2 = max2->in(2); + if (add2->Opcode() == Op_AddL && is_clamp(clamp2)) { + Node* x = add2->in(1); + Node* con2 = add2->in(2); + if (is_sub_con(con2)) { + Node* new_con = phase->transform(new AddLNode(con1, con2)); + Node* new_sub = phase->transform(new AddLNode(x, new_con)); + n->set_req_X(1, new_sub, phase); + return n; + } + } + } + } + return nullptr; +} + +const Type* MaxLNode::add_ring(const Type* t0, const Type* t1) const { + const TypeLong* r0 = t0->is_long(); + const TypeLong* r1 = t1->is_long(); + 
+ return TypeLong::make(MAX2(r0->_lo, r1->_lo), MAX2(r0->_hi, r1->_hi), MAX2(r0->_widen, r1->_widen)); +} + +Node* MaxLNode::Identity(PhaseGVN* phase) { + const TypeLong* t1 = phase->type(in(1))->is_long(); + const TypeLong* t2 = phase->type(in(2))->is_long(); + + // Can we determine maximum statically? + if (t1->_lo >= t2->_hi) { + return in(1); + } else if (t2->_lo >= t1->_hi) { + return in(2); + } + + return MaxNode::Identity(phase); +} + +Node* MaxLNode::Ideal(PhaseGVN* phase, bool can_reshape) { + Node* n = AddNode::Ideal(phase, can_reshape); + if (n != nullptr) { + return n; + } + if (can_reshape) { + return fold_subI_no_underflow_pattern(this, phase); + } + return nullptr; +} + +const Type* MinLNode::add_ring(const Type* t0, const Type* t1) const { + const TypeLong* r0 = t0->is_long(); + const TypeLong* r1 = t1->is_long(); + + return TypeLong::make(MIN2(r0->_lo, r1->_lo), MIN2(r0->_hi, r1->_hi), MIN2(r0->_widen, r1->_widen)); +} + +Node* MinLNode::Identity(PhaseGVN* phase) { + const TypeLong* t1 = phase->type(in(1))->is_long(); + const TypeLong* t2 = phase->type(in(2))->is_long(); + + // Can we determine minimum statically? + if (t1->_lo >= t2->_hi) { + return in(2); + } else if (t2->_lo >= t1->_hi) { + return in(1); + } + + return MaxNode::Identity(phase); +} + +Node* MinLNode::Ideal(PhaseGVN* phase, bool can_reshape) { + Node* n = AddNode::Ideal(phase, can_reshape); + if (n != nullptr) { + return n; + } + if (can_reshape) { + return fold_subI_no_underflow_pattern(this, phase); + } + return nullptr; +} + //------------------------------add_ring--------------------------------------- const Type *MinFNode::add_ring( const Type *t0, const Type *t1 ) const { const TypeF *r0 = t0->is_float_constant(); diff --git a/src/hotspot/share/opto/addnode.hpp b/src/hotspot/share/opto/addnode.hpp index c6a6138adb7..fb47972e8b3 100644 --- a/src/hotspot/share/opto/addnode.hpp +++ b/src/hotspot/share/opto/addnode.hpp @@ -322,28 +322,38 @@ public: // MAXimum of 2 longs. 
class MaxLNode : public MaxNode { public: - MaxLNode(Node *in1, Node *in2) : MaxNode(in1, in2) {} + MaxLNode(Compile* C, Node* in1, Node* in2) : MaxNode(in1, in2) { + init_flags(Flag_is_macro); + C->add_macro_node(this); + } virtual int Opcode() const; - virtual const Type *add_ring(const Type*, const Type*) const { return TypeLong::LONG; } - virtual const Type *add_id() const { return TypeLong::make(min_jlong); } - virtual const Type *bottom_type() const { return TypeLong::LONG; } + virtual const Type* add_ring(const Type* t0, const Type* t1) const; + virtual const Type* add_id() const { return TypeLong::make(min_jlong); } + virtual const Type* bottom_type() const { return TypeLong::LONG; } virtual uint ideal_reg() const { return Op_RegL; } int max_opcode() const { return Op_MaxL; } int min_opcode() const { return Op_MinL; } + virtual Node* Identity(PhaseGVN* phase); + virtual Node* Ideal(PhaseGVN *phase, bool can_reshape); }; //------------------------------MinLNode--------------------------------------- // MINimum of 2 longs. 
class MinLNode : public MaxNode { public: - MinLNode(Node *in1, Node *in2) : MaxNode(in1, in2) {} + MinLNode(Compile* C, Node* in1, Node* in2) : MaxNode(in1, in2) { + init_flags(Flag_is_macro); + C->add_macro_node(this); + } virtual int Opcode() const; - virtual const Type *add_ring(const Type*, const Type*) const { return TypeLong::LONG; } - virtual const Type *add_id() const { return TypeLong::make(max_jlong); } - virtual const Type *bottom_type() const { return TypeLong::LONG; } + virtual const Type* add_ring(const Type* t0, const Type* t1) const; + virtual const Type* add_id() const { return TypeLong::make(max_jlong); } + virtual const Type* bottom_type() const { return TypeLong::LONG; } virtual uint ideal_reg() const { return Op_RegL; } int max_opcode() const { return Op_MaxL; } int min_opcode() const { return Op_MinL; } + virtual Node* Identity(PhaseGVN* phase); + virtual Node* Ideal(PhaseGVN* phase, bool can_reshape); }; //------------------------------MaxFNode--------------------------------------- diff --git a/src/hotspot/share/opto/convertnode.cpp b/src/hotspot/share/opto/convertnode.cpp index b276a4d1611..ab2e839424a 100644 --- a/src/hotspot/share/opto/convertnode.cpp +++ b/src/hotspot/share/opto/convertnode.cpp @@ -322,6 +322,20 @@ const Type* ConvI2LNode::Value(PhaseGVN* phase) const { return this_type; } +Node* ConvI2LNode::Identity(PhaseGVN* phase) { + // If type is in "int" sub-range, we can + // convert I2L(L2I(x)) => x + // since the conversions have no effect. 
+ if (in(1)->Opcode() == Op_ConvL2I) { + Node* x = in(1)->in(1); + const TypeLong* t = phase->type(x)->isa_long(); + if (t != nullptr && t->_lo >= min_jint && t->_hi <= max_jint) { + return x; + } + } + return this; +} + #ifdef ASSERT static inline bool long_ranges_overlap(jlong lo1, jlong hi1, jlong lo2, jlong hi2) { diff --git a/src/hotspot/share/opto/convertnode.hpp b/src/hotspot/share/opto/convertnode.hpp index e58213fbc09..dbebf337db2 100644 --- a/src/hotspot/share/opto/convertnode.hpp +++ b/src/hotspot/share/opto/convertnode.hpp @@ -190,6 +190,7 @@ class ConvI2LNode : public TypeNode { virtual int Opcode() const; virtual const Type* Value(PhaseGVN* phase) const; virtual Node *Ideal(PhaseGVN *phase, bool can_reshape); + virtual Node* Identity(PhaseGVN* phase); virtual uint ideal_reg() const { return Op_RegL; } }; diff --git a/src/hotspot/share/opto/loopTransform.cpp b/src/hotspot/share/opto/loopTransform.cpp index cb704b076a4..9070a1080f4 100644 --- a/src/hotspot/share/opto/loopTransform.cpp +++ b/src/hotspot/share/opto/loopTransform.cpp @@ -2288,74 +2288,32 @@ void PhaseIdealLoop::do_unroll(IdealLoopTree *loop, Node_List &old_new, bool adj new_limit = _igvn.intcon(limit->get_int() - stride_con); set_ctrl(new_limit, C->root()); } else { - // Limit is not constant. - assert(loop_head->unrolled_count() != 1 || has_ctrl(opaq), "should have opaque for first unroll"); - if ((stride_con > 0 && (java_subtract(limit_type->_lo, stride_con) < limit_type->_lo)) || - (stride_con < 0 && (java_subtract(limit_type->_hi, stride_con) > limit_type->_hi))) { - // No underflow. - new_limit = new SubINode(limit, stride); + // Limit is not constant. Int subtraction could lead to underflow. + // (1) Convert to long. + Node* limit_l = new ConvI2LNode(limit); + register_new_node(limit_l, get_ctrl(limit)); + Node* stride_l = _igvn.longcon(stride_con); + set_ctrl(stride_l, C->root()); + + // (2) Subtract: compute in long, to prevent underflow. 
+ Node* new_limit_l = new SubLNode(limit_l, stride_l); + register_new_node(new_limit_l, ctrl); + + // (3) Clamp to int range, in case we had subtraction underflow. + Node* underflow_clamp_l = _igvn.longcon((stride_con > 0) ? min_jint : max_jint); + set_ctrl(underflow_clamp_l, C->root()); + Node* new_limit_no_underflow_l = nullptr; + if (stride_con > 0) { + // limit = MaxL(limit - stride, min_jint) + new_limit_no_underflow_l = new MaxLNode(C, new_limit_l, underflow_clamp_l); } else { - // (limit - stride) may underflow. - // Clamp the adjustment value with MININT or MAXINT: - // - // new_limit = limit-stride - // if (stride > 0) - // new_limit = (limit < new_limit) ? MININT : new_limit; - // else - // new_limit = (limit > new_limit) ? MAXINT : new_limit; - // - BoolTest::mask bt = loop_end->test_trip(); - assert(bt == BoolTest::lt || bt == BoolTest::gt, "canonical test is expected"); - Node* underflow_clamp = _igvn.intcon((stride_con > 0) ? min_jint : max_jint); - set_ctrl(underflow_clamp, C->root()); - Node* limit_before_underflow = nullptr; - Node* prev_limit = nullptr; - Node* bol = limit->is_CMove() ? limit->in(CMoveNode::Condition) : nullptr; - if (loop_head->unrolled_count() > 1 && - limit->is_CMove() && limit->Opcode() == Op_CMoveI && - limit->in(CMoveNode::IfTrue) == underflow_clamp && - bol->as_Bool()->_test._test == bt && - bol->in(1)->Opcode() == Op_CmpI && - bol->in(1)->in(2) == limit->in(CMoveNode::IfFalse)) { - // Loop was unrolled before, and had an unrolling protection CMoveI. - // Use inputs to previous CMoveI for the new one: - prev_limit = limit->in(CMoveNode::IfFalse); // unpack previous limit with underflow - limit_before_underflow = bol->in(1)->in(1); // CMoveI -> Bool -> CmpI -> limit_before_underflow - } else { - // Loop was not unrolled before, or the limit did not underflow in a previous unrolling. 
- prev_limit = limit; - limit_before_underflow = limit; - } - // prev_limit stride - // | | - // limit_before_underflow new_limit_with_underflow (SubI) - // | | | - // underflow_cmp | - // | | - // underflow_bool [lt/gt] | - // | | - // +----+ +------------+ - // | | - // | | underflow_clamp (min_jint/max_jint) - // | | | - // CMoveINode ([min_jint..hi] / [lo..max_jing]) - // - assert(limit_before_underflow != nullptr && prev_limit != nullptr, "must find them"); - Node* new_limit_with_underflow = new SubINode(prev_limit, stride); - register_new_node(new_limit_with_underflow, ctrl); - // We must compare with limit_before_underflow, prev_limit may already have underflowed. - Node* underflow_cmp = new CmpINode(limit_before_underflow, new_limit_with_underflow); - register_new_node(underflow_cmp, ctrl); - Node* underflow_bool = new BoolNode(underflow_cmp, bt); - register_new_node(underflow_bool, ctrl); - // Prevent type from becoming too pessimistic due to type underflow. The new limit - // may be arbitrarily decreased by unrolling, but still in [min_jint..hi] / [lo..max_jint] - const TypeInt* limit_before_underflow_t = _igvn.type(limit_before_underflow)->is_int(); - const TypeInt* no_underflow_t = TypeInt::make(stride_con > 0 ? min_jint : limit_before_underflow_t->_lo, - stride_con > 0 ? limit_before_underflow_t->_hi : max_jint, - Type::WidenMax); - new_limit = new CMoveINode(underflow_bool, new_limit_with_underflow, underflow_clamp, no_underflow_t); + // limit = MinL(limit - stride, max_jint) + new_limit_no_underflow_l = new MinLNode(C, new_limit_l, underflow_clamp_l); } + register_new_node(new_limit_no_underflow_l, ctrl); + + // (4) Convert back to int. 
+ new_limit = new ConvL2INode(new_limit_no_underflow_l); register_new_node(new_limit, ctrl); } @@ -2564,6 +2522,9 @@ void PhaseIdealLoop::mark_reductions(IdealLoopTree *loop) { //------------------------------adjust_limit----------------------------------- // Helper function that computes new loop limit as (rc_limit-offset)/scale Node* PhaseIdealLoop::adjust_limit(bool is_positive_stride, Node* scale, Node* offset, Node* rc_limit, Node* old_limit, Node* pre_ctrl, bool round) { + Node* old_limit_long = new ConvI2LNode(old_limit); + register_new_node(old_limit_long, pre_ctrl); + Node* sub = new SubLNode(rc_limit, offset); register_new_node(sub, pre_ctrl); Node* limit = new DivLNode(nullptr, sub, scale); @@ -2589,27 +2550,19 @@ Node* PhaseIdealLoop::adjust_limit(bool is_positive_stride, Node* scale, Node* o // - integer underflow of limit: MAXL chooses old_limit (>= MIN_INT > limit) // INT() is finally converting the limit back to an integer value. - // We use CMove nodes to implement long versions of min/max (MINL/MAXL). 
- // We use helper methods for inner MINL/MAXL which return CMoveL nodes to keep a long value for the outer MINL/MAXL comparison: - Node* inner_result_long; + Node* inner_result_long = nullptr; + Node* outer_result_long = nullptr; if (is_positive_stride) { - inner_result_long = MaxNode::signed_max(limit, _igvn.longcon(min_jint), TypeLong::LONG, _igvn); + inner_result_long = new MaxLNode(C, limit, _igvn.longcon(min_jint)); + outer_result_long = new MinLNode(C, inner_result_long, old_limit_long); } else { - inner_result_long = MaxNode::signed_min(limit, _igvn.longcon(max_jint), TypeLong::LONG, _igvn); + inner_result_long = new MinLNode(C, limit, _igvn.longcon(max_jint)); + outer_result_long = new MaxLNode(C, inner_result_long, old_limit_long); } - set_subtree_ctrl(inner_result_long, false); + register_new_node(inner_result_long, pre_ctrl); + register_new_node(outer_result_long, pre_ctrl); - // Outer MINL/MAXL: - // The comparison is done with long values but the result is the converted back to int by using CmovI. - Node* old_limit_long = new ConvI2LNode(old_limit); - register_new_node(old_limit_long, pre_ctrl); - Node* cmp = new CmpLNode(old_limit_long, limit); - register_new_node(cmp, pre_ctrl); - Node* bol = new BoolNode(cmp, is_positive_stride ? 
BoolTest::gt : BoolTest::lt); - register_new_node(bol, pre_ctrl); - Node* inner_result_int = new ConvL2INode(inner_result_long); // Could under-/overflow but that's fine as comparison was done with CmpL - register_new_node(inner_result_int, pre_ctrl); - limit = new CMoveINode(bol, old_limit, inner_result_int, TypeInt::INT); + limit = new ConvL2INode(outer_result_long); register_new_node(limit, pre_ctrl); return limit; } diff --git a/src/hotspot/share/opto/macro.cpp b/src/hotspot/share/opto/macro.cpp index f2cfe06c75c..e9132f83274 100644 --- a/src/hotspot/share/opto/macro.cpp +++ b/src/hotspot/share/opto/macro.cpp @@ -2373,6 +2373,8 @@ void PhaseMacroExpand::eliminate_macro_nodes() { assert(n->Opcode() == Op_LoopLimit || n->Opcode() == Op_Opaque3 || n->Opcode() == Op_Opaque4 || + n->Opcode() == Op_MaxL || + n->Opcode() == Op_MinL || BarrierSet::barrier_set()->barrier_set_c2()->is_gc_barrier_node(n), "unknown node type in macro list"); } @@ -2457,6 +2459,18 @@ bool PhaseMacroExpand::expand_macro_nodes() { n->as_OuterStripMinedLoop()->adjust_strip_mined_loop(&_igvn); C->remove_macro_node(n); success = true; + } else if (n->Opcode() == Op_MaxL) { + // Since MaxL and MinL are not implemented in the backend, we expand them to + // a CMoveL construct now. At least until here, the type could be computed + // precisely. CMoveL is not so smart, but we can give it at least the best + // type we know about n now. 
+ Node* repl = MaxNode::signed_max(n->in(1), n->in(2), _igvn.type(n), _igvn); + _igvn.replace_node(n, repl); + success = true; + } else if (n->Opcode() == Op_MinL) { + Node* repl = MaxNode::signed_min(n->in(1), n->in(2), _igvn.type(n), _igvn); + _igvn.replace_node(n, repl); + success = true; } assert(!success || (C->macro_count() == (old_macro_count - 1)), "elimination must have deleted one node from macro list"); progress = progress || success; diff --git a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java index bdbdf006779..8c1dbaef277 100644 --- a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java +++ b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java @@ -654,6 +654,11 @@ public class IRNode { beforeMatchingNameRegex(MAX_I, "MaxI"); } + public static final String MAX_L = PREFIX + "MAX_L" + POSTFIX; + static { + beforeMatchingNameRegex(MAX_L, "MaxL"); + } + public static final String MAX_V = PREFIX + "MAX_V" + POSTFIX; static { beforeMatchingNameRegex(MAX_V, "MaxV"); @@ -679,6 +684,11 @@ public class IRNode { beforeMatchingNameRegex(MIN_I, "MinI"); } + public static final String MIN_L = PREFIX + "MIN_L" + POSTFIX; + static { + beforeMatchingNameRegex(MIN_L, "MinL"); + } + public static final String MIN_V = PREFIX + "MIN_V" + POSTFIX; static { beforeMatchingNameRegex(MIN_V, "MinV"); diff --git a/test/hotspot/jtreg/compiler/loopopts/TestLoopLimitSubtractionsCollapse.java b/test/hotspot/jtreg/compiler/loopopts/TestLoopLimitSubtractionsCollapse.java new file mode 100644 index 00000000000..129916cf6f7 --- /dev/null +++ b/test/hotspot/jtreg/compiler/loopopts/TestLoopLimitSubtractionsCollapse.java @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @bug 8303466 + * @summary Verify that AddL->MaxL->AddL->MaxL chains of unroll limit adjustments collapse. + * If it did not collapse, we would have about 10 MaxL/MinL. With the collapse, it + * is now one or two. 
+ * @library /test/lib / + * @requires vm.compiler2.enabled + * @run driver compiler.loopopts.TestLoopLimitSubtractionsCollapse + */ + +package compiler.loopopts; +import compiler.lib.ir_framework.*; + +public class TestLoopLimitSubtractionsCollapse { + static int START = 0; + static int FINISH = 512; + static int RANGE = 512; + + static byte[] data1 = new byte[RANGE]; + static byte[] data2 = new byte[RANGE]; + + public static void main(String[] args) { + TestFramework.run(); + } + + @Test + @Warmup(0) + @IR(counts = {IRNode.MAX_L, "> 0", IRNode.MAX_L, "<= 2"}, + phase = CompilePhase.PHASEIDEALLOOP_ITERATIONS) + public static void test1() { + for (int j = START; j < FINISH; j++) { + data1[j] = (byte)(data1[j] * 11); + } + } + + @Test + @Warmup(0) + @IR(counts = {IRNode.MIN_L, "> 0", IRNode.MIN_L, "<= 2"}, + phase = CompilePhase.PHASEIDEALLOOP_ITERATIONS) + public static void test2() { + for (int j = FINISH-1; j >= START; j--) { + data2[j] = (byte)(data2[j] * 11); + } + } +} diff --git a/test/hotspot/jtreg/compiler/loopopts/TestUnrollLimitPreciseType.java b/test/hotspot/jtreg/compiler/loopopts/TestUnrollLimitPreciseType.java new file mode 100644 index 00000000000..18a58aa7ae5 --- /dev/null +++ b/test/hotspot/jtreg/compiler/loopopts/TestUnrollLimitPreciseType.java @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test id=test1 + * @bug 8298935 + * @summary CMoveI for underflow protection of the limit did not compute a type that was precise enough. + * This lead to dead data but zero-trip-guard control did not die -> "malformed control flow". + * @requires vm.compiler2.enabled + * @run main/othervm + * -XX:CompileCommand=compileonly,compiler.loopopts.TestUnrollLimitPreciseType::test1 + * -XX:CompileCommand=dontinline,compiler.loopopts.TestUnrollLimitPreciseType::* + * -XX:MaxVectorSize=64 + * -Xcomp + * -XX:+UnlockExperimentalVMOptions -XX:PerMethodSpecTrapLimit=0 -XX:PerMethodTrapLimit=0 + * compiler.loopopts.TestUnrollLimitPreciseType test1 + */ + +/* + * @test id=test2 + * @bug 8298935 + * @summary CMoveI for underflow protection of the limit did not compute a type that was precise enough. + * This lead to dead data but zero-trip-guard control did not die -> "malformed control flow". 
+ * @requires vm.compiler2.enabled + * @run main/othervm + * -XX:CompileCommand=compileonly,compiler.loopopts.TestUnrollLimitPreciseType::* + * -Xcomp + * compiler.loopopts.TestUnrollLimitPreciseType test2 + */ + + +package compiler.loopopts; + +public class TestUnrollLimitPreciseType { + static final int RANGE = 512; + + public static void main(String args[]) { + if (args.length != 1) { + throw new RuntimeException("Need exactly one argument."); + } + if (args[0].equals("test1")) { + byte[] data = new byte[RANGE]; + test1(data); + } else if (args[0].equals("test2")) { + test2(); + } else { + throw new RuntimeException("Do not have: " + args[0]); + } + } + + public static void test1(byte[] data) { + // Did not fully analyze this. But it is also unrolled, SuperWorded, + // and further unrolled with vectorized post loop. + // Only seems to reproduce with avx512, and not with avx2. + for (int j = 192; j < RANGE; j++) { + data[j - 192] = (byte)(data[j] * 11); + } + } + + static void test2() { + // Loop is SuperWord'ed. + // We unroll more afterwards, and so add vectorized post loop. + // But it turns out that the vectorized post loop is never entered. + // This led to an assert, because the zero-trip-guard did not collapse, + // but the CastII with the trip count did die. + // Only seems to reproduce with avx512, and not with avx2. + double dArr[][] = new double[100][100]; + for (int i = 2, j = 2; j < 68; j++) { + dArr[i][j] = 8; + } + } +}