8303466: C2: failed: malformed control flow. Limit type made precise with MaxL/MinL

Reviewed-by: roland, kvn, chagedorn, thartmann
This commit is contained in:
Emanuel Peter 2023-04-26 05:42:26 +00:00
parent ed1ebd242a
commit cc894d849a
9 changed files with 386 additions and 91 deletions

View File

@ -1258,6 +1258,138 @@ const Type *MinINode::add_ring( const Type *t0, const Type *t1 ) const {
return TypeInt::make( MIN2(r0->_lo,r1->_lo), MIN2(r0->_hi,r1->_hi), MAX2(r0->_widen,r1->_widen) );
}
// Collapse the "addition with overflow-protection" pattern, and the symmetrical
// "subtraction with underflow-protection" pattern. These are created during the
// unrolling, when we have to adjust the limit by subtracting the stride, but want
// to protect against underflow: MaxL(SubL(limit, stride), min_jint).
// If we have more than one of those in a sequence:
//
// x con2
// | |
// AddL clamp2
// | |
// Max/MinL con1
// | |
// AddL clamp1
// | |
// Max/MinL (n)
//
// We want to collapse it to:
//
// x con1 con2
// | | |
// | AddLNode (new_con)
// | |
// AddLNode clamp1
// | |
// Max/MinL (n)
//
// Note: we assume that SubL was already replaced by an AddL, and that the stride
// has its sign flipped: SubL(limit, stride) -> AddL(limit, -stride).
Node* fold_subI_no_underflow_pattern(Node* n, PhaseGVN* phase) {
assert(n->Opcode() == Op_MaxL || n->Opcode() == Op_MinL, "sanity");
// Check that the two clamps have the correct values.
jlong clamp = (n->Opcode() == Op_MaxL) ? min_jint : max_jint;
auto is_clamp = [&](Node* c) {
const TypeLong* t = phase->type(c)->isa_long();
return t != nullptr && t->is_con() &&
t->get_con() == clamp;
};
// Check that the constants are negative if MaxL, and positive if MinL.
auto is_sub_con = [&](Node* c) {
const TypeLong* t = phase->type(c)->isa_long();
return t != nullptr && t->is_con() &&
t->get_con() < max_jint && t->get_con() > min_jint &&
(t->get_con() < 0) == (n->Opcode() == Op_MaxL);
};
// Verify the graph level by level:
Node* add1 = n->in(1);
Node* clamp1 = n->in(2);
if (add1->Opcode() == Op_AddL && is_clamp(clamp1)) {
Node* max2 = add1->in(1);
Node* con1 = add1->in(2);
if (max2->Opcode() == n->Opcode() && is_sub_con(con1)) {
Node* add2 = max2->in(1);
Node* clamp2 = max2->in(2);
if (add2->Opcode() == Op_AddL && is_clamp(clamp2)) {
Node* x = add2->in(1);
Node* con2 = add2->in(2);
if (is_sub_con(con2)) {
Node* new_con = phase->transform(new AddLNode(con1, con2));
Node* new_sub = phase->transform(new AddLNode(x, new_con));
n->set_req_X(1, new_sub, phase);
return n;
}
}
}
}
return nullptr;
}
const Type* MaxLNode::add_ring(const Type* t0, const Type* t1) const {
const TypeLong* r0 = t0->is_long();
const TypeLong* r1 = t1->is_long();
return TypeLong::make(MAX2(r0->_lo, r1->_lo), MAX2(r0->_hi, r1->_hi), MAX2(r0->_widen, r1->_widen));
}
Node* MaxLNode::Identity(PhaseGVN* phase) {
const TypeLong* t1 = phase->type(in(1))->is_long();
const TypeLong* t2 = phase->type(in(2))->is_long();
// Can we determine maximum statically?
if (t1->_lo >= t2->_hi) {
return in(1);
} else if (t2->_lo >= t1->_hi) {
return in(2);
}
return MaxNode::Identity(phase);
}
Node* MaxLNode::Ideal(PhaseGVN* phase, bool can_reshape) {
Node* n = AddNode::Ideal(phase, can_reshape);
if (n != nullptr) {
return n;
}
if (can_reshape) {
return fold_subI_no_underflow_pattern(this, phase);
}
return nullptr;
}
const Type* MinLNode::add_ring(const Type* t0, const Type* t1) const {
const TypeLong* r0 = t0->is_long();
const TypeLong* r1 = t1->is_long();
return TypeLong::make(MIN2(r0->_lo, r1->_lo), MIN2(r0->_hi, r1->_hi), MIN2(r0->_widen, r1->_widen));
}
Node* MinLNode::Identity(PhaseGVN* phase) {
const TypeLong* t1 = phase->type(in(1))->is_long();
const TypeLong* t2 = phase->type(in(2))->is_long();
// Can we determine minimum statically?
if (t1->_lo >= t2->_hi) {
return in(2);
} else if (t2->_lo >= t1->_hi) {
return in(1);
}
return MaxNode::Identity(phase);
}
Node* MinLNode::Ideal(PhaseGVN* phase, bool can_reshape) {
Node* n = AddNode::Ideal(phase, can_reshape);
if (n != nullptr) {
return n;
}
if (can_reshape) {
return fold_subI_no_underflow_pattern(this, phase);
}
return nullptr;
}
//------------------------------add_ring---------------------------------------
const Type *MinFNode::add_ring( const Type *t0, const Type *t1 ) const {
const TypeF *r0 = t0->is_float_constant();

View File

@ -322,28 +322,38 @@ public:
// MAXimum of 2 longs.
class MaxLNode : public MaxNode {
public:
MaxLNode(Node *in1, Node *in2) : MaxNode(in1, in2) {}
MaxLNode(Compile* C, Node* in1, Node* in2) : MaxNode(in1, in2) {
init_flags(Flag_is_macro);
C->add_macro_node(this);
}
virtual int Opcode() const;
virtual const Type *add_ring(const Type*, const Type*) const { return TypeLong::LONG; }
virtual const Type* add_ring(const Type* t0, const Type* t1) const;
virtual const Type* add_id() const { return TypeLong::make(min_jlong); }
virtual const Type* bottom_type() const { return TypeLong::LONG; }
virtual uint ideal_reg() const { return Op_RegL; }
int max_opcode() const { return Op_MaxL; }
int min_opcode() const { return Op_MinL; }
virtual Node* Identity(PhaseGVN* phase);
virtual Node* Ideal(PhaseGVN *phase, bool can_reshape);
};
//------------------------------MinLNode---------------------------------------
// MINimum of 2 longs.
class MinLNode : public MaxNode {
public:
MinLNode(Node *in1, Node *in2) : MaxNode(in1, in2) {}
MinLNode(Compile* C, Node* in1, Node* in2) : MaxNode(in1, in2) {
init_flags(Flag_is_macro);
C->add_macro_node(this);
}
virtual int Opcode() const;
virtual const Type *add_ring(const Type*, const Type*) const { return TypeLong::LONG; }
virtual const Type* add_ring(const Type* t0, const Type* t1) const;
virtual const Type* add_id() const { return TypeLong::make(max_jlong); }
virtual const Type* bottom_type() const { return TypeLong::LONG; }
virtual uint ideal_reg() const { return Op_RegL; }
int max_opcode() const { return Op_MaxL; }
int min_opcode() const { return Op_MinL; }
virtual Node* Identity(PhaseGVN* phase);
virtual Node* Ideal(PhaseGVN* phase, bool can_reshape);
};
//------------------------------MaxFNode---------------------------------------

View File

@ -322,6 +322,20 @@ const Type* ConvI2LNode::Value(PhaseGVN* phase) const {
return this_type;
}
Node* ConvI2LNode::Identity(PhaseGVN* phase) {
// If type is in "int" sub-range, we can
// convert I2L(L2I(x)) => x
// since the conversions have no effect.
if (in(1)->Opcode() == Op_ConvL2I) {
Node* x = in(1)->in(1);
const TypeLong* t = phase->type(x)->isa_long();
if (t != nullptr && t->_lo >= min_jint && t->_hi <= max_jint) {
return x;
}
}
return this;
}
#ifdef ASSERT
static inline bool long_ranges_overlap(jlong lo1, jlong hi1,
jlong lo2, jlong hi2) {

View File

@ -190,6 +190,7 @@ class ConvI2LNode : public TypeNode {
virtual int Opcode() const;
virtual const Type* Value(PhaseGVN* phase) const;
virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
virtual Node* Identity(PhaseGVN* phase);
virtual uint ideal_reg() const { return Op_RegL; }
};

View File

@ -2288,74 +2288,32 @@ void PhaseIdealLoop::do_unroll(IdealLoopTree *loop, Node_List &old_new, bool adj
new_limit = _igvn.intcon(limit->get_int() - stride_con);
set_ctrl(new_limit, C->root());
} else {
// Limit is not constant.
assert(loop_head->unrolled_count() != 1 || has_ctrl(opaq), "should have opaque for first unroll");
if ((stride_con > 0 && (java_subtract(limit_type->_lo, stride_con) < limit_type->_lo)) ||
(stride_con < 0 && (java_subtract(limit_type->_hi, stride_con) > limit_type->_hi))) {
// No underflow.
new_limit = new SubINode(limit, stride);
// Limit is not constant. Int subtraction could lead to underflow.
// (1) Convert to long.
Node* limit_l = new ConvI2LNode(limit);
register_new_node(limit_l, get_ctrl(limit));
Node* stride_l = _igvn.longcon(stride_con);
set_ctrl(stride_l, C->root());
// (2) Subtract: compute in long, to prevent underflow.
Node* new_limit_l = new SubLNode(limit_l, stride_l);
register_new_node(new_limit_l, ctrl);
// (3) Clamp to int range, in case we had subtraction underflow.
Node* underflow_clamp_l = _igvn.longcon((stride_con > 0) ? min_jint : max_jint);
set_ctrl(underflow_clamp_l, C->root());
Node* new_limit_no_underflow_l = nullptr;
if (stride_con > 0) {
// limit = MaxL(limit - stride, min_jint)
new_limit_no_underflow_l = new MaxLNode(C, new_limit_l, underflow_clamp_l);
} else {
// (limit - stride) may underflow.
// Clamp the adjustment value with MININT or MAXINT:
//
// new_limit = limit-stride
// if (stride > 0)
// new_limit = (limit < new_limit) ? MININT : new_limit;
// else
// new_limit = (limit > new_limit) ? MAXINT : new_limit;
//
BoolTest::mask bt = loop_end->test_trip();
assert(bt == BoolTest::lt || bt == BoolTest::gt, "canonical test is expected");
Node* underflow_clamp = _igvn.intcon((stride_con > 0) ? min_jint : max_jint);
set_ctrl(underflow_clamp, C->root());
Node* limit_before_underflow = nullptr;
Node* prev_limit = nullptr;
Node* bol = limit->is_CMove() ? limit->in(CMoveNode::Condition) : nullptr;
if (loop_head->unrolled_count() > 1 &&
limit->is_CMove() && limit->Opcode() == Op_CMoveI &&
limit->in(CMoveNode::IfTrue) == underflow_clamp &&
bol->as_Bool()->_test._test == bt &&
bol->in(1)->Opcode() == Op_CmpI &&
bol->in(1)->in(2) == limit->in(CMoveNode::IfFalse)) {
// Loop was unrolled before, and had an unrolling protection CMoveI.
// Use inputs to previous CMoveI for the new one:
prev_limit = limit->in(CMoveNode::IfFalse); // unpack previous limit with underflow
limit_before_underflow = bol->in(1)->in(1); // CMoveI -> Bool -> CmpI -> limit_before_underflow
} else {
// Loop was not unrolled before, or the limit did not underflow in a previous unrolling.
prev_limit = limit;
limit_before_underflow = limit;
}
// prev_limit stride
// | |
// limit_before_underflow new_limit_with_underflow (SubI)
// | | |
// underflow_cmp |
// | |
// underflow_bool [lt/gt] |
// | |
// +----+ +------------+
// | |
// | | underflow_clamp (min_jint/max_jint)
// | | |
// CMoveINode ([min_jint..hi] / [lo..max_jing])
//
assert(limit_before_underflow != nullptr && prev_limit != nullptr, "must find them");
Node* new_limit_with_underflow = new SubINode(prev_limit, stride);
register_new_node(new_limit_with_underflow, ctrl);
// We must compare with limit_before_underflow, prev_limit may already have underflowed.
Node* underflow_cmp = new CmpINode(limit_before_underflow, new_limit_with_underflow);
register_new_node(underflow_cmp, ctrl);
Node* underflow_bool = new BoolNode(underflow_cmp, bt);
register_new_node(underflow_bool, ctrl);
// Prevent type from becoming too pessimistic due to type underflow. The new limit
// may be arbitrarily decreased by unrolling, but still in [min_jint..hi] / [lo..max_jint]
const TypeInt* limit_before_underflow_t = _igvn.type(limit_before_underflow)->is_int();
const TypeInt* no_underflow_t = TypeInt::make(stride_con > 0 ? min_jint : limit_before_underflow_t->_lo,
stride_con > 0 ? limit_before_underflow_t->_hi : max_jint,
Type::WidenMax);
new_limit = new CMoveINode(underflow_bool, new_limit_with_underflow, underflow_clamp, no_underflow_t);
// limit = MinL(limit - stride, max_jint)
new_limit_no_underflow_l = new MinLNode(C, new_limit_l, underflow_clamp_l);
}
register_new_node(new_limit_no_underflow_l, ctrl);
// (4) Convert back to int.
new_limit = new ConvL2INode(new_limit_no_underflow_l);
register_new_node(new_limit, ctrl);
}
@ -2564,6 +2522,9 @@ void PhaseIdealLoop::mark_reductions(IdealLoopTree *loop) {
//------------------------------adjust_limit-----------------------------------
// Helper function that computes new loop limit as (rc_limit-offset)/scale
Node* PhaseIdealLoop::adjust_limit(bool is_positive_stride, Node* scale, Node* offset, Node* rc_limit, Node* old_limit, Node* pre_ctrl, bool round) {
Node* old_limit_long = new ConvI2LNode(old_limit);
register_new_node(old_limit_long, pre_ctrl);
Node* sub = new SubLNode(rc_limit, offset);
register_new_node(sub, pre_ctrl);
Node* limit = new DivLNode(nullptr, sub, scale);
@ -2589,27 +2550,19 @@ Node* PhaseIdealLoop::adjust_limit(bool is_positive_stride, Node* scale, Node* o
// - integer underflow of limit: MAXL chooses old_limit (>= MIN_INT > limit)
// INT() is finally converting the limit back to an integer value.
// We use CMove nodes to implement long versions of min/max (MINL/MAXL).
// We use helper methods for inner MINL/MAXL which return CMoveL nodes to keep a long value for the outer MINL/MAXL comparison:
Node* inner_result_long;
Node* inner_result_long = nullptr;
Node* outer_result_long = nullptr;
if (is_positive_stride) {
inner_result_long = MaxNode::signed_max(limit, _igvn.longcon(min_jint), TypeLong::LONG, _igvn);
inner_result_long = new MaxLNode(C, limit, _igvn.longcon(min_jint));
outer_result_long = new MinLNode(C, inner_result_long, old_limit_long);
} else {
inner_result_long = MaxNode::signed_min(limit, _igvn.longcon(max_jint), TypeLong::LONG, _igvn);
inner_result_long = new MinLNode(C, limit, _igvn.longcon(max_jint));
outer_result_long = new MaxLNode(C, inner_result_long, old_limit_long);
}
set_subtree_ctrl(inner_result_long, false);
register_new_node(inner_result_long, pre_ctrl);
register_new_node(outer_result_long, pre_ctrl);
// Outer MINL/MAXL:
// The comparison is done with long values but the result is the converted back to int by using CmovI.
Node* old_limit_long = new ConvI2LNode(old_limit);
register_new_node(old_limit_long, pre_ctrl);
Node* cmp = new CmpLNode(old_limit_long, limit);
register_new_node(cmp, pre_ctrl);
Node* bol = new BoolNode(cmp, is_positive_stride ? BoolTest::gt : BoolTest::lt);
register_new_node(bol, pre_ctrl);
Node* inner_result_int = new ConvL2INode(inner_result_long); // Could under-/overflow but that's fine as comparison was done with CmpL
register_new_node(inner_result_int, pre_ctrl);
limit = new CMoveINode(bol, old_limit, inner_result_int, TypeInt::INT);
limit = new ConvL2INode(outer_result_long);
register_new_node(limit, pre_ctrl);
return limit;
}

View File

@ -2373,6 +2373,8 @@ void PhaseMacroExpand::eliminate_macro_nodes() {
assert(n->Opcode() == Op_LoopLimit ||
n->Opcode() == Op_Opaque3 ||
n->Opcode() == Op_Opaque4 ||
n->Opcode() == Op_MaxL ||
n->Opcode() == Op_MinL ||
BarrierSet::barrier_set()->barrier_set_c2()->is_gc_barrier_node(n),
"unknown node type in macro list");
}
@ -2457,6 +2459,18 @@ bool PhaseMacroExpand::expand_macro_nodes() {
n->as_OuterStripMinedLoop()->adjust_strip_mined_loop(&_igvn);
C->remove_macro_node(n);
success = true;
} else if (n->Opcode() == Op_MaxL) {
// Since MaxL and MinL are not implemented in the backend, we expand them to
// a CMoveL construct now. At least until here, the type could be computed
// precisely. CMoveL is not so smart, but we can give it at least the best
// type we know abouot n now.
Node* repl = MaxNode::signed_max(n->in(1), n->in(2), _igvn.type(n), _igvn);
_igvn.replace_node(n, repl);
success = true;
} else if (n->Opcode() == Op_MinL) {
Node* repl = MaxNode::signed_min(n->in(1), n->in(2), _igvn.type(n), _igvn);
_igvn.replace_node(n, repl);
success = true;
}
assert(!success || (C->macro_count() == (old_macro_count - 1)), "elimination must have deleted one node from macro list");
progress = progress || success;

View File

@ -654,6 +654,11 @@ public class IRNode {
beforeMatchingNameRegex(MAX_I, "MaxI");
}
public static final String MAX_L = PREFIX + "MAX_L" + POSTFIX;
static {
beforeMatchingNameRegex(MAX_L, "MaxL");
}
public static final String MAX_V = PREFIX + "MAX_V" + POSTFIX;
static {
beforeMatchingNameRegex(MAX_V, "MaxV");
@ -679,6 +684,11 @@ public class IRNode {
beforeMatchingNameRegex(MIN_I, "MinI");
}
public static final String MIN_L = PREFIX + "MIN_L" + POSTFIX;
static {
beforeMatchingNameRegex(MIN_L, "MinL");
}
public static final String MIN_V = PREFIX + "MIN_V" + POSTFIX;
static {
beforeMatchingNameRegex(MIN_V, "MinV");

View File

@ -0,0 +1,69 @@
/*
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* @test
* @bug 8303466
* @summary Verify that AddL->MaxL->AddL->MaxL chains of unroll limit adjustments collapse.
* If it did not collapse, we would have about 10 MaxL/MinL. With the collapse, it
* is now one or two.
* @library /test/lib /
* @requires vm.compiler2.enabled
* @run driver compiler.loopopts.TestLoopLimitSubtractionsCollapse
*/
package compiler.loopopts;
import compiler.lib.ir_framework.*;
public class TestLoopLimitSubtractionsCollapse {
static int START = 0;
static int FINISH = 512;
static int RANGE = 512;
static byte[] data1 = new byte[RANGE];
static byte[] data2 = new byte[RANGE];
public static void main(String[] args) {
TestFramework.run();
}
@Test
@Warmup(0)
@IR(counts = {IRNode.MAX_L, "> 0", IRNode.MAX_L, "<= 2"},
phase = CompilePhase.PHASEIDEALLOOP_ITERATIONS)
public static void test1() {
for (int j = START; j < FINISH; j++) {
data1[j] = (byte)(data1[j] * 11);
}
}
@Test
@Warmup(0)
@IR(counts = {IRNode.MIN_L, "> 0", IRNode.MIN_L, "<= 2"},
phase = CompilePhase.PHASEIDEALLOOP_ITERATIONS)
public static void test2() {
for (int j = FINISH-1; j >= START; j--) {
data2[j] = (byte)(data2[j] * 11);
}
}
}

View File

@ -0,0 +1,92 @@
/*
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* @test id=test1
* @bug 8298935
* @summary CMoveI for underflow protection of the limit did not compute a type that was precise enough.
* This lead to dead data but zero-trip-guard control did not die -> "malformed control flow".
* @requires vm.compiler2.enabled
* @run main/othervm
* -XX:CompileCommand=compileonly,compiler.loopopts.TestUnrollLimitPreciseType::test1
* -XX:CompileCommand=dontinline,compiler.loopopts.TestUnrollLimitPreciseType::*
* -XX:MaxVectorSize=64
* -Xcomp
* -XX:+UnlockExperimentalVMOptions -XX:PerMethodSpecTrapLimit=0 -XX:PerMethodTrapLimit=0
* compiler.loopopts.TestUnrollLimitPreciseType test1
*/
/*
* @test id=test2
* @bug 8298935
* @summary CMoveI for underflow protection of the limit did not compute a type that was precise enough.
* This lead to dead data but zero-trip-guard control did not die -> "malformed control flow".
* @requires vm.compiler2.enabled
* @run main/othervm
* -XX:CompileCommand=compileonly,compiler.loopopts.TestUnrollLimitPreciseType::*
* -Xcomp
* compiler.loopopts.TestUnrollLimitPreciseType test2
*/
package compiler.loopopts;
public class TestUnrollLimitPreciseType {
static final int RANGE = 512;
public static void main(String args[]) {
if (args.length != 1) {
throw new RuntimeException("Need exactly one argument.");
}
if (args[0].equals("test1")) {
byte[] data = new byte[RANGE];
test1(data);
} else if (args[0].equals("test2")) {
test2();
} else {
throw new RuntimeException("Do not have: " + args[0]);
}
}
public static void test1(byte[] data) {
// Did not fully analyze this. But it is also unrolled, SuperWorded,
// and further unrolled with vectorlized post loop.
// Only seems to reproduce with avx512, and not with avx2.
for (int j = 192; j < RANGE; j++) {
data[j - 192] = (byte)(data[j] * 11);
}
}
static void test2() {
// Loop is SuperWord'ed.
// We unroll more afterwards, and so add vectorized post loop.
// But it turns out that the vectorized post loop is never entered.
// This lead to assert, because the zero-trip-guard did not collaspse,
// but the CastII with the trip count did die.
// Only seems to reproduce with avx512, and not with avx2.
double dArr[][] = new double[100][100];
for (int i = 2, j = 2; j < 68; j++) {
dArr[i][j] = 8;
}
}
}