8235984: C2: assert(out->in(PhiNode::Region) == head || out->in(PhiNode::Region) == slow_head) failed: phi must be either part of the slow or the fast loop
Bail out of loop unswitching if loop predicates have a control dependency to partially peeled statements. Reviewed-by: neliasso, thartmann
This commit is contained in:
parent
cc99075c79
commit
3e9a17c53e
@ -118,9 +118,20 @@ IfNode* PhaseIdealLoop::find_unswitching_candidate(const IdealLoopTree *loop) co
|
||||
// execute.
|
||||
void PhaseIdealLoop::do_unswitching(IdealLoopTree *loop, Node_List &old_new) {
|
||||
|
||||
// Find first invariant test that doesn't exit the loop
|
||||
LoopNode *head = loop->_head->as_Loop();
|
||||
|
||||
Node* entry = head->skip_strip_mined()->in(LoopNode::EntryControl);
|
||||
if (find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check) != NULL
|
||||
|| (UseProfiledLoopPredicate && find_predicate_insertion_point(entry, Deoptimization::Reason_profile_predicate) != NULL)
|
||||
|| (UseLoopPredicate && find_predicate_insertion_point(entry, Deoptimization::Reason_predicate) != NULL)) {
|
||||
assert(entry->is_IfProj(), "sanity - must be ifProj since there is at least one predicate");
|
||||
if (entry->outcnt() > 1) {
|
||||
// Bail out if there are loop predicates from which there are additional control dependencies (i.e. from
|
||||
// loop entry 'entry') to previously partially peeled statements since this case is not handled and can lead
|
||||
// to wrong execution. Remove this bailout once this is fixed.
|
||||
return;
|
||||
}
|
||||
}
|
||||
// Find first invariant test that doesn't exit the loop
|
||||
IfNode* unswitch_iff = find_unswitching_candidate((const IdealLoopTree *)loop);
|
||||
assert(unswitch_iff != NULL, "should be at least one");
|
||||
|
||||
@ -140,7 +151,7 @@ void PhaseIdealLoop::do_unswitching(IdealLoopTree *loop, Node_List &old_new) {
|
||||
|
||||
#ifdef ASSERT
|
||||
Node* uniqc = proj_true->unique_ctrl_out();
|
||||
Node* entry = head->skip_strip_mined()->in(LoopNode::EntryControl);
|
||||
entry = head->skip_strip_mined()->in(LoopNode::EntryControl);
|
||||
Node* predicate = find_predicate(entry);
|
||||
if (predicate != NULL) {
|
||||
entry = skip_loop_predicates(entry);
|
||||
@ -281,123 +292,6 @@ ProjNode* PhaseIdealLoop::create_slow_version_of_loop(IdealLoopTree *loop,
|
||||
_igvn.replace_input_of(slow_l, LoopNode::EntryControl, ifslow_pred);
|
||||
set_idom(slow_l, ifslow_pred, dom_depth(l));
|
||||
|
||||
if (iffast != iffast_pred && entry->outcnt() > 1) {
|
||||
// This situation occurs when only non-CFG nodes (i.e. no control dependencies between them) with a control
|
||||
// input from the loop header were partially peeled before (now control dependent on loop entry control).
|
||||
// If additional CFG nodes were peeled, then the insertion point of the loop predicates from the parsing stage
|
||||
// would not be found anymore and the predicates not cloned at all (i.e. iffast == iffast_pred) as it happens
|
||||
// for normal peeling. Those partially peeled statements have a control input from the old loop entry control
|
||||
// and need to be executed after the predicates. These control dependencies need to be removed from the old
|
||||
// entry control and added to the new entry control nodes 'iffast_pred' and 'ifslow_pred'. Since each node can
|
||||
// only have one control input, we need to create clones for all statements (2) that can be reached over a path
|
||||
// from the old entry control 'entry' (1) to a loop phi (8, 9). The old nodes (2) will be moved to the fast loop and the
|
||||
// new cloned nodes (10) to the slow loop.
|
||||
//
|
||||
// The result of the following algorithm is visualized below. The cloned loop predicates for the fast loop
|
||||
// are between the loop selection node (3) and the entry control for the fast loop (4) and for the slow loop
|
||||
// between the loop selection node (3) and the entry control for the slow loop (5), respectively.
|
||||
//
|
||||
// 1 entry 1 entry
|
||||
// / \ |
|
||||
// 2 stmt 3 iff 3 iff
|
||||
// | / \ / \
|
||||
// | .. .. .. ..
|
||||
// | / \ / \
|
||||
// | 4 iffast_p 5 ifslow_p 4 iffast_p 5 ifslow_p
|
||||
// | | | / \ / \
|
||||
// | 6 head 7 slow_head ==> 6 head 2 stmt 7 slow_head 10 cloned_stmt
|
||||
// | | | \ / \ /
|
||||
// +--\ | +--\ | 8 phi 9 phi
|
||||
// | 8 phi | 9 phi
|
||||
// | |
|
||||
// +----------+
|
||||
//
|
||||
assert(ifslow != ifslow_pred, "sanity - must also be different");
|
||||
|
||||
ResourceMark rm;
|
||||
Unique_Node_List worklist;
|
||||
Unique_Node_List phis;
|
||||
Node_List old_clone;
|
||||
LoopNode* slow_head = old_new[head->_idx]->as_Loop();
|
||||
|
||||
// 1) Do a BFS starting from the outputs of the original entry control node 'entry' to all (loop) phis
|
||||
// and add the non-phi nodes to the worklist.
|
||||
// First get all outputs of 'entry' which are not the new "loop selection check" 'iff'.
|
||||
for (DUIterator_Fast imax, i = entry->fast_outs(imax); i < imax; i++) {
|
||||
Node* stmt = entry->fast_out(i);
|
||||
if (stmt != iff) {
|
||||
assert(!stmt->is_CFG(), "cannot be a CFG node");
|
||||
worklist.push(stmt);
|
||||
}
|
||||
}
|
||||
|
||||
// Then do a BFS from all collected nodes so far and stop if a phi node is hit.
|
||||
// Keep track of them on a separate 'phis' list to adjust their inputs later.
|
||||
for (uint i = 0; i < worklist.size(); i++) {
|
||||
Node* stmt = worklist.at(i);
|
||||
for (DUIterator_Fast jmax, j = stmt->fast_outs(jmax); j < jmax; j++) {
|
||||
Node* out = stmt->fast_out(j);
|
||||
assert(!out->is_CFG(), "cannot be a CFG node");
|
||||
if (out->is_Phi()) {
|
||||
assert(out->in(PhiNode::Region) == head || out->in(PhiNode::Region) == slow_head,
|
||||
"phi must be either part of the slow or the fast loop");
|
||||
phis.push(out);
|
||||
} else {
|
||||
worklist.push(out);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 2) All nodes of interest are in 'worklist' and are now cloned. This could not be done simultaneously
|
||||
// in step 1 in an easy way because we could have cloned a node which has an input that is added to the
|
||||
// worklist later. As a result, the BFS would hit a clone which does not need to be cloned again.
|
||||
// While cloning a node, the control inputs to 'entry' are updated such that the old node points to
|
||||
// 'iffast_pred' and the clone to 'ifslow_pred', respectively.
|
||||
for (uint i = 0; i < worklist.size(); i++) {
|
||||
Node* stmt = worklist.at(i);
|
||||
assert(!stmt->is_CFG(), "cannot be a CFG node");
|
||||
Node* cloned_stmt = stmt->clone();
|
||||
old_clone.map(stmt->_idx, cloned_stmt);
|
||||
_igvn.register_new_node_with_optimizer(cloned_stmt);
|
||||
|
||||
if (stmt->in(0) == entry) {
|
||||
_igvn.replace_input_of(stmt, 0, iffast_pred);
|
||||
set_ctrl(stmt, iffast_pred);
|
||||
_igvn.replace_input_of(cloned_stmt, 0, ifslow_pred);
|
||||
set_ctrl(cloned_stmt, ifslow_pred);
|
||||
}
|
||||
}
|
||||
|
||||
// 3) Update the entry control of all collected phi nodes of the slow loop to use the cloned nodes
|
||||
// instead of the old ones from the worklist
|
||||
for (uint i = 0; i < phis.size(); i++) {
|
||||
assert(phis.at(i)->is_Phi(), "must be a phi");
|
||||
PhiNode* phi = phis.at(i)->as_Phi();
|
||||
if (phi->in(PhiNode::Region) == slow_head) {
|
||||
// Slow loop: Update phi entry control to use the cloned version instead of the old one from the worklist
|
||||
Node* entry_control = phi->in(LoopNode::EntryControl);
|
||||
_igvn.replace_input_of(phi, LoopNode::EntryControl, old_clone[phi->in(LoopNode::EntryControl)->_idx]);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// 4) Replace all input edges of cloned nodes from old nodes on the worklist by an input edge from their
|
||||
// corresponding cloned version.
|
||||
for (uint i = 0; i < worklist.size(); i++) {
|
||||
Node* stmt = worklist.at(i);
|
||||
for (uint j = 0; j < stmt->req(); j++) {
|
||||
Node* in = stmt->in(j);
|
||||
if (in == NULL) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (worklist.contains(in)) {
|
||||
// Replace the edge clone_of_old1->old2 with an edge clone_of_old1->clone_of_old2
|
||||
old_clone[stmt->_idx]->set_req(j, old_clone[in->_idx]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
recompute_dom_depth();
|
||||
|
||||
return iffast;
|
||||
|
@ -23,11 +23,12 @@
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @bug 8233033
|
||||
* @summary Tests if partially peeled statements are not executed before the loop predicates of the unswitched fast loop.
|
||||
* @bug 8233033 8235984
|
||||
* @summary Tests if partially peeled statements are not executed before the loop predicates by bailing out of loop unswitching.
|
||||
*
|
||||
* @run main/othervm -Xbatch -XX:-TieredCompilation
|
||||
* @run main/othervm -Xbatch
|
||||
* -XX:CompileCommand=compileonly,compiler.loopopts.PartialPeelingUnswitch::test*
|
||||
* -XX:CompileCommand=dontinline,compiler.loopopts.PartialPeelingUnswitch::dontInline
|
||||
* compiler.loopopts.PartialPeelingUnswitch
|
||||
*/
|
||||
|
||||
@ -38,6 +39,7 @@ public class PartialPeelingUnswitch {
|
||||
public static int iFld;
|
||||
public static int x = 42;
|
||||
public static int y = 31;
|
||||
public static int z = 22;
|
||||
public static int[] iArr = new int[10];
|
||||
|
||||
public int test() {
|
||||
@ -46,8 +48,9 @@ public class PartialPeelingUnswitch {
|
||||
* of the cloned loop predicates for the fast loop (set up at unswitching stage). The only partially peeled
|
||||
* statement "iFld += 7" was wrongly executed before the predicates (and before the loop itself).
|
||||
* When hitting the uncommon trap, "iFld >>= 1" was not yet executed. As a result, the interpreter directly
|
||||
* reexecuted "iFld += 7" again. This resulted in a wrong result for "iFld". The fix makes peeled statements
|
||||
* control dependent on the cloned loop predicates such that they are executed after them.
|
||||
* reexecuted "iFld += 7" again. This resulted in a wrong result for "iFld". The fix in 8233033 makes peeled
|
||||
* statements control dependent on the cloned loop predicates such that they are executed after them. However,
|
||||
* some cases are not handled properly. For now, the new fix in 8235984 just bails out of loop unswitching.
|
||||
*/
|
||||
iFld = 13;
|
||||
for (int i = 0; i < 8; i++) {
|
||||
@ -103,16 +106,162 @@ public class PartialPeelingUnswitch {
|
||||
return iFld + k;
|
||||
}
|
||||
|
||||
public int test3() {
|
||||
iFld = 13;
|
||||
if (z < 34) {
|
||||
z = 34;
|
||||
}
|
||||
|
||||
for (int i = 0; i < 8; i++) {
|
||||
int j = 10;
|
||||
while (--j > 0) {
|
||||
iFld += -7;
|
||||
iArr[5] = 8;
|
||||
x = iArr[6];
|
||||
y = x;
|
||||
for (int k = 50; k < 51; k++) {
|
||||
x = iArr[7];
|
||||
}
|
||||
switch ((i * 5) + 102) {
|
||||
case 120:
|
||||
return iFld;
|
||||
case 103:
|
||||
break;
|
||||
case 116:
|
||||
break;
|
||||
default:
|
||||
if (iFld == -7) {
|
||||
return iFld;
|
||||
}
|
||||
z = iArr[5];
|
||||
iFld >>= 1;
|
||||
}
|
||||
}
|
||||
iArr[5] = 34;
|
||||
dontInline(iArr[5]);
|
||||
}
|
||||
return iFld;
|
||||
}
|
||||
|
||||
public int test4() {
|
||||
iFld = 13;
|
||||
if (z < 34) {
|
||||
z = 34;
|
||||
}
|
||||
|
||||
for (int i = 0; i < 8; i++) {
|
||||
int j = 10;
|
||||
while (--j > 0) {
|
||||
iFld += -7;
|
||||
iArr[5] = 8;
|
||||
x = iArr[6];
|
||||
y = x;
|
||||
for (int k = 50; k < 51; k++) {
|
||||
x = iArr[7];
|
||||
}
|
||||
switch ((i * 5) + 102) {
|
||||
case 120:
|
||||
return iFld;
|
||||
case 103:
|
||||
break;
|
||||
case 116:
|
||||
break;
|
||||
default:
|
||||
if (iFld == -7) {
|
||||
return iFld;
|
||||
}
|
||||
z = iArr[5];
|
||||
iFld >>= 1;
|
||||
}
|
||||
}
|
||||
iArr[5] = 34;
|
||||
}
|
||||
return iFld;
|
||||
}
|
||||
|
||||
public int test5() {
|
||||
iFld = 13;
|
||||
for (int i = 0; i < 8; i++) {
|
||||
int j = 10;
|
||||
while (--j > 0) {
|
||||
iFld += -7;
|
||||
iArr[5] = 8;
|
||||
x = iArr[6];
|
||||
y = x;
|
||||
for (int k = 50; k < 51; k++) {
|
||||
x = iArr[7];
|
||||
}
|
||||
switch ((i * 5) + 102) {
|
||||
case 120:
|
||||
return iFld;
|
||||
case 103:
|
||||
break;
|
||||
case 116:
|
||||
break;
|
||||
default:
|
||||
iFld >>= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
return iFld;
|
||||
}
|
||||
|
||||
public int test6() {
|
||||
iFld = 13;
|
||||
for (int i = 0; i < 8; i++) {
|
||||
int j = 10;
|
||||
while (--j > 0) {
|
||||
iFld += -7;
|
||||
iArr[5] = 8;
|
||||
x = iArr[6];
|
||||
y = x;
|
||||
switch ((i * 5) + 102) {
|
||||
case 120:
|
||||
return iFld;
|
||||
case 103:
|
||||
break;
|
||||
case 116:
|
||||
break;
|
||||
default:
|
||||
iFld >>= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
return iFld;
|
||||
}
|
||||
|
||||
public int test7() {
|
||||
iFld = 13;
|
||||
for (int i = 0; i < 8; i++) {
|
||||
int j = 10;
|
||||
while (--j > 0) {
|
||||
iFld += -7;
|
||||
iArr[5] = 8;
|
||||
switch ((i * 5) + 102) {
|
||||
case 120:
|
||||
return iFld;
|
||||
case 103:
|
||||
break;
|
||||
case 116:
|
||||
break;
|
||||
default:
|
||||
iFld >>= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
return iFld;
|
||||
}
|
||||
|
||||
public static void main(String[] strArr) {
|
||||
PartialPeelingUnswitch _instance = new PartialPeelingUnswitch();
|
||||
for (int i = 0; i < 200; i++) {
|
||||
for (int i = 0; i < 2000; i++) {
|
||||
int result = _instance.test();
|
||||
if (result != -7) {
|
||||
throw new RuntimeException("Result should always be -7 but was " + result);
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < 200; i++) {
|
||||
for (int i = 0; i < 2000; i++) {
|
||||
int result = _instance.test2();
|
||||
check(-1, result);
|
||||
check(-7, iFld);
|
||||
@ -129,6 +278,22 @@ public class PartialPeelingUnswitch {
|
||||
x = 42;
|
||||
y = 31;
|
||||
}
|
||||
|
||||
for (int i = 0; i < 2000; i++) {
|
||||
_instance.test3();
|
||||
_instance.test4();
|
||||
_instance.test5();
|
||||
_instance.test6();
|
||||
_instance.test7();
|
||||
}
|
||||
|
||||
for (int i = 0; i < 2000; i++) {
|
||||
if (i % 2 == 0) {
|
||||
z = 23;
|
||||
}
|
||||
_instance.test3();
|
||||
_instance.test4();
|
||||
}
|
||||
}
|
||||
|
||||
public static void check(int expected, int actual) {
|
||||
@ -136,4 +301,6 @@ public class PartialPeelingUnswitch {
|
||||
throw new RuntimeException("Wrong result, expected: " + expected + ", actual: " + actual);
|
||||
}
|
||||
}
|
||||
|
||||
public void dontInline(int i) { }
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user