8205033: [REDO] Induction variable of over-unrolled loop conflicts with range checks
Update skeleton predicates before main loop during unrolling to remove dead code. Reviewed-by: kvn, roland
This commit is contained in:
parent
5208900fce
commit
d2f70d7052
@ -91,7 +91,7 @@ void PhaseIdealLoop::register_control(Node* n, IdealLoopTree *loop, Node* pred)
|
||||
//
|
||||
//
|
||||
// We will create a region to guard the uct call if there is no one there.
|
||||
// The true projecttion (if_cont) of the new_iff is returned.
|
||||
// The true projection (if_cont) of the new_iff is returned.
|
||||
// This code is also used to clone predicates to cloned loops.
|
||||
ProjNode* PhaseIdealLoop::create_new_if_for_predicate(ProjNode* cont_proj, Node* new_entry,
|
||||
Deoptimization::DeoptReason reason,
|
||||
@ -1212,10 +1212,10 @@ bool PhaseIdealLoop::loop_predication_impl_helper(IdealLoopTree *loop, ProjNode*
|
||||
|
||||
|
||||
// After pre/main/post loops are created, we'll put a copy of some
|
||||
// range checks between the pre and main loop to validate the initial
|
||||
// value of the induction variable for the main loop. Make a copy of
|
||||
// the predicates here with an opaque node as a place holder for the
|
||||
// initial value.
|
||||
// range checks between the pre and main loop to validate the value
|
||||
// of the main loop induction variable. Make a copy of the predicates
|
||||
// here with an opaque node as a place holder for the value (will be
|
||||
// updated by PhaseIdealLoop::update_skeleton_predicate()).
|
||||
ProjNode* PhaseIdealLoop::insert_skeleton_predicate(IfNode* iff, IdealLoopTree *loop,
|
||||
ProjNode* proj, ProjNode *predicate_proj,
|
||||
ProjNode* upper_bound_proj,
|
||||
|
@ -1059,8 +1059,7 @@ Node* PhaseIdealLoop::cast_incr_before_loop(Node* incr, Node* ctrl, Node* loop)
|
||||
// loop is never executed). When that happens, range check
|
||||
// CastII/ConvI2L nodes cause some data paths to die. For consistency,
|
||||
// the control paths must die too but the range checks were removed by
|
||||
// predication. The range checks that we add here guarantee that they
|
||||
// do.
|
||||
// predication. The range checks that we add here guarantee that they do.
|
||||
void PhaseIdealLoop::duplicate_predicates_helper(Node* predicate, Node* castii, IdealLoopTree* outer_loop,
|
||||
LoopNode* outer_main_head, uint dd_main_head) {
|
||||
if (predicate != NULL) {
|
||||
@ -1069,110 +1068,128 @@ void PhaseIdealLoop::duplicate_predicates_helper(Node* predicate, Node* castii,
|
||||
Node* rgn = uncommon_proj->unique_ctrl_out();
|
||||
assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct");
|
||||
assert(iff->in(1)->in(1)->Opcode() == Op_Opaque1, "unexpected predicate shape");
|
||||
predicate = predicate->in(0)->in(0);
|
||||
predicate = iff->in(0);
|
||||
Node* current_proj = outer_main_head->in(LoopNode::EntryControl);
|
||||
Node* prev_proj = current_proj;
|
||||
while (predicate != NULL && predicate->is_Proj() && predicate->in(0)->is_If()) {
|
||||
uncommon_proj = predicate->in(0)->as_If()->proj_out(1 - predicate->as_Proj()->_con);
|
||||
iff = predicate->in(0)->as_If();
|
||||
uncommon_proj = iff->proj_out(1 - predicate->as_Proj()->_con);
|
||||
if (uncommon_proj->unique_ctrl_out() != rgn)
|
||||
break;
|
||||
iff = predicate->in(0)->as_If();
|
||||
if (iff->in(1)->Opcode() == Op_Opaque4) {
|
||||
Node_Stack to_clone(2);
|
||||
to_clone.push(iff->in(1), 1);
|
||||
uint current = C->unique();
|
||||
Node* result = NULL;
|
||||
// Look for the opaque node to replace with the init value
|
||||
// and clone everything in between. We keep the Opaque4 node
|
||||
// so the duplicated predicates are eliminated once loop
|
||||
// opts are over: they are here only to keep the IR graph
|
||||
// consistent.
|
||||
do {
|
||||
Node* n = to_clone.node();
|
||||
uint i = to_clone.index();
|
||||
Node* m = n->in(i);
|
||||
int op = m->Opcode();
|
||||
if (m->is_Bool() ||
|
||||
m->is_Cmp() ||
|
||||
op == Op_AndL ||
|
||||
op == Op_OrL ||
|
||||
op == Op_RShiftL ||
|
||||
op == Op_LShiftL ||
|
||||
op == Op_AddL ||
|
||||
op == Op_AddI ||
|
||||
op == Op_MulL ||
|
||||
op == Op_MulI ||
|
||||
op == Op_SubL ||
|
||||
op == Op_SubI ||
|
||||
op == Op_ConvI2L) {
|
||||
to_clone.push(m, 1);
|
||||
continue;
|
||||
}
|
||||
if (op == Op_Opaque1) {
|
||||
if (n->_idx < current) {
|
||||
n = n->clone();
|
||||
}
|
||||
n->set_req(i, castii);
|
||||
register_new_node(n, current_proj);
|
||||
to_clone.set_node(n);
|
||||
}
|
||||
for (;;) {
|
||||
Node* cur = to_clone.node();
|
||||
uint j = to_clone.index();
|
||||
if (j+1 < cur->req()) {
|
||||
to_clone.set_index(j+1);
|
||||
break;
|
||||
}
|
||||
to_clone.pop();
|
||||
if (to_clone.size() == 0) {
|
||||
result = cur;
|
||||
break;
|
||||
}
|
||||
Node* next = to_clone.node();
|
||||
j = to_clone.index();
|
||||
if (cur->_idx >= current) {
|
||||
if (next->_idx < current) {
|
||||
next = next->clone();
|
||||
register_new_node(next, current_proj);
|
||||
to_clone.set_node(next);
|
||||
}
|
||||
assert(next->in(j) != cur, "input should have been cloned");
|
||||
next->set_req(j, cur);
|
||||
}
|
||||
}
|
||||
} while (result == NULL);
|
||||
assert(result->_idx >= current, "new node expected");
|
||||
|
||||
Node* proj = predicate->clone();
|
||||
Node* other_proj = uncommon_proj->clone();
|
||||
Node* new_iff = iff->clone();
|
||||
new_iff->set_req(1, result);
|
||||
proj->set_req(0, new_iff);
|
||||
other_proj->set_req(0, new_iff);
|
||||
Node *frame = new ParmNode(C->start(), TypeFunc::FramePtr);
|
||||
register_new_node(frame, C->start());
|
||||
// It's impossible for the predicate to fail at runtime. Use
|
||||
// an Halt node.
|
||||
Node* halt = new HaltNode(other_proj, frame);
|
||||
C->root()->add_req(halt);
|
||||
new_iff->set_req(0, prev_proj);
|
||||
|
||||
register_control(new_iff, outer_loop->_parent, prev_proj);
|
||||
register_control(proj, outer_loop->_parent, new_iff);
|
||||
register_control(other_proj, _ltree_root, new_iff);
|
||||
register_control(halt, _ltree_root, other_proj);
|
||||
|
||||
prev_proj = proj;
|
||||
// Clone the predicate twice and initialize one with the initial
|
||||
// value of the loop induction variable. Leave the other predicate
|
||||
// to be initialized when increasing the stride during loop unrolling.
|
||||
prev_proj = update_skeleton_predicate(iff, castii, predicate, uncommon_proj, current_proj, outer_loop, prev_proj);
|
||||
Node* value = new Opaque1Node(C, castii);
|
||||
register_new_node(value, current_proj);
|
||||
prev_proj = update_skeleton_predicate(iff, value, predicate, uncommon_proj, current_proj, outer_loop, prev_proj);
|
||||
// Remove the skeleton predicate from the pre-loop
|
||||
_igvn.replace_input_of(iff, 1, _igvn.intcon(1));
|
||||
}
|
||||
predicate = predicate->in(0)->in(0);
|
||||
}
|
||||
if (prev_proj != current_proj) {
|
||||
_igvn.replace_input_of(outer_main_head, LoopNode::EntryControl, prev_proj);
|
||||
set_idom(outer_main_head, prev_proj, dd_main_head);
|
||||
}
|
||||
_igvn.replace_input_of(outer_main_head, LoopNode::EntryControl, prev_proj);
|
||||
set_idom(outer_main_head, prev_proj, dd_main_head);
|
||||
}
|
||||
}
|
||||
|
||||
Node* PhaseIdealLoop::update_skeleton_predicate(Node* iff, Node* value, Node* predicate, Node* uncommon_proj,
|
||||
Node* current_proj, IdealLoopTree* outer_loop, Node* prev_proj) {
|
||||
bool clone = (outer_loop != NULL); // Clone the predicate?
|
||||
Node_Stack to_clone(2);
|
||||
to_clone.push(iff->in(1), 1);
|
||||
uint current = C->unique();
|
||||
Node* result = NULL;
|
||||
// Look for the opaque node to replace with the new value
|
||||
// and clone everything in between. We keep the Opaque4 node
|
||||
// so the duplicated predicates are eliminated once loop
|
||||
// opts are over: they are here only to keep the IR graph
|
||||
// consistent.
|
||||
do {
|
||||
Node* n = to_clone.node();
|
||||
uint i = to_clone.index();
|
||||
Node* m = n->in(i);
|
||||
int op = m->Opcode();
|
||||
if (m->is_Bool() ||
|
||||
m->is_Cmp() ||
|
||||
op == Op_AndL ||
|
||||
op == Op_OrL ||
|
||||
op == Op_RShiftL ||
|
||||
op == Op_LShiftL ||
|
||||
op == Op_AddL ||
|
||||
op == Op_AddI ||
|
||||
op == Op_MulL ||
|
||||
op == Op_MulI ||
|
||||
op == Op_SubL ||
|
||||
op == Op_SubI ||
|
||||
op == Op_ConvI2L) {
|
||||
to_clone.push(m, 1);
|
||||
continue;
|
||||
}
|
||||
if (op == Op_Opaque1) {
|
||||
if (!clone) {
|
||||
// Update the input of the Opaque1Node and exit
|
||||
_igvn.replace_input_of(m, 1, value);
|
||||
return prev_proj;
|
||||
}
|
||||
if (n->_idx < current) {
|
||||
n = n->clone();
|
||||
}
|
||||
n->set_req(i, value);
|
||||
register_new_node(n, current_proj);
|
||||
to_clone.set_node(n);
|
||||
}
|
||||
for (;;) {
|
||||
Node* cur = to_clone.node();
|
||||
uint j = to_clone.index();
|
||||
if (j+1 < cur->req()) {
|
||||
to_clone.set_index(j+1);
|
||||
break;
|
||||
}
|
||||
to_clone.pop();
|
||||
if (to_clone.size() == 0) {
|
||||
result = cur;
|
||||
break;
|
||||
}
|
||||
Node* next = to_clone.node();
|
||||
j = to_clone.index();
|
||||
if (clone && cur->_idx >= current) {
|
||||
if (next->_idx < current) {
|
||||
next = next->clone();
|
||||
register_new_node(next, current_proj);
|
||||
to_clone.set_node(next);
|
||||
}
|
||||
assert(next->in(j) != cur, "input should have been cloned");
|
||||
next->set_req(j, cur);
|
||||
}
|
||||
}
|
||||
} while (result == NULL);
|
||||
if (!clone) {
|
||||
return NULL;
|
||||
}
|
||||
assert(result->_idx >= current, "new node expected");
|
||||
|
||||
Node* proj = predicate->clone();
|
||||
Node* other_proj = uncommon_proj->clone();
|
||||
Node* new_iff = iff->clone();
|
||||
new_iff->set_req(1, result);
|
||||
proj->set_req(0, new_iff);
|
||||
other_proj->set_req(0, new_iff);
|
||||
Node *frame = new ParmNode(C->start(), TypeFunc::FramePtr);
|
||||
register_new_node(frame, C->start());
|
||||
// It's impossible for the predicate to fail at runtime. Use an Halt node.
|
||||
Node* halt = new HaltNode(other_proj, frame);
|
||||
C->root()->add_req(halt);
|
||||
new_iff->set_req(0, prev_proj);
|
||||
|
||||
register_control(new_iff, outer_loop->_parent, prev_proj);
|
||||
register_control(proj, outer_loop->_parent, new_iff);
|
||||
register_control(other_proj, _ltree_root, new_iff);
|
||||
register_control(halt, _ltree_root, other_proj);
|
||||
return proj;
|
||||
}
|
||||
|
||||
void PhaseIdealLoop::duplicate_predicates(CountedLoopNode* pre_head, Node* castii, IdealLoopTree* outer_loop,
|
||||
LoopNode* outer_main_head, uint dd_main_head) {
|
||||
if (UseLoopPredicate) {
|
||||
@ -1682,6 +1699,30 @@ void PhaseIdealLoop::do_unroll( IdealLoopTree *loop, Node_List &old_new, bool ad
|
||||
assert(old_trip_count > 1 &&
|
||||
(!adjust_min_trip || stride_p <= (1<<3)*loop_head->unrolled_count()), "sanity");
|
||||
|
||||
if (UseLoopPredicate) {
|
||||
// Search for skeleton predicates and update them according to the new stride
|
||||
Node* entry = ctrl;
|
||||
while (entry != NULL && entry->is_Proj() && entry->in(0)->is_If()) {
|
||||
IfNode* iff = entry->in(0)->as_If();
|
||||
ProjNode* proj = iff->proj_out(1 - entry->as_Proj()->_con);
|
||||
if (proj->unique_ctrl_out()->Opcode() != Op_Halt) {
|
||||
break;
|
||||
}
|
||||
if (iff->in(1)->Opcode() == Op_Opaque4) {
|
||||
// Compute the value of the loop induction variable at the end of the
|
||||
// first iteration of the unrolled loop: init + new_stride_con - init_inc
|
||||
int init_inc = stride_con/loop_head->unrolled_count();
|
||||
assert(init_inc != 0, "invalid loop increment");
|
||||
int new_stride_con = stride_con * 2;
|
||||
Node* max_value = _igvn.intcon(new_stride_con - init_inc);
|
||||
max_value = new AddINode(init, max_value);
|
||||
register_new_node(max_value, get_ctrl(iff->in(1)));
|
||||
update_skeleton_predicate(iff, max_value);
|
||||
}
|
||||
entry = entry->in(0)->in(0);
|
||||
}
|
||||
}
|
||||
|
||||
// Adjust loop limit to keep valid iterations number after unroll.
|
||||
// Use (limit - stride) instead of (((limit - init)/stride) & (-2))*stride
|
||||
// which may overflow.
|
||||
|
@ -744,6 +744,8 @@ private:
|
||||
LoopNode* outer_main_head, uint dd_main_head);
|
||||
void duplicate_predicates(CountedLoopNode* pre_head, Node* castii, IdealLoopTree* outer_loop,
|
||||
LoopNode* outer_main_head, uint dd_main_head);
|
||||
Node* update_skeleton_predicate(Node* iff, Node* value, Node* predicate = NULL, Node* uncommon_proj = NULL,
|
||||
Node* current_proj = NULL, IdealLoopTree* outer_loop = NULL, Node* prev_proj = NULL);
|
||||
|
||||
public:
|
||||
|
||||
|
@ -23,11 +23,12 @@
|
||||
|
||||
/**
|
||||
* @test
|
||||
* @bug 8193130
|
||||
* @bug 8193130 8203915
|
||||
* @summary Bad graph when unrolled loop bounds conflicts with range checks
|
||||
*
|
||||
* @run main/othervm IterationSplitPredicateInconsistency
|
||||
* @run main/othervm -XX:-UseLoopPredicate IterationSplitPredicateInconsistency
|
||||
* @run main/othervm -XX:LoopStripMiningIter=0 IterationSplitPredicateInconsistency
|
||||
*
|
||||
*/
|
||||
|
||||
|
@ -23,11 +23,12 @@
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @bug 8159016 8202949
|
||||
* @bug 8159016 8202949 8203915
|
||||
* @summary Tests correct dominator information after over-unrolling a loop.
|
||||
* @requires vm.gc == "Parallel" | vm.gc == "null"
|
||||
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions
|
||||
* -Xcomp -XX:-TieredCompilation -XX:-UseSwitchProfiling
|
||||
* -XX:-UseCountedLoopSafepoints -XX:LoopUnrollLimit=250
|
||||
* -XX:-UseG1GC -XX:+UseParallelGC compiler.loopopts.TestOverunrolling
|
||||
*/
|
||||
|
||||
@ -81,11 +82,74 @@ public class TestOverunrolling {
|
||||
}
|
||||
}
|
||||
|
||||
// Similar to test2 but we cannot statically determine the upper bound of
|
||||
// the inner for loop and can therefore not prevent over-unrolling.
|
||||
public static void test3(int[] array) {
|
||||
int[] iArr = new int[8];
|
||||
for (int i = 0; i < array.length; i++) {
|
||||
for (int j = 5; j < i; j++) {
|
||||
int k = 1;
|
||||
do {
|
||||
iArr[j] = 0;
|
||||
switch (k) {
|
||||
case 1:
|
||||
lFld = 0;
|
||||
break;
|
||||
case 10:
|
||||
dFld = 0;
|
||||
break;
|
||||
}
|
||||
} while (++k < 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Similar to test3 but with negative stride and constant outer loop limit
|
||||
public static void test4(int[] array, boolean store) {
|
||||
int[] iArr = new int[8];
|
||||
for (int i = -8; i < 8; i++) {
|
||||
for (int j = 5; j > i; j--) {
|
||||
int k = 1;
|
||||
do {
|
||||
if (store) {
|
||||
iArr[j] = 0;
|
||||
}
|
||||
switch (k) {
|
||||
case 1:
|
||||
lFld = 0;
|
||||
break;
|
||||
case 10:
|
||||
dFld = 0;
|
||||
break;
|
||||
}
|
||||
} while (++k < 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// The inner for-loop is over-unrolled and vectorized resulting in
|
||||
// a crash in the matcher because the memory input to a vector is top.
|
||||
public static int test5(int[] array) {
|
||||
int result = 0;
|
||||
int[] iArr = new int[8];
|
||||
for (int i = 0; i < array.length; i++) {
|
||||
for (int j = 5; j < i; j++) {
|
||||
iArr[j] += array[j];
|
||||
result += array[j];
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public static void main(String args[]) {
|
||||
for (int i = 0; i < 42; ++i) {
|
||||
test1(i);
|
||||
}
|
||||
test2();
|
||||
int[] array = new int[8];
|
||||
test3(array);
|
||||
test4(array, false);
|
||||
test5(array);
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user