8186027: C2: loop strip mining

Reviewed-by: kvn, neliasso
This commit is contained in:
Roland Westrelin 2017-11-28 11:59:16 +01:00
parent 46f665881f
commit 386e258e20
17 changed files with 1150 additions and 209 deletions

View File

@ -92,6 +92,16 @@ void G1Arguments::initialize_flags() {
}
log_trace(gc)("MarkStackSize: %uk MarkStackSizeMax: %uk", (unsigned int) (MarkStackSize / K), (uint) (MarkStackSizeMax / K));
#ifdef COMPILER2
// Enable loop strip mining to offer better pause time guarantees
if (FLAG_IS_DEFAULT(UseCountedLoopSafepoints)) {
FLAG_SET_DEFAULT(UseCountedLoopSafepoints, true);
}
if (UseCountedLoopSafepoints && FLAG_IS_DEFAULT(LoopStripMiningIter)) {
FLAG_SET_DEFAULT(LoopStripMiningIter, 1000);
}
#endif
}
CollectedHeap* G1Arguments::create_heap() {

View File

@ -740,6 +740,14 @@
\
develop(bool, RenumberLiveNodes, true, \
"Renumber live nodes") \
\
product(uintx, LoopStripMiningIter, 0, \
"Number of iterations in strip mined loop") \
range(0, max_juint) \
\
product(uintx, LoopStripMiningIterShortLoop, 0, \
"Loop with fewer iterations are not strip mined") \
range(0, max_juint) \
C2_FLAGS(DECLARE_DEVELOPER_FLAG, \
DECLARE_PD_DEVELOPER_FLAG, \

View File

@ -571,6 +571,18 @@ Node *RegionNode::Ideal(PhaseGVN *phase, bool can_reshape) {
return NULL;
} else if (can_reshape) { // Optimization phase - remove the node
PhaseIterGVN *igvn = phase->is_IterGVN();
// Strip mined (inner) loop is going away, remove outer loop.
if (is_CountedLoop() &&
as_Loop()->is_strip_mined()) {
Node* outer_sfpt = as_CountedLoop()->outer_safepoint();
Node* outer_out = as_CountedLoop()->outer_loop_exit();
if (outer_sfpt != NULL && outer_out != NULL) {
Node* in = outer_sfpt->in(0);
igvn->replace_node(outer_out, in);
LoopNode* outer = as_CountedLoop()->outer_loop();
igvn->replace_input_of(outer, LoopNode::LoopBackControl, igvn->C->top());
}
}
Node *parent_ctrl;
if( cnt == 0 ) {
assert( req() == 1, "no inputs expected" );

View File

@ -133,6 +133,8 @@ macro(ConvL2F)
macro(ConvL2I)
macro(CountedLoop)
macro(CountedLoopEnd)
macro(OuterStripMinedLoop)
macro(OuterStripMinedLoopEnd)
macro(CountLeadingZerosI)
macro(CountLeadingZerosL)
macro(CountTrailingZerosI)

View File

@ -3244,9 +3244,11 @@ void Compile::final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc) {
break;
case Op_Loop:
case Op_CountedLoop:
case Op_OuterStripMinedLoop:
if (n->as_Loop()->is_inner_loop()) {
frc.inc_inner_loop_count();
}
n->as_Loop()->verify_strip_mined(0);
break;
case Op_LShiftI:
case Op_RShiftI:
@ -3525,6 +3527,14 @@ bool Compile::final_graph_reshaping() {
record_method_not_compilable("infinite loop");
return true; // Found unvisited kid; must be unreach
}
// Done here so that the verification code in final_graph_reshaping_walk()
// always sees an OuterStripMinedLoopEnd
if (n->is_OuterStripMinedLoopEnd()) {
IfNode* init_iff = n->as_If();
Node* iff = new IfNode(init_iff->in(0), init_iff->in(1), init_iff->_prob, init_iff->_fcnt);
n->subsume_by(iff, this);
}
}
// If original bytecodes contained a mixture of floats and doubles

View File

@ -117,6 +117,7 @@ static Node* split_if(IfNode *iff, PhaseIterGVN *igvn) {
// No intervening control, like a simple Call
Node *r = iff->in(0);
if( !r->is_Region() ) return NULL;
if (r->is_Loop() && r->in(LoopNode::LoopBackControl)->is_top()) return NULL; // going away anyway
if( phi->region() != r ) return NULL;
// No other users of the cmp/bool
if (b->outcnt() != 1 || cmp->outcnt() != 1) {

View File

@ -515,8 +515,8 @@ class Invariance : public StackObj {
_visited(area), _invariant(area), _stack(area, 10 /* guess */),
_clone_visited(area), _old_new(area)
{
Node* head = _lpt->_head;
Node* entry = head->in(LoopNode::EntryControl);
LoopNode* head = _lpt->_head->as_Loop();
Node* entry = head->skip_strip_mined()->in(LoopNode::EntryControl);
if (entry->outcnt() != 1) {
// If a node is pinned between the predicates and the loop
// entry, we won't be able to move any node in the loop that
@ -801,6 +801,10 @@ bool PhaseIdealLoop::loop_predication_impl(IdealLoopTree *loop) {
return false;
}
if (head->is_OuterStripMinedLoop()) {
return false;
}
CountedLoopNode *cl = NULL;
if (head->is_valid_counted_loop()) {
cl = head->as_CountedLoop();
@ -812,7 +816,7 @@ bool PhaseIdealLoop::loop_predication_impl(IdealLoopTree *loop) {
cl = NULL;
}
Node* entry = head->in(LoopNode::EntryControl);
Node* entry = head->skip_strip_mined()->in(LoopNode::EntryControl);
ProjNode *predicate_proj = NULL;
// Loop limit check predicate should be near the loop.
predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
@ -1007,6 +1011,8 @@ bool PhaseIdealLoop::loop_predication_impl(IdealLoopTree *loop) {
}
#endif
head->verify_strip_mined(1);
return hoisted;
}

View File

@ -67,6 +67,16 @@ void IdealLoopTree::record_for_igvn() {
Node *n = _body.at(i);
_phase->_igvn._worklist.push(n);
}
// put body of outer strip mined loop on igvn work list as well
if (_head->is_CountedLoop() && _head->as_Loop()->is_strip_mined()) {
CountedLoopNode* l = _head->as_CountedLoop();
_phase->_igvn._worklist.push(l->outer_loop());
_phase->_igvn._worklist.push(l->outer_loop_tail());
_phase->_igvn._worklist.push(l->outer_loop_end());
_phase->_igvn._worklist.push(l->outer_safepoint());
Node* cle_out = _head->as_CountedLoop()->loopexit()->proj_out(false);
_phase->_igvn._worklist.push(cle_out);
}
}
//------------------------------compute_exact_trip_count-----------------------
@ -494,7 +504,7 @@ void PhaseIdealLoop::do_peeling( IdealLoopTree *loop, Node_List &old_new ) {
loop->dump_head();
}
#endif
Node* head = loop->_head;
LoopNode* head = loop->_head->as_Loop();
bool counted_loop = head->is_CountedLoop();
if (counted_loop) {
CountedLoopNode *cl = head->as_CountedLoop();
@ -514,7 +524,7 @@ void PhaseIdealLoop::do_peeling( IdealLoopTree *loop, Node_List &old_new ) {
// Step 1: Clone the loop body. The clone becomes the peeled iteration.
// The pre-loop illegally has 2 control users (old & new loops).
clone_loop( loop, old_new, dom_depth(head) );
clone_loop(loop, old_new, dom_depth(head->skip_strip_mined()), ControlAroundStripMined);
// Step 2: Make the old-loop fall-in edges point to the peeled iteration.
// Do this by making the old-loop fall-in edges act as if they came
@ -523,8 +533,8 @@ void PhaseIdealLoop::do_peeling( IdealLoopTree *loop, Node_List &old_new ) {
// the pre-loop with only 1 user (the new peeled iteration), but the
// peeled-loop backedge has 2 users.
Node* new_entry = old_new[head->in(LoopNode::LoopBackControl)->_idx];
_igvn.hash_delete(head);
head->set_req(LoopNode::EntryControl, new_entry);
_igvn.hash_delete(head->skip_strip_mined());
head->skip_strip_mined()->set_req(LoopNode::EntryControl, new_entry);
for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) {
Node* old = head->fast_out(j);
if (old->in(0) == loop->_head && old->req() == 3 && old->is_Phi()) {
@ -1009,8 +1019,6 @@ void PhaseIdealLoop::insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_
CountedLoopEndNode *main_end = main_head->loopexit();
guarantee(main_end != NULL, "no loop exit node");
assert( main_end->outcnt() == 2, "1 true, 1 false path only" );
uint dd_main_head = dom_depth(main_head);
uint max = main_head->outcnt();
Node *pre_header= main_head->in(LoopNode::EntryControl);
Node *init = main_head->init_trip();
@ -1043,7 +1051,16 @@ void PhaseIdealLoop::insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_
// Step B1: Clone the loop body. The clone becomes the pre-loop. The main
// loop pre-header illegally has 2 control users (old & new loops).
clone_loop( loop, old_new, dd_main_head );
LoopNode* outer_main_head = main_head;
IdealLoopTree* outer_loop = loop;
if (main_head->is_strip_mined()) {
main_head->verify_strip_mined(1);
outer_main_head = main_head->outer_loop();
outer_loop = loop->_parent;
assert(outer_loop->_head == outer_main_head, "broken loop tree");
}
uint dd_main_head = dom_depth(outer_main_head);
clone_loop(loop, old_new, dd_main_head, ControlAroundStripMined);
CountedLoopNode* pre_head = old_new[main_head->_idx]->as_CountedLoop();
CountedLoopEndNode* pre_end = old_new[main_end ->_idx]->as_CountedLoopEnd();
pre_head->set_pre_loop(main_head);
@ -1058,7 +1075,7 @@ void PhaseIdealLoop::insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_
IfFalseNode *new_pre_exit = new IfFalseNode(pre_end);
_igvn.register_new_node_with_optimizer( new_pre_exit );
set_idom(new_pre_exit, pre_end, dd_main_head);
set_loop(new_pre_exit, loop->_parent);
set_loop(new_pre_exit, outer_loop->_parent);
// Step B2: Build a zero-trip guard for the main-loop. After leaving the
// pre-loop, the main-loop may not execute at all. Later in life this
@ -1075,22 +1092,22 @@ void PhaseIdealLoop::insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_
IfNode *min_iff = new IfNode( new_pre_exit, min_bol, PROB_ALWAYS, COUNT_UNKNOWN );
_igvn.register_new_node_with_optimizer( min_iff );
set_idom(min_iff, new_pre_exit, dd_main_head);
set_loop(min_iff, loop->_parent);
set_loop(min_iff, outer_loop->_parent);
// Plug in the false-path, taken if we need to skip main-loop
_igvn.hash_delete( pre_exit );
pre_exit->set_req(0, min_iff);
set_idom(pre_exit, min_iff, dd_main_head);
set_idom(pre_exit->unique_out(), min_iff, dd_main_head);
set_idom(pre_exit->unique_ctrl_out(), min_iff, dd_main_head);
// Make the true-path, must enter the main loop
Node *min_taken = new IfTrueNode( min_iff );
_igvn.register_new_node_with_optimizer( min_taken );
set_idom(min_taken, min_iff, dd_main_head);
set_loop(min_taken, loop->_parent);
set_loop(min_taken, outer_loop->_parent);
// Plug in the true path
_igvn.hash_delete( main_head );
main_head->set_req(LoopNode::EntryControl, min_taken);
set_idom(main_head, min_taken, dd_main_head);
_igvn.hash_delete(outer_main_head);
outer_main_head->set_req(LoopNode::EntryControl, min_taken);
set_idom(outer_main_head, min_taken, dd_main_head);
Arena *a = Thread::current()->resource_area();
VectorSet visited(a);
@ -1102,7 +1119,7 @@ void PhaseIdealLoop::insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_
if( main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() > 0 ) {
Node *pre_phi = old_new[main_phi->_idx];
Node *fallpre = clone_up_backedge_goo(pre_head->back_control(),
main_head->init_control(),
main_head->skip_strip_mined()->in(LoopNode::EntryControl),
pre_phi->in(LoopNode::LoopBackControl),
visited, clones);
_igvn.hash_delete(main_phi);
@ -1305,16 +1322,24 @@ void PhaseIdealLoop::insert_scalar_rced_post_loop(IdealLoopTree *loop, Node_List
Node *PhaseIdealLoop::insert_post_loop(IdealLoopTree *loop, Node_List &old_new,
CountedLoopNode *main_head, CountedLoopEndNode *main_end,
Node *incr, Node *limit, CountedLoopNode *&post_head) {
IfNode* outer_main_end = main_end;
IdealLoopTree* outer_loop = loop;
if (main_head->is_strip_mined()) {
main_head->verify_strip_mined(1);
outer_main_end = main_head->outer_loop_end();
outer_loop = loop->_parent;
assert(outer_loop->_head == main_head->in(LoopNode::EntryControl), "broken loop tree");
}
//------------------------------
// Step A: Create a new post-Loop.
Node* main_exit = main_end->proj_out(false);
Node* main_exit = outer_main_end->proj_out(false);
assert(main_exit->Opcode() == Op_IfFalse, "");
int dd_main_exit = dom_depth(main_exit);
// Step A1: Clone the loop body of main. The clone becomes the post-loop.
// The main loop pre-header illegally has 2 control users (old & new loops).
clone_loop(loop, old_new, dd_main_exit);
clone_loop(loop, old_new, dd_main_exit, ControlAroundStripMined);
assert(old_new[main_end->_idx]->Opcode() == Op_CountedLoopEnd, "");
post_head = old_new[main_head->_idx]->as_CountedLoop();
post_head->set_normal_loop();
@ -1325,10 +1350,10 @@ Node *PhaseIdealLoop::insert_post_loop(IdealLoopTree *loop, Node_List &old_new,
post_end->_prob = PROB_FAIR;
// Build the main-loop normal exit.
IfFalseNode *new_main_exit = new IfFalseNode(main_end);
IfFalseNode *new_main_exit = new IfFalseNode(outer_main_end);
_igvn.register_new_node_with_optimizer(new_main_exit);
set_idom(new_main_exit, main_end, dd_main_exit);
set_loop(new_main_exit, loop->_parent);
set_idom(new_main_exit, outer_main_end, dd_main_exit);
set_loop(new_main_exit, outer_loop->_parent);
// Step A2: Build a zero-trip guard for the post-loop. After leaving the
// main-loop, the post-loop may not execute at all. We 'opaque' the incr
@ -1346,7 +1371,7 @@ Node *PhaseIdealLoop::insert_post_loop(IdealLoopTree *loop, Node_List &old_new,
IfNode *zer_iff = new IfNode(new_main_exit, zer_bol, PROB_FAIR, COUNT_UNKNOWN);
_igvn.register_new_node_with_optimizer(zer_iff);
set_idom(zer_iff, new_main_exit, dd_main_exit);
set_loop(zer_iff, loop->_parent);
set_loop(zer_iff, outer_loop->_parent);
// Plug in the false-path, taken if we need to skip this post-loop
_igvn.replace_input_of(main_exit, 0, zer_iff);
@ -1356,7 +1381,7 @@ Node *PhaseIdealLoop::insert_post_loop(IdealLoopTree *loop, Node_List &old_new,
Node *zer_taken = new IfTrueNode(zer_iff);
_igvn.register_new_node_with_optimizer(zer_taken);
set_idom(zer_taken, zer_iff, dd_main_exit);
set_loop(zer_taken, loop->_parent);
set_loop(zer_taken, outer_loop->_parent);
// Plug in the true path
_igvn.hash_delete(post_head);
post_head->set_req(LoopNode::EntryControl, zer_taken);
@ -1431,7 +1456,7 @@ void PhaseIdealLoop::do_unroll( IdealLoopTree *loop, Node_List &old_new, bool ad
// if rounds of unroll,optimize are making progress
loop_head->set_node_count_before_unroll(loop->_body.size());
Node *ctrl = loop_head->in(LoopNode::EntryControl);
Node *ctrl = loop_head->skip_strip_mined()->in(LoopNode::EntryControl);
Node *limit = loop_head->limit();
Node *init = loop_head->init_trip();
Node *stride = loop_head->stride();
@ -1610,7 +1635,7 @@ void PhaseIdealLoop::do_unroll( IdealLoopTree *loop, Node_List &old_new, bool ad
// represents the odd iterations; since the loop trips an even number of
// times its backedge is never taken. Kill the backedge.
uint dd = dom_depth(loop_head);
clone_loop( loop, old_new, dd );
clone_loop(loop, old_new, dd, IgnoreStripMined);
// Make backedges of the clone equal to backedges of the original.
// Make the fall-in from the original come from the fall-out of the clone.
@ -1653,6 +1678,7 @@ void PhaseIdealLoop::do_unroll( IdealLoopTree *loop, Node_List &old_new, bool ad
}
loop->record_for_igvn();
loop_head->clear_strip_mined();
#ifndef PRODUCT
if (C->do_vector_loop() && (PrintOpto && (VerifyLoopOptimizations || TraceLoopOpts))) {
@ -2047,7 +2073,7 @@ int PhaseIdealLoop::do_range_check( IdealLoopTree *loop, Node_List &old_new ) {
}
// Need to find the main-loop zero-trip guard
Node *ctrl = cl->in(LoopNode::EntryControl);
Node *ctrl = cl->skip_strip_mined()->in(LoopNode::EntryControl);
Node *iffm = ctrl->in(0);
Node *opqzm = iffm->in(1)->in(1)->in(2);
assert(opqzm->in(1) == main_limit, "do not understand situation");
@ -2413,7 +2439,6 @@ bool PhaseIdealLoop::multi_version_post_loops(IdealLoopTree *rce_loop, IdealLoop
_igvn.register_new_node_with_optimizer(cur_min);
Node *cmp_node = rce_loop_end->cmp_node();
_igvn.replace_input_of(cmp_node, 2, cur_min);
set_idom(cmp_node, cur_min, dom_depth(ctrl));
set_ctrl(cur_min, ctrl);
set_loop(cur_min, rce_loop->_parent);
@ -2519,7 +2544,7 @@ void IdealLoopTree::adjust_loop_exit_prob( PhaseIdealLoop *phase ) {
#ifdef ASSERT
static CountedLoopNode* locate_pre_from_main(CountedLoopNode *cl) {
Node *ctrl = cl->in(LoopNode::EntryControl);
Node *ctrl = cl->skip_strip_mined()->in(LoopNode::EntryControl);
assert(ctrl->Opcode() == Op_IfTrue || ctrl->Opcode() == Op_IfFalse, "");
Node *iffm = ctrl->in(0);
assert(iffm->Opcode() == Op_If, "");
@ -2558,7 +2583,7 @@ void IdealLoopTree::remove_main_post_loops(CountedLoopNode *cl, PhaseIdealLoop *
}
assert(locate_pre_from_main(main_head) == cl, "bad main loop");
Node* main_iff = main_head->in(LoopNode::EntryControl)->in(0);
Node* main_iff = main_head->skip_strip_mined()->in(LoopNode::EntryControl)->in(0);
// Remove the Opaque1Node of the pre loop and make it execute all iterations
phase->_igvn.replace_input_of(pre_cmp, 2, pre_cmp->in(2)->in(2));
@ -2619,7 +2644,7 @@ bool IdealLoopTree::policy_do_remove_empty_loop( PhaseIdealLoop *phase ) {
}
if (needs_guard) {
// Check for an obvious zero trip guard.
Node* inctrl = PhaseIdealLoop::skip_loop_predicates(cl->in(LoopNode::EntryControl));
Node* inctrl = PhaseIdealLoop::skip_loop_predicates(cl->skip_strip_mined()->in(LoopNode::EntryControl));
if (inctrl->Opcode() == Op_IfTrue || inctrl->Opcode() == Op_IfFalse) {
bool maybe_swapped = (inctrl->Opcode() == Op_IfFalse);
// The test should look like just the backedge of a CountedLoop
@ -3167,6 +3192,8 @@ bool PhaseIdealLoop::intrinsify_fill(IdealLoopTree* lpt) {
return false;
}
head->verify_strip_mined(1);
// Check that the body only contains a store of a loop invariant
// value that is indexed by the loop phi.
Node* store = NULL;
@ -3288,6 +3315,16 @@ bool PhaseIdealLoop::intrinsify_fill(IdealLoopTree* lpt) {
}
*/
if (head->is_strip_mined()) {
// Inner strip mined loop goes away so get rid of outer strip
// mined loop
Node* outer_sfpt = head->outer_safepoint();
Node* in = outer_sfpt->in(0);
Node* outer_out = head->outer_loop_exit();
lazy_replace(outer_out, in);
_igvn.replace_input_of(outer_sfpt, 0, C->top());
}
// Redirect the old control and memory edges that are outside the loop.
// Sometimes the memory phi of the head is used as the outgoing
// state of the loop. It's safe in this case to replace it with the

View File

@ -132,11 +132,11 @@ void PhaseIdealLoop::do_unswitching (IdealLoopTree *loop, Node_List &old_new) {
head->as_CountedLoop()->set_normal_loop();
}
ProjNode* proj_true = create_slow_version_of_loop(loop, old_new, unswitch_iff->Opcode());
ProjNode* proj_true = create_slow_version_of_loop(loop, old_new, unswitch_iff->Opcode(), CloneIncludesStripMined);
#ifdef ASSERT
Node* uniqc = proj_true->unique_ctrl_out();
Node* entry = head->in(LoopNode::EntryControl);
Node* entry = head->skip_strip_mined()->in(LoopNode::EntryControl);
Node* predicate = find_predicate(entry);
if (predicate != NULL && UseLoopPredicate) {
// We may have two predicates, find first.
@ -145,7 +145,8 @@ void PhaseIdealLoop::do_unswitching (IdealLoopTree *loop, Node_List &old_new) {
}
if (predicate != NULL) predicate = predicate->in(0);
assert(proj_true->is_IfTrue() &&
(predicate == NULL && uniqc == head ||
(predicate == NULL && uniqc == head && !head->is_strip_mined() ||
predicate == NULL && uniqc == head->in(LoopNode::EntryControl) && head->is_strip_mined() ||
predicate != NULL && uniqc == predicate), "by construction");
#endif
// Increment unswitch count
@ -223,13 +224,16 @@ void PhaseIdealLoop::do_unswitching (IdealLoopTree *loop, Node_List &old_new) {
// Return control projection of the entry to the fast version.
ProjNode* PhaseIdealLoop::create_slow_version_of_loop(IdealLoopTree *loop,
Node_List &old_new,
int opcode) {
int opcode,
CloneLoopMode mode) {
LoopNode* head = loop->_head->as_Loop();
bool counted_loop = head->is_CountedLoop();
Node* entry = head->in(LoopNode::EntryControl);
Node* entry = head->skip_strip_mined()->in(LoopNode::EntryControl);
_igvn.rehash_node_delayed(entry);
IdealLoopTree* outer_loop = loop->_parent;
head->verify_strip_mined(1);
Node *cont = _igvn.intcon(1);
set_ctrl(cont, C->root());
Node* opq = new Opaque1Node(C, cont);
@ -247,19 +251,21 @@ ProjNode* PhaseIdealLoop::create_slow_version_of_loop(IdealLoopTree *loop,
// Clone the loop body. The clone becomes the fast loop. The
// original pre-header will (illegally) have 3 control users
// (old & new loops & new if).
clone_loop(loop, old_new, dom_depth(head), iff);
clone_loop(loop, old_new, dom_depth(head->skip_strip_mined()), mode, iff);
assert(old_new[head->_idx]->is_Loop(), "" );
// Fast (true) control
Node* iffast_pred = clone_loop_predicates(entry, iffast, !counted_loop);
_igvn.replace_input_of(head, LoopNode::EntryControl, iffast_pred);
set_idom(head, iffast_pred, dom_depth(head));
// Slow (false) control
Node* ifslow_pred = clone_loop_predicates(entry, ifslow, !counted_loop);
LoopNode* slow_head = old_new[head->_idx]->as_Loop();
_igvn.replace_input_of(slow_head, LoopNode::EntryControl, ifslow_pred);
set_idom(slow_head, ifslow_pred, dom_depth(slow_head));
Node* l = head->skip_strip_mined();
_igvn.replace_input_of(l, LoopNode::EntryControl, iffast_pred);
set_idom(l, iffast_pred, dom_depth(l));
LoopNode* slow_l = old_new[head->_idx]->as_Loop()->skip_strip_mined();
_igvn.replace_input_of(slow_l, LoopNode::EntryControl, ifslow_pred);
set_idom(slow_l, ifslow_pred, dom_depth(l));
recompute_dom_depth();
@ -270,9 +276,9 @@ LoopNode* PhaseIdealLoop::create_reserve_version_of_loop(IdealLoopTree *loop, Co
Node_List old_new;
LoopNode* head = loop->_head->as_Loop();
bool counted_loop = head->is_CountedLoop();
Node* entry = head->in(LoopNode::EntryControl);
Node* entry = head->skip_strip_mined()->in(LoopNode::EntryControl);
_igvn.rehash_node_delayed(entry);
IdealLoopTree* outer_loop = loop->_parent;
IdealLoopTree* outer_loop = head->is_strip_mined() ? loop->_parent->_parent : loop->_parent;
ConINode* const_1 = _igvn.intcon(1);
set_ctrl(const_1, C->root());
@ -286,7 +292,7 @@ LoopNode* PhaseIdealLoop::create_reserve_version_of_loop(IdealLoopTree *loop, Co
// Clone the loop body. The clone becomes the fast loop. The
// original pre-header will (illegally) have 3 control users
// (old & new loops & new if).
clone_loop(loop, old_new, dom_depth(head), iff);
clone_loop(loop, old_new, dom_depth(head), CloneIncludesStripMined, iff);
assert(old_new[head->_idx]->is_Loop(), "" );
LoopNode* slow_head = old_new[head->_idx]->as_Loop();
@ -303,9 +309,9 @@ LoopNode* PhaseIdealLoop::create_reserve_version_of_loop(IdealLoopTree *loop, Co
#endif
// Fast (true) control
_igvn.replace_input_of(head, LoopNode::EntryControl, iffast);
_igvn.replace_input_of(head->skip_strip_mined(), LoopNode::EntryControl, iffast);
// Slow (false) control
_igvn.replace_input_of(slow_head, LoopNode::EntryControl, ifslow);
_igvn.replace_input_of(slow_head->skip_strip_mined(), LoopNode::EntryControl, ifslow);
recompute_dom_depth();
@ -394,7 +400,7 @@ bool CountedLoopReserveKit::create_reserve() {
return false;
}
Node* ifslow_pred = _lp_reserved->as_CountedLoop()->in(LoopNode::EntryControl);
Node* ifslow_pred = _lp_reserved->skip_strip_mined()->in(LoopNode::EntryControl);
if (!ifslow_pred->is_IfFalse()) {
return false;

View File

@ -261,8 +261,68 @@ void PhaseIdealLoop::set_subtree_ctrl( Node *n ) {
set_early_ctrl( n );
}
// Create a skeleton strip mined outer loop: a Loop head before the
// inner strip mined loop, a safepoint and an exit condition guarded
// by an opaque node after the inner strip mined loop with a backedge
// to the loop head. The inner strip mined loop is left as it is. Only
// once loop optimizations are over, do we adjust the inner loop exit
// condition to limit its number of iterations, set the outer loop
// exit condition and add Phis to the outer loop head. Some loop
// optimizations that operate on the inner strip mined loop need to be
// aware of the outer strip mined loop: loop unswitching needs to
// clone the outer loop as well as the inner, unrolling needs to only
// clone the inner loop etc. No optimizations need to change the outer
// strip mined loop as it is only a skeleton.
//
// Parameters, as used by the body below:
//   test, cmp        - not referenced in this function.
//   init_control     - becomes the entry control input of the new outer loop head.
//   loop             - loop tree node of the inner loop; it is re-parented under
//                      the newly created outer IdealLoopTree.
//   cl_prob, le_fcnt - passed to the OuterStripMinedLoopEndNode constructor.
//   entry_control    - out: set to the new OuterStripMinedLoopNode.
//   iffalse          - in/out: on entry the existing exit projection (presumably
//                      the inner loop end's false projection - confirm at the
//                      caller); on return it refers to a clone of that projection.
//                      The clone is the control input of the new outer loop end
//                      and the original projection now hangs off the outer loop end.
// Returns the IdealLoopTree created for the outer loop.
IdealLoopTree* PhaseIdealLoop::create_outer_strip_mined_loop(BoolNode *test, Node *cmp, Node *init_control,
IdealLoopTree* loop, float cl_prob, float le_fcnt,
Node*& entry_control, Node*& iffalse) {
// Placeholder exit condition: the int constant 0. verify_strip_mined()
// recognizes a skeleton outer loop by this constant.
Node* outer_test = _igvn.intcon(0);
set_ctrl(outer_test, C->root());
// Clone the incoming projection; the clone gets the same idom and dom depth
// as the original and becomes the control input of the outer loop end.
Node *orig = iffalse;
iffalse = iffalse->clone();
_igvn.register_new_node_with_optimizer(iffalse);
set_idom(iffalse, idom(orig), dom_depth(orig));
IfNode *outer_le = new OuterStripMinedLoopEndNode(iffalse, outer_test, cl_prob, le_fcnt);
Node *outer_ift = new IfTrueNode (outer_le);
// The original projection is re-wired to be a projection of the outer loop end.
Node* outer_iff = orig;
_igvn.replace_input_of(outer_iff, 0, outer_le);
// Outer loop head: entered from init_control, backedge from the outer
// loop end's true projection.
LoopNode *outer_l = new OuterStripMinedLoopNode(C, init_control, outer_ift);
entry_control = outer_l;
// Splice the new loop tree node into the position 'loop' occupied in its
// parent's child list, then make 'loop' the only child of the new node.
IdealLoopTree* outer_ilt = new IdealLoopTree(this, outer_l, outer_ift);
IdealLoopTree* parent = loop->_parent;
IdealLoopTree* sibling = parent->_child;
if (sibling == loop) {
parent->_child = outer_ilt;
} else {
while (sibling->_next != loop) {
sibling = sibling->_next;
}
sibling->_next = outer_ilt;
}
outer_ilt->_next = loop->_next;
outer_ilt->_parent = parent;
outer_ilt->_child = loop;
// The outer loop takes over the inner loop's nesting depth; the inner loop
// moves one level deeper.
outer_ilt->_nest = loop->_nest;
loop->_parent = outer_ilt;
loop->_next = NULL;
loop->_nest++;
// Attach the new control nodes to the outer loop and set up their
// dominator information.
set_loop(iffalse, outer_ilt);
register_control(outer_le, outer_ilt, iffalse);
register_control(outer_ift, outer_ilt, outer_le);
set_idom(outer_iff, outer_le, dom_depth(outer_le));
_igvn.register_new_node_with_optimizer(outer_l);
set_loop(outer_l, outer_ilt);
set_idom(outer_l, init_control, dom_depth(init_control)+1);
return outer_ilt;
}
//------------------------------is_counted_loop--------------------------------
bool PhaseIdealLoop::is_counted_loop( Node *x, IdealLoopTree *loop ) {
bool PhaseIdealLoop::is_counted_loop(Node* x, IdealLoopTree*& loop) {
PhaseGVN *gvn = &_igvn;
// Counted loop head must be a good RegionNode with only 3 not NULL
@ -280,7 +340,7 @@ bool PhaseIdealLoop::is_counted_loop( Node *x, IdealLoopTree *loop ) {
// Allow funny placement of Safepoint
if (back_control->Opcode() == Op_SafePoint) {
if (UseCountedLoopSafepoints) {
if (LoopStripMiningIter != 0) {
// Leaving the safepoint on the backedge and creating a
// CountedLoop will confuse optimizations. We can't move the
// safepoint around because its jvm state wouldn't match a new
@ -600,7 +660,7 @@ bool PhaseIdealLoop::is_counted_loop( Node *x, IdealLoopTree *loop ) {
}
set_subtree_ctrl( limit );
if (!UseCountedLoopSafepoints) {
if (LoopStripMiningIter == 0) {
// Check for SafePoint on backedge and remove
Node *sfpt = x->in(LoopNode::LoopBackControl);
if (sfpt->Opcode() == Op_SafePoint && is_deleteable_safept(sfpt)) {
@ -683,8 +743,20 @@ bool PhaseIdealLoop::is_counted_loop( Node *x, IdealLoopTree *loop ) {
assert(iff->outcnt() == 0, "should be dead now");
lazy_replace( iff, le ); // fix 'get_ctrl'
Node *sfpt2 = le->in(0);
Node* entry_control = init_control;
bool strip_mine_loop = LoopStripMiningIter > 1 && loop->_child == NULL &&
sfpt2->Opcode() == Op_SafePoint && !loop->_has_call;
IdealLoopTree* outer_ilt = NULL;
if (strip_mine_loop) {
outer_ilt = create_outer_strip_mined_loop(test, cmp, init_control, loop,
cl_prob, le->_fcnt, entry_control,
iffalse);
}
// Now setup a new CountedLoopNode to replace the existing LoopNode
CountedLoopNode *l = new CountedLoopNode(init_control, back_control);
CountedLoopNode *l = new CountedLoopNode(entry_control, back_control);
l->set_unswitch_count(x->as_Loop()->unswitch_count()); // Preserve
// The following assert is approximately true, and defines the intention
// of can_be_counted_loop. It fails, however, because phase->type
@ -696,12 +768,19 @@ bool PhaseIdealLoop::is_counted_loop( Node *x, IdealLoopTree *loop ) {
// Fix all data nodes placed at the old loop head.
// Uses the lazy-update mechanism of 'get_ctrl'.
lazy_replace( x, l );
set_idom(l, init_control, dom_depth(x));
set_idom(l, entry_control, dom_depth(entry_control) + 1);
if (!UseCountedLoopSafepoints) {
if (LoopStripMiningIter == 0 || strip_mine_loop) {
// Check for immediately preceding SafePoint and remove
Node *sfpt2 = le->in(0);
if (sfpt2->Opcode() == Op_SafePoint && is_deleteable_safept(sfpt2)) {
if (sfpt2->Opcode() == Op_SafePoint && (LoopStripMiningIter != 0 || is_deleteable_safept(sfpt2))) {
if (strip_mine_loop) {
Node* outer_le = outer_ilt->_tail->in(0);
Node* sfpt = sfpt2->clone();
sfpt->set_req(0, iffalse);
outer_le->set_req(0, sfpt);
register_control(sfpt, outer_ilt, iffalse);
set_idom(outer_le, sfpt, dom_depth(sfpt));
}
lazy_replace( sfpt2, sfpt2->in(TypeFunc::Control));
if (loop->_safepts != NULL) {
loop->_safepts->yank(sfpt2);
@ -730,6 +809,13 @@ bool PhaseIdealLoop::is_counted_loop( Node *x, IdealLoopTree *loop ) {
// bounds
l->phi()->as_Phi()->set_type(l->phi()->Value(&_igvn));
if (strip_mine_loop) {
l->mark_strip_mined();
l->verify_strip_mined(1);
outer_ilt->_head->as_Loop()->verify_strip_mined(1);
loop = outer_ilt;
}
return true;
}
@ -776,12 +862,93 @@ Node* PhaseIdealLoop::exact_limit( IdealLoopTree *loop ) {
// Return a node which is more "ideal" than the current node.
// Attempt to convert into a counted-loop.
Node *LoopNode::Ideal(PhaseGVN *phase, bool can_reshape) {
if (!can_be_counted_loop(phase)) {
if (!can_be_counted_loop(phase) && !is_OuterStripMinedLoop()) {
phase->C->set_major_progress();
}
return RegionNode::Ideal(phase, can_reshape);
}
// Consistency checks for a strip mined loop nest. The whole body is under
// #ifdef ASSERT, so the function does nothing in builds without ASSERT.
// May be called on either the inner (strip mined CountedLoop) or the outer
// (OuterStripMinedLoop) head; for any other loop no check is performed.
//
// expect_skeleton:
//    1 - the outer loop is expected to still be a skeleton
//    0 - the outer loop is expected to no longer be a skeleton
//   -1 - either state is accepted
// The outer loop counts as a skeleton when the condition input of its loop
// end is the int constant 0.
void LoopNode::verify_strip_mined(int expect_skeleton) const {
#ifdef ASSERT
const OuterStripMinedLoopNode* outer = NULL;
const CountedLoopNode* inner = NULL;
// Locate both loop heads of the nest starting from whichever one 'this' is.
if (is_strip_mined()) {
assert(is_CountedLoop(), "no Loop should be marked strip mined");
inner = as_CountedLoop();
outer = inner->in(LoopNode::EntryControl)->as_OuterStripMinedLoop();
} else if (is_OuterStripMinedLoop()) {
outer = this->as_OuterStripMinedLoop();
inner = outer->unique_ctrl_out()->as_CountedLoop();
assert(!is_strip_mined(), "outer loop shouldn't be marked strip mined");
}
if (inner != NULL || outer != NULL) {
assert(inner != NULL && outer != NULL, "missing loop in strip mined nest");
// Walk backwards from the outer backedge:
// outer tail projection -> outer loop end -> safepoint -> inner loop exit.
Node* outer_tail = outer->in(LoopNode::LoopBackControl);
Node* outer_le = outer_tail->in(0);
assert(outer_le->Opcode() == Op_OuterStripMinedLoopEnd, "tail of outer loop should be an If");
Node* sfpt = outer_le->in(0);
assert(sfpt->Opcode() == Op_SafePoint, "where's the safepoint?");
Node* inner_out = sfpt->in(0);
if (inner_out->outcnt() != 1) {
// The inner loop exit has uses besides the safepoint: each of them must
// reach the safepoint by following uses through non-CFG nodes.
ResourceMark rm;
Unique_Node_List wq;
for (DUIterator_Fast imax, i = inner_out->fast_outs(imax); i < imax; i++) {
Node* u = inner_out->fast_out(i);
if (u == sfpt) {
continue;
}
wq.clear();
wq.push(u);
bool found_sfpt = false;
for (uint next = 0; next < wq.size() && !found_sfpt; next++) {
Node *n = wq.at(next);
// Note: imax, i and u below shadow the variables of the enclosing loop.
for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax && !found_sfpt; i++) {
Node* u = n->fast_out(i);
if (u == sfpt) {
found_sfpt = true;
}
// Only non-CFG uses are followed further.
if (!u->is_CFG()) {
wq.push(u);
}
}
}
assert(found_sfpt, "no node in loop that's not input to safepoint");
}
}
// The inner loop exit must be a projection of the inner loop's own loop end.
CountedLoopEndNode* cle = inner_out->in(0)->as_CountedLoopEnd();
assert(cle == inner->loopexit(), "mismatch");
// Skeleton test: the outer loop end's condition is the int constant 0.
bool has_skeleton = outer_le->in(1)->bottom_type()->singleton() && outer_le->in(1)->bottom_type()->is_int()->get_con() == 0;
if (has_skeleton) {
assert(expect_skeleton == 1 || expect_skeleton == -1, "unexpected skeleton node");
assert(outer->outcnt() == 2, "only phis");
} else {
assert(expect_skeleton == 0 || expect_skeleton == -1, "no skeleton node?");
// Count the Phi uses of the inner loop head.
uint phis = 0;
for (DUIterator_Fast imax, i = inner->fast_outs(imax); i < imax; i++) {
Node* u = inner->fast_out(i);
if (u->is_Phi()) {
phis++;
}
}
// Every use of the outer head is the head itself, the inner head or a Phi.
for (DUIterator_Fast imax, i = outer->fast_outs(imax); i < imax; i++) {
Node* u = outer->fast_out(i);
assert(u == outer || u == inner || u->is_Phi(), "nothing between inner and outer loop");
}
// Count the Store uses of the inner loop exit; they widen the accepted
// range for the number of uses of the outer head checked below.
uint stores = 0;
for (DUIterator_Fast imax, i = inner_out->fast_outs(imax); i < imax; i++) {
Node* u = inner_out->fast_out(i);
if (u->is_Store()) {
stores++;
}
}
assert(outer->outcnt() >= phis + 2 && outer->outcnt() <= phis + 2 + stores + 1, "only phis");
}
assert(sfpt->outcnt() == 1, "no data node");
assert(outer_tail->outcnt() == 1 || !has_skeleton, "no data node");
}
#endif
}
//=============================================================================
//------------------------------Ideal------------------------------------------
@ -802,6 +969,7 @@ void CountedLoopNode::dump_spec(outputStream *st) const {
if (is_pre_loop ()) st->print("pre of N%d" , _main_idx);
if (is_main_loop()) st->print("main of N%d", _idx);
if (is_post_loop()) st->print("post of N%d", _main_idx);
if (is_strip_mined()) st->print(" strip mined");
}
#endif
@ -990,6 +1158,365 @@ Node* CountedLoopNode::match_incr_with_optional_truncation(
return NULL;
}
// Returns the loop head whose entry control leads out of the whole
// loop nest: for a strip mined loop that is the node on this loop's
// entry control (after verifying the strip mined shape), otherwise it
// is this loop itself.
LoopNode* CountedLoopNode::skip_strip_mined(int expect_opaq) {
  if (!is_strip_mined()) {
    return this;
  }
  verify_strip_mined(expect_opaq);
  Node* outer_head = in(EntryControl);
  return outer_head->as_Loop();
}
// Returns the outer strip mined loop head found on this loop's entry
// control, or NULL if the entry control is missing, top, or not an
// OuterStripMinedLoop node. Must only be called on a strip mined loop.
OuterStripMinedLoopNode* CountedLoopNode::outer_loop() const {
  assert(is_strip_mined(), "not a strip mined loop");
  Node* entry = in(EntryControl);
  if (entry != NULL && !entry->is_top() && entry->is_OuterStripMinedLoop()) {
    return entry->as_OuterStripMinedLoop();
  }
  return NULL;
}
// Returns the IfTrue projection feeding the outer loop's backedge, or
// NULL if the backedge input is missing or top.
IfTrueNode* OuterStripMinedLoopNode::outer_loop_tail() const {
  Node* backedge = in(LoopBackControl);
  if (backedge != NULL && !backedge->is_top()) {
    return backedge->as_IfTrue();
  }
  return NULL;
}
// Forwards to the outer strip mined loop's outer_loop_tail(); NULL if
// the outer loop head cannot be found.
IfTrueNode* CountedLoopNode::outer_loop_tail() const {
  LoopNode* outer = outer_loop();
  if (outer != NULL) {
    return outer->outer_loop_tail();
  }
  return NULL;
}
// Returns the outer loop end test, i.e. the control input of the
// backedge projection. NULL is returned when the backedge projection is
// unavailable, or when its control input is missing, top, or does not
// have exactly two outputs.
OuterStripMinedLoopEndNode* OuterStripMinedLoopNode::outer_loop_end() const {
  IfTrueNode* tail = outer_loop_tail();
  if (tail != NULL) {
    Node* test = tail->in(0);
    if (test != NULL && !test->is_top() && test->outcnt() == 2) {
      return test->as_OuterStripMinedLoopEnd();
    }
  }
  return NULL;
}
// Forwards to the outer strip mined loop's outer_loop_end(); NULL if
// the outer loop head cannot be found.
OuterStripMinedLoopEndNode* CountedLoopNode::outer_loop_end() const {
  LoopNode* outer = outer_loop();
  if (outer != NULL) {
    return outer->outer_loop_end();
  }
  return NULL;
}
// Returns the false projection of the outer loop end test (the exit of
// the outer loop), or NULL if either the test or that projection is
// not available.
IfFalseNode* OuterStripMinedLoopNode::outer_loop_exit() const {
  IfNode* loop_end = outer_loop_end();
  if (loop_end != NULL) {
    Node* exit_proj = loop_end->proj_out(false);
    if (exit_proj != NULL) {
      return exit_proj->as_IfFalse();
    }
  }
  return NULL;
}
// Forwards to the outer strip mined loop's outer_loop_exit(); NULL if
// the outer loop head cannot be found.
IfFalseNode* CountedLoopNode::outer_loop_exit() const {
  LoopNode* outer = outer_loop();
  if (outer != NULL) {
    return outer->outer_loop_exit();
  }
  return NULL;
}
// Returns the safepoint that is the control input of the outer loop
// end test, or NULL if the test is unavailable or its control input is
// missing or top.
SafePointNode* OuterStripMinedLoopNode::outer_safepoint() const {
  IfNode* loop_end = outer_loop_end();
  if (loop_end != NULL) {
    Node* ctrl = loop_end->in(0);
    if (ctrl != NULL && !ctrl->is_top()) {
      assert(ctrl->Opcode() == Op_SafePoint, "broken outer loop");
      return ctrl->as_SafePoint();
    }
  }
  return NULL;
}
// Forwards to the outer strip mined loop's outer_safepoint(); NULL if
// the outer loop head cannot be found.
SafePointNode* CountedLoopNode::outer_safepoint() const {
  LoopNode* outer = outer_loop();
  if (outer != NULL) {
    return outer->outer_safepoint();
  }
  return NULL;
}
// Finalize a strip mined loop nest, starting from its outer loop head.
//
// The inner counted loop is found as the unique control output of this
// node. Depending on the range of the inner induction variable's type
// and on LoopStripMiningIter / LoopStripMiningIterShortLoop (both
// scaled by the absolute stride), one of the following happens:
//  - the outer loop and its safepoint are removed (scaled iteration
//    count does not fit in an int, or the estimated range is at most
//    the short loop threshold);
//  - the outer loop end is replaced by a plain If, keeping the
//    safepoint (the estimated range is at most one strip);
//  - otherwise nodes on the inner backedge are cloned onto the outer
//    backedge, phis are created on the outer loop, chains of stores
//    sunk out of the inner loop are hooked into outer loop memory phis,
//    the inner loop limit is capped to one strip and the outer loop end
//    is given the original exit test.
// Nothing is done if the inner loop has no induction variable phi.
void OuterStripMinedLoopNode::adjust_strip_mined_loop(PhaseIterGVN* igvn) {
  // Look for the outer & inner strip mined loop, reduce number of
  // iterations of the inner loop, set exit condition of outer loop,
  // construct required phi nodes for outer loop.
  CountedLoopNode* inner_cl = unique_ctrl_out()->as_CountedLoop();
  assert(inner_cl->is_strip_mined(), "inner loop should be strip mined");
  Node* inner_iv_phi = inner_cl->phi();
  if (inner_iv_phi == NULL) {
    return;
  }
  CountedLoopEndNode* inner_cle = inner_cl->loopexit();

  int stride = inner_cl->stride_con();
  jlong scaled_iters_long = ((jlong)LoopStripMiningIter) * ABS(stride);
  int scaled_iters = (int)scaled_iters_long;
  // Compute the short loop threshold in 64 bits as well: the flag can
  // be as large as max_juint, so narrowing the product to an int could
  // wrap (possibly to a negative value) and defeat the comparison with
  // iter_estimate below.
  jlong short_scaled_iters = ((jlong)LoopStripMiningIterShortLoop) * ABS(stride);
  const TypeInt* inner_iv_t = igvn->type(inner_iv_phi)->is_int();
  // Width of the induction variable's type range, used as an upper
  // bound on the distance the induction variable can cover.
  jlong iter_estimate = (jlong)inner_iv_t->_hi - (jlong)inner_iv_t->_lo;
  assert(iter_estimate > 0, "broken");
  if ((jlong)scaled_iters != scaled_iters_long || iter_estimate <= short_scaled_iters) {
    // Remove outer loop and safepoint (too few iterations)
    Node* outer_sfpt = outer_safepoint();
    Node* outer_out = outer_loop_exit();
    igvn->replace_node(outer_out, outer_sfpt->in(0));
    igvn->replace_input_of(outer_sfpt, 0, igvn->C->top());
    inner_cl->clear_strip_mined();
    return;
  }
  if (iter_estimate <= scaled_iters_long) {
    // We would only go through one iteration of
    // the outer loop: drop the outer loop but
    // keep the safepoint so we don't run for
    // too long without a safepoint
    IfNode* outer_le = outer_loop_end();
    Node* iff = igvn->transform(new IfNode(outer_le->in(0), outer_le->in(1), outer_le->_prob, outer_le->_fcnt));
    igvn->replace_node(outer_le, iff);
    inner_cl->clear_strip_mined();
    return;
  }

  Node* cle_tail = inner_cle->proj_out(true);
  ResourceMark rm;
  // Maps the index of a node on the inner backedge to its clone.
  Node_List old_new;
  if (cle_tail->outcnt() > 1) {
    // Look for nodes on backedge of inner loop and clone them
    Unique_Node_List backedge_nodes;
    for (DUIterator_Fast imax, i = cle_tail->fast_outs(imax); i < imax; i++) {
      Node* u = cle_tail->fast_out(i);
      if (u != inner_cl) {
        assert(!u->is_CFG(), "control flow on the backedge?");
        backedge_nodes.push(u);
      }
    }
    // Nodes with an index at or above this value were created by the
    // cloning below and must not be followed.
    uint last = igvn->C->unique();
    for (uint next = 0; next < backedge_nodes.size(); next++) {
      Node* n = backedge_nodes.at(next);
      old_new.map(n->_idx, n->clone());
      for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
        Node* u = n->fast_out(i);
        assert(!u->is_CFG(), "broken");
        if (u->_idx >= last) {
          continue;
        }
        if (!u->is_Phi()) {
          backedge_nodes.push(u);
        } else {
          assert(u->in(0) == inner_cl, "strange phi on the backedge");
        }
      }
    }
    // Put the clones on the outer loop backedge
    Node* le_tail = outer_loop_tail();
    for (uint next = 0; next < backedge_nodes.size(); next++) {
      Node *n = old_new[backedge_nodes.at(next)->_idx];
      // Redirect inputs that were cloned too to their clones.
      for (uint i = 1; i < n->req(); i++) {
        if (n->in(i) != NULL && old_new[n->in(i)->_idx] != NULL) {
          n->set_req(i, old_new[n->in(i)->_idx]);
        }
      }
      if (n->in(0) != NULL) {
        assert(n->in(0) == cle_tail, "node not on backedge?");
        n->set_req(0, le_tail);
      }
      igvn->register_new_node_with_optimizer(n);
    }
  }

  Node* iv_phi = NULL;
  // Make a clone of each phi in the inner loop
  // for the outer loop
  for (uint i = 0; i < inner_cl->outcnt(); i++) {
    Node* u = inner_cl->raw_out(i);
    if (u->is_Phi()) {
      assert(u->in(0) == inner_cl, "inconsistent");
      Node* phi = u->clone();
      phi->set_req(0, this);
      // If the backedge value was cloned above, the outer phi must use
      // the clone.
      Node* be = old_new[phi->in(LoopNode::LoopBackControl)->_idx];
      if (be != NULL) {
        phi->set_req(LoopNode::LoopBackControl, be);
      }
      phi = igvn->transform(phi);
      // The inner phi now enters the inner loop with the outer phi.
      igvn->replace_input_of(u, LoopNode::EntryControl, phi);
      if (u == inner_iv_phi) {
        iv_phi = phi;
      }
    }
  }
  Node* cle_out = inner_cle->proj_out(false);
  if (cle_out->outcnt() > 1) {
    // Look for chains of stores that were sunk
    // out of the inner loop and are in the outer loop
    for (DUIterator_Fast imax, i = cle_out->fast_outs(imax); i < imax; i++) {
      Node* u = cle_out->fast_out(i);
      if (u->is_Store()) {
        // Walk up the memory inputs to the first store of the chain
        // controlled by cle_out.
        Node* first = u;
        for(;;) {
          Node* next = first->in(MemNode::Memory);
          if (!next->is_Store() || next->in(0) != cle_out) {
            break;
          }
          first = next;
        }
        // Walk down the memory uses to the last store of the chain
        // controlled by cle_out.
        Node* last = u;
        for(;;) {
          Node* next = NULL;
          for (DUIterator_Fast jmax, j = last->fast_outs(jmax); j < jmax; j++) {
            Node* uu = last->fast_out(j);
            if (uu->is_Store() && uu->in(0) == cle_out) {
              assert(next == NULL, "only one in the outer loop");
              next = uu;
            }
          }
          if (next == NULL) {
            break;
          }
          last = next;
        }
        // Find the outer loop phi whose backedge value is either the
        // end of the chain or the memory state the chain starts from.
        Node* phi = NULL;
        for (DUIterator_Fast jmax, j = fast_outs(jmax); j < jmax; j++) {
          Node* uu = fast_out(j);
          if (uu->is_Phi()) {
            Node* be = uu->in(LoopNode::LoopBackControl);
            while (be->is_Store() && old_new[be->_idx] != NULL) {
              ShouldNotReachHere();
              be = be->in(MemNode::Memory);
            }
            if (be == last || be == first->in(MemNode::Memory)) {
              assert(phi == NULL, "only one phi");
              phi = uu;
            }
          }
        }
#ifdef ASSERT
        // Check the memory phis of the outer loop: the one for the
        // store's alias class must be the phi found above, and the
        // backedge of a phi on TypePtr::BOTTOM must lead back to that
        // phi through known memory nodes.
        for (DUIterator_Fast jmax, j = fast_outs(jmax); j < jmax; j++) {
          Node* uu = fast_out(j);
          if (uu->is_Phi() && uu->bottom_type() == Type::MEMORY) {
            if (uu->adr_type() == igvn->C->get_adr_type(igvn->C->get_alias_index(u->adr_type()))) {
              assert(phi == uu, "what's that phi?");
            } else if (uu->adr_type() == TypePtr::BOTTOM) {
              Node* n = uu->in(LoopNode::LoopBackControl);
              uint limit = igvn->C->live_nodes();
              uint i = 0;
              while (n != uu) {
                i++;
                assert(i < limit, "infinite loop");
                if (n->is_Proj()) {
                  n = n->in(0);
                } else if (n->is_SafePoint() || n->is_MemBar()) {
                  n = n->in(TypeFunc::Memory);
                } else if (n->is_Phi()) {
                  n = n->in(1);
                } else if (n->is_MergeMem()) {
                  n = n->as_MergeMem()->memory_at(igvn->C->get_alias_index(u->adr_type()));
                } else if (n->is_Store() || n->is_LoadStore() || n->is_ClearArray()) {
                  n = n->in(MemNode::Memory);
                } else {
                  n->dump();
                  ShouldNotReachHere();
                }
              }
            }
          }
        }
#endif
        if (phi == NULL) {
          // If an entire chain was sunk, the
          // inner loop has no phi for that memory
          // slice, create one for the outer loop
          phi = PhiNode::make(this, first->in(MemNode::Memory), Type::MEMORY,
                              igvn->C->get_adr_type(igvn->C->get_alias_index(u->adr_type())));
          phi->set_req(LoopNode::LoopBackControl, last);
          phi = igvn->transform(phi);
          igvn->replace_input_of(first, MemNode::Memory, phi);
        } else {
          // Or fix the outer loop phi to include
          // that chain of stores.
          Node* be = phi->in(LoopNode::LoopBackControl);
          while (be->is_Store() && old_new[be->_idx] != NULL) {
            ShouldNotReachHere();
            be = be->in(MemNode::Memory);
          }
          if (be == first->in(MemNode::Memory)) {
            if (be == phi->in(LoopNode::LoopBackControl)) {
              igvn->replace_input_of(phi, LoopNode::LoopBackControl, last);
            } else {
              igvn->replace_input_of(be, MemNode::Memory, last);
            }
          } else {
#ifdef ASSERT
            if (be == phi->in(LoopNode::LoopBackControl)) {
              assert(phi->in(LoopNode::LoopBackControl) == last, "");
            } else {
              assert(be->in(MemNode::Memory) == last, "");
            }
#endif
          }
        }
      }
    }
  }

  if (iv_phi != NULL) {
    // Now adjust the inner loop's exit condition
    Node* limit = inner_cl->limit();
    // Distance left to the original limit, as a non negative value for
    // either stride direction.
    Node* sub = NULL;
    if (stride > 0) {
      sub = igvn->transform(new SubINode(limit, iv_phi));
    } else {
      sub = igvn->transform(new SubINode(iv_phi, limit));
    }
    // The inner loop covers at most one strip of that distance.
    Node* min = igvn->transform(new MinINode(sub, igvn->intcon(scaled_iters)));
    Node* new_limit = NULL;
    if (stride > 0) {
      new_limit = igvn->transform(new AddINode(min, iv_phi));
    } else {
      new_limit = igvn->transform(new SubINode(iv_phi, min));
    }
    igvn->replace_input_of(inner_cle->cmp_node(), 2, new_limit);
    // The outer loop end gets a copy of the inner exit test, but
    // compared against the original limit.
    Node* cmp = inner_cle->cmp_node()->clone();
    Node* bol = inner_cle->in(CountedLoopEndNode::TestValue)->clone();
    cmp->set_req(2, limit);
    bol->set_req(1, igvn->transform(cmp));
    igvn->replace_input_of(outer_loop_end(), 1, igvn->transform(bol));
  } else {
    assert(false, "should be able to adjust outer loop");
    // Product build fallback: replace the outer loop end with a plain
    // If and stop treating the inner loop as strip mined.
    IfNode* outer_le = outer_loop_end();
    Node* iff = igvn->transform(new IfNode(outer_le->in(0), outer_le->in(1), outer_le->_prob, outer_le->_fcnt));
    igvn->replace_node(outer_le, iff);
    inner_cl->clear_strip_mined();
  }
}
// The outer loop end is typed as "both projections possible" as long
// as it has a control input whose type is not TOP; otherwise it is TOP.
const Type* OuterStripMinedLoopEndNode::Value(PhaseGVN* phase) const {
  Node* ctrl = in(0);
  if (ctrl == NULL || phase->type(ctrl) == Type::TOP) {
    return Type::TOP;
  }
  return TypeTuple::IFBOTH;
}
// The only idealization performed is dead region removal: returns this
// node when remove_dead_region() reports progress, NULL otherwise.
Node *OuterStripMinedLoopEndNode::Ideal(PhaseGVN *phase, bool can_reshape) {
  bool progress = remove_dead_region(phase, can_reshape);
  if (progress) {
    return this;
  }
  return NULL;
}
//------------------------------filtered_type--------------------------------
// Return a type based on condition control flow
@ -1778,10 +2305,11 @@ void IdealLoopTree::counted_loop( PhaseIdealLoop *phase ) {
if (_head->is_Loop()) _head->as_Loop()->set_inner_loop();
}
IdealLoopTree* loop = this;
if (_head->is_CountedLoop() ||
phase->is_counted_loop(_head, this)) {
phase->is_counted_loop(_head, loop)) {
if (!UseCountedLoopSafepoints) {
if (LoopStripMiningIter == 0 || (LoopStripMiningIter > 1 && _child == NULL)) {
// Indicate we do not need a safepoint here
_has_sfpt = 1;
}
@ -1800,8 +2328,10 @@ void IdealLoopTree::counted_loop( PhaseIdealLoop *phase ) {
}
// Recursively
if (_child) _child->counted_loop( phase );
if (_next) _next ->counted_loop( phase );
assert(loop->_child != this || (loop->_head->as_Loop()->is_OuterStripMinedLoop() && _head->as_CountedLoop()->is_strip_mined()), "what kind of loop was added?");
assert(loop->_child != this || (loop->_child->_child == NULL && loop->_child->_next == NULL), "would miss some loops");
if (loop->_child && loop->_child != this) loop->_child->counted_loop(phase);
if (loop->_next) loop->_next ->counted_loop(phase);
}
#ifndef PRODUCT
@ -1812,7 +2342,7 @@ void IdealLoopTree::dump_head( ) const {
tty->print(" ");
tty->print("Loop: N%d/N%d ",_head->_idx,_tail->_idx);
if (_irreducible) tty->print(" IRREDUCIBLE");
Node* entry = _head->in(LoopNode::EntryControl);
Node* entry = _head->as_Loop()->skip_strip_mined(-1)->in(LoopNode::EntryControl);
Node* predicate = PhaseIdealLoop::find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
if (predicate != NULL ) {
tty->print(" limit_check");
@ -1863,6 +2393,9 @@ void IdealLoopTree::dump_head( ) const {
if (Verbose) {
tty->print(" body={"); _body.dump_simple(); tty->print(" }");
}
if (_head->as_Loop()->is_strip_mined()) {
tty->print(" strip_mined");
}
tty->cr();
}
@ -3232,7 +3765,7 @@ bool PhaseIdealLoop::is_canonical_loop_entry(CountedLoopNode* cl) {
if (!cl->is_main_loop() && !cl->is_post_loop()) {
return false;
}
Node* ctrl = cl->in(LoopNode::EntryControl);
Node* ctrl = cl->skip_strip_mined()->in(LoopNode::EntryControl);
if (ctrl == NULL || (!ctrl->is_IfTrue() && !ctrl->is_IfFalse())) {
return false;
}
@ -3292,7 +3825,7 @@ Node *PhaseIdealLoop::get_late_ctrl( Node *n, Node *early ) {
}
while(worklist.size() != 0 && LCA != early) {
Node* s = worklist.pop();
if (s->is_Load()) {
if (s->is_Load() || s->Opcode() == Op_SafePoint) {
continue;
} else if (s->is_MergeMem()) {
for (DUIterator_Fast imax, i = s->fast_outs(imax); i < imax; i++) {
@ -3471,6 +4004,38 @@ void PhaseIdealLoop::build_loop_late( VectorSet &visited, Node_List &worklist, N
}
}
// Verify that no data node is scheduled in the outer loop of a strip
// mined loop, unless it is reachable from the inputs of the outer
// loop's safepoint through nodes whose control is in that same loop.
// Debug builds only; does nothing when 'least' is not in a loop headed
// by an OuterStripMinedLoop node.
void PhaseIdealLoop::verify_strip_mined_scheduling(Node *n, Node* least) {
#ifdef ASSERT
  IdealLoopTree* loop = get_loop(least);
  if (loop->_nest == 0) {
    return;
  }
  Node* head = loop->_head;
  if (!head->is_OuterStripMinedLoop()) {
    return;
  }
  Node* sfpt = head->as_Loop()->outer_safepoint();
  ResourceMark rm;
  Unique_Node_List wq;
  wq.push(sfpt);
  // Walk backwards from the safepoint over inputs that are placed in
  // the outer loop; n must be met on the way.
  for (uint next = 0; next < wq.size(); next++) {
    Node* m = wq.at(next);
    for (uint j = 1; j < m->req(); j++) {
      Node* def = m->in(j);
      if (def == n) {
        return;
      }
      if (def != NULL && has_ctrl(def) && get_loop(get_ctrl(def)) == loop) {
        wq.push(def);
      }
    }
  }
  ShouldNotReachHere();
#endif
}
//------------------------------build_loop_late_post---------------------------
// Put Data nodes into some loop nest, by setting the _nodes[]->loop mapping.
// Second pass finds latest legal placement, and ideal loop placement.
@ -3580,8 +4145,9 @@ void PhaseIdealLoop::build_loop_late_post( Node *n ) {
// which can inhibit range check elimination.
if (least != early) {
Node* ctrl_out = least->unique_ctrl_out();
if (ctrl_out && ctrl_out->is_CountedLoop() &&
least == ctrl_out->in(LoopNode::EntryControl)) {
if (ctrl_out && ctrl_out->is_Loop() &&
least == ctrl_out->in(LoopNode::EntryControl) &&
(ctrl_out->is_CountedLoop() || ctrl_out->is_OuterStripMinedLoop())) {
Node* least_dom = idom(least);
if (get_loop(least_dom)->is_member(get_loop(least))) {
least = least_dom;
@ -3606,6 +4172,7 @@ void PhaseIdealLoop::build_loop_late_post( Node *n ) {
// Assign discovered "here or above" point
least = find_non_split_ctrl(least);
verify_strip_mined_scheduling(n, least);
set_ctrl(n, least);
// Collect inner loop bodies

View File

@ -37,6 +37,7 @@ class CountedLoopNode;
class IdealLoopTree;
class LoopNode;
class Node;
class OuterStripMinedLoopEndNode;
class PhaseIdealLoop;
class CountedLoopReserveKit;
class VectorSet;
@ -71,7 +72,8 @@ protected:
VectorizedLoop=2048,
HasAtomicPostLoop=4096,
HasRangeChecks=8192,
IsMultiversioned=16384};
IsMultiversioned=16384,
StripMined=32768};
char _unswitch_count;
enum { _unswitch_max=3 };
char _postloop_flags;
@ -90,6 +92,7 @@ public:
int is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; }
void set_partial_peel_loop() { _loop_flags |= PartialPeelLoop; }
int partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; }
int is_strip_mined() const { return _loop_flags & StripMined; }
void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; }
void mark_has_reductions() { _loop_flags |= HasReductions; }
@ -100,6 +103,8 @@ public:
void mark_has_atomic_post_loop() { _loop_flags |= HasAtomicPostLoop; }
void mark_has_range_checks() { _loop_flags |= HasRangeChecks; }
void mark_is_multiversioned() { _loop_flags |= IsMultiversioned; }
void mark_strip_mined() { _loop_flags |= StripMined; }
void clear_strip_mined() { _loop_flags &= ~StripMined; }
int unswitch_max() { return _unswitch_max; }
int unswitch_count() { return _unswitch_count; }
@ -131,6 +136,13 @@ public:
#ifndef PRODUCT
virtual void dump_spec(outputStream *st) const;
#endif
void verify_strip_mined(int expect_skeleton) const;
virtual LoopNode* skip_strip_mined(int expect_opaq = 1) { return this; }
virtual IfTrueNode* outer_loop_tail() const { ShouldNotReachHere(); return NULL; }
virtual OuterStripMinedLoopEndNode* outer_loop_end() const { ShouldNotReachHere(); return NULL; }
virtual IfFalseNode* outer_loop_exit() const { ShouldNotReachHere(); return NULL; }
virtual SafePointNode* outer_safepoint() const { ShouldNotReachHere(); return NULL; }
};
//------------------------------Counted Loops----------------------------------
@ -278,6 +290,13 @@ public:
void set_slp_max_unroll(int unroll_factor) { _slp_maximum_unroll_factor = unroll_factor; }
int slp_max_unroll() const { return _slp_maximum_unroll_factor; }
virtual LoopNode* skip_strip_mined(int expect_opaq = 1);
OuterStripMinedLoopNode* outer_loop() const;
virtual IfTrueNode* outer_loop_tail() const;
virtual OuterStripMinedLoopEndNode* outer_loop_end() const;
virtual IfFalseNode* outer_loop_exit() const;
virtual SafePointNode* outer_safepoint() const;
#ifndef PRODUCT
virtual void dump_spec(outputStream *st) const;
#endif
@ -374,6 +393,40 @@ class LoopLimitNode : public Node {
virtual Node* Identity(PhaseGVN* phase);
};
// Support for strip mining

// Loop head of the outer loop of a strip mined loop nest. The node is
// flagged as a macro node and registers itself with the compilation's
// macro node list on construction.
class OuterStripMinedLoopNode : public LoopNode {
private:
  CountedLoopNode* inner_loop() const;

public:
  OuterStripMinedLoopNode(Compile* C, Node* entry, Node* backedge)
    : LoopNode(entry, backedge)
  {
    init_class_id(Class_OuterStripMinedLoop);
    init_flags(Flag_is_macro);
    C->add_macro_node(this);
  }

  virtual int Opcode() const;

  // Accessors for the nodes that make up the outer loop.
  virtual IfTrueNode* outer_loop_tail() const;
  virtual OuterStripMinedLoopEndNode* outer_loop_end() const;
  virtual IfFalseNode* outer_loop_exit() const;
  virtual SafePointNode* outer_safepoint() const;

  void adjust_strip_mined_loop(PhaseIterGVN* igvn);
};
// If node that ends the outer loop of a strip mined loop nest. It
// overrides Value() and Ideal() of IfNode.
class OuterStripMinedLoopEndNode : public IfNode {
public:
  OuterStripMinedLoopEndNode(Node* control, Node* test, float prob, float cnt)
    : IfNode(control, test, prob, cnt)
  {
    init_class_id(Class_OuterStripMinedLoopEnd);
  }

  virtual int Opcode() const;

  virtual const Type* Value(PhaseGVN* phase) const;
  virtual Node* Ideal(PhaseGVN* phase, bool can_reshape);
};
// -----------------------------IdealLoopTree----------------------------------
class IdealLoopTree : public ResourceObj {
public:
@ -780,6 +833,7 @@ private:
void build_loop_early( VectorSet &visited, Node_List &worklist, Node_Stack &nstack );
void build_loop_late ( VectorSet &visited, Node_List &worklist, Node_Stack &nstack );
void build_loop_late_post ( Node* n );
void verify_strip_mined_scheduling(Node *n, Node* least);
// Array of immediate dominance info for each CFG node indexed by node idx
private:
@ -877,7 +931,10 @@ public:
// Per-Node transform
virtual Node *transform( Node *a_node ) { return 0; }
bool is_counted_loop( Node *x, IdealLoopTree *loop );
bool is_counted_loop(Node* x, IdealLoopTree*& loop);
IdealLoopTree* create_outer_strip_mined_loop(BoolNode *test, Node *cmp, Node *init_control,
IdealLoopTree* loop, float cl_prob, float le_fcnt,
Node*& entry_control, Node*& iffalse);
Node* exact_limit( IdealLoopTree *loop );
@ -908,8 +965,24 @@ public:
// When nonnull, the clone and original are side-by-side, both are
// dominated by the passed in side_by_side_idom node. Used in
// construction of unswitched loops.
enum CloneLoopMode {
IgnoreStripMined = 0, // Only clone inner strip mined loop
CloneIncludesStripMined = 1, // clone both inner and outer strip mined loops
ControlAroundStripMined = 2 // Only clone inner strip mined loop,
// result control flow branches
// either to inner clone or outer
// strip mined loop.
};
void clone_loop( IdealLoopTree *loop, Node_List &old_new, int dom_depth,
Node* side_by_side_idom = NULL);
CloneLoopMode mode, Node* side_by_side_idom = NULL);
void clone_loop_handle_data_uses(Node* old, Node_List &old_new,
IdealLoopTree* loop, IdealLoopTree* companion_loop,
Node_List*& split_if_set, Node_List*& split_bool_set,
Node_List*& split_cex_set, Node_List& worklist,
uint new_counter, CloneLoopMode mode);
void clone_outer_loop(LoopNode* head, CloneLoopMode mode, IdealLoopTree *loop,
IdealLoopTree* outer_loop, int dd, Node_List &old_new,
Node_List& extra_data_nodes);
// If we got the effect of peeling, either by actually peeling or by
// making a pre-loop which must execute at least once, we can remove
@ -1020,7 +1093,8 @@ public:
// and inserting an if to select fast-slow versions.
ProjNode* create_slow_version_of_loop(IdealLoopTree *loop,
Node_List &old_new,
int opcode);
int opcode,
CloneLoopMode mode);
// Clone a loop and return the clone head (clone_loop_head).
// Added nodes include int(1), int(0) - disconnected, If, IfTrue, IfFalse,

View File

@ -26,6 +26,7 @@
#include "memory/allocation.inline.hpp"
#include "memory/resourceArea.hpp"
#include "opto/addnode.hpp"
#include "opto/callnode.hpp"
#include "opto/castnode.hpp"
#include "opto/connode.hpp"
#include "opto/castnode.hpp"
@ -306,7 +307,12 @@ Node *PhaseIdealLoop::has_local_phi_input( Node *n ) {
get_ctrl(m->in(2)) != n_ctrl &&
get_ctrl(m->in(3)) != n_ctrl) {
// Move the AddP up to dominating point
set_ctrl_and_loop(m, find_non_split_ctrl(idom(n_ctrl)));
Node* c = find_non_split_ctrl(idom(n_ctrl));
if (c->is_OuterStripMinedLoop()) {
c->as_Loop()->verify_strip_mined(1);
c = c->in(LoopNode::EntryControl);
}
set_ctrl_and_loop(m, c);
continue;
}
return NULL;
@ -750,14 +756,13 @@ Node* PhaseIdealLoop::try_move_store_before_loop(Node* n, Node *n_ctrl) {
if (ctrl_ok) {
// move the Store
_igvn.replace_input_of(mem, LoopNode::LoopBackControl, mem);
_igvn.replace_input_of(n, 0, n_loop->_head->in(LoopNode::EntryControl));
_igvn.replace_input_of(n, 0, n_loop->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl));
_igvn.replace_input_of(n, MemNode::Memory, mem->in(LoopNode::EntryControl));
// Disconnect the phi now. An empty phi can confuse other
// optimizations in this pass of loop opts.
_igvn.replace_node(mem, mem->in(LoopNode::EntryControl));
n_loop->_body.yank(mem);
IdealLoopTree* new_loop = get_loop(n->in(0));
set_ctrl_and_loop(n, n->in(0));
return n;
@ -840,6 +845,16 @@ void PhaseIdealLoop::try_move_store_after_loop(Node* n) {
_igvn.replace_node(hook, n);
return;
}
#ifdef ASSERT
if (n_loop->_head->is_Loop() && n_loop->_head->as_Loop()->is_strip_mined()) {
assert(n_loop->_head->Opcode() == Op_CountedLoop, "outer loop is a strip mined");
n_loop->_head->as_Loop()->verify_strip_mined(1);
Node* outer = n_loop->_head->as_CountedLoop()->outer_loop();
IdealLoopTree* outer_loop = get_loop(outer);
assert(n_loop->_parent == outer_loop, "broken loop tree");
assert(get_loop(lca) == outer_loop, "safepoint in outer loop consume all memory state");
}
#endif
// Move store out of the loop
_igvn.replace_node(hook, n->in(MemNode::Memory));
@ -1016,7 +1031,7 @@ Node *PhaseIdealLoop::place_near_use( Node *useblock ) const {
IdealLoopTree *u_loop = get_loop( useblock );
return (u_loop->_irreducible || u_loop->_child)
? useblock
: u_loop->_head->in(LoopNode::EntryControl);
: u_loop->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl);
}
@ -1569,6 +1584,252 @@ void PhaseIdealLoop::sink_use( Node *use, Node *post_loop ) {
}
}
// Fix up the uses of 'old' (a node that has a clone in old_new) that
// are outside of both 'loop' and 'outer_loop': each such use is made to
// consume a Phi merging the old value and its clone (or, for a control
// edge, the merging control node itself).
//
// old            - node whose uses are examined
// old_new        - maps a node index to the clone of that node
// loop           - the loop being cloned
// outer_loop     - loop tree also treated as "inside" when deciding
//                  whether a use is outside
// split_if_set, split_bool_set, split_cex_set
//                - lazily allocated lists that collect outside uses
//                  that are If/CMove/predicate opaque/Opaque4 nodes,
//                  Bool nodes and CreateEx nodes respectively
// worklist       - scratch list of uses still to process
// new_counter    - nodes with an index at or above this value are
//                  walked through in(2) rather than idom()
//                  (presumably the regions created while cloning --
//                  TODO confirm against clone_loop)
// mode           - clone mode, only consulted by the debug check
void PhaseIdealLoop::clone_loop_handle_data_uses(Node* old, Node_List &old_new,
IdealLoopTree* loop, IdealLoopTree* outer_loop,
Node_List*& split_if_set, Node_List*& split_bool_set,
Node_List*& split_cex_set, Node_List& worklist,
uint new_counter, CloneLoopMode mode) {
// Clone of 'old'; becomes input 1 of the merging Phis built below.
Node* nnn = old_new[old->_idx];
// Copy uses to a worklist, so I can munge the def-use info
// with impunity.
for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
worklist.push(old->fast_out(j));
while( worklist.size() ) {
Node *use = worklist.pop();
if (!has_node(use)) continue; // Ignore dead nodes
if (use->in(0) == C->top()) continue;
IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
// Check for data-use outside of loop - at least one of OLD or USE
// must not be a CFG node.
#ifdef ASSERT
// For a strip mined loop, the only use without a clone that may sit in
// the outer loop but not in the inner loop is the outer safepoint, and
// only in ControlAroundStripMined mode.
if (loop->_head->as_Loop()->is_strip_mined() && outer_loop->is_member(use_loop) && !loop->is_member(use_loop) && old_new[use->_idx] == NULL) {
Node* sfpt = loop->_head->as_CountedLoop()->outer_safepoint();
assert(mode == ControlAroundStripMined && use == sfpt, "missed a node");
}
#endif
if (!loop->is_member(use_loop) && !outer_loop->is_member(use_loop) && (!old->is_CFG() || !use->is_CFG())) {
// If the Data use is an IF, that means we have an IF outside of the
// loop that is switching on a condition that is set inside of the
// loop. Happens if people set a loop-exit flag; then test the flag
// in the loop to break the loop, then test it again outside of the
// loop to determine which way the loop exited.
// Loop predicate If node connects to Bool node through Opaque1 node.
if (use->is_If() || use->is_CMove() || C->is_predicate_opaq(use) || use->Opcode() == Op_Opaque4) {
// Since this code is highly unlikely, we lazily build the worklist
// of such Nodes to go split.
if (!split_if_set) {
ResourceArea *area = Thread::current()->resource_area();
split_if_set = new Node_List(area);
}
split_if_set->push(use);
}
if (use->is_Bool()) {
if (!split_bool_set) {
ResourceArea *area = Thread::current()->resource_area();
split_bool_set = new Node_List(area);
}
split_bool_set->push(use);
}
if (use->Opcode() == Op_CreateEx) {
if (!split_cex_set) {
ResourceArea *area = Thread::current()->resource_area();
split_cex_set = new Node_List(area);
}
split_cex_set->push(use);
}
// Get "block" use is in
// idx is the input slot of 'use' that refers to 'old'.
uint idx = 0;
while( use->in(idx) != old ) idx++;
Node *prev = use->is_CFG() ? use : get_ctrl(use);
assert(!loop->is_member(get_loop(prev)) && !outer_loop->is_member(get_loop(prev)), "" );
Node *cfg = prev->_idx >= new_counter
? prev->in(2)
: idom(prev);
if( use->is_Phi() ) // Phi use is in prior block
cfg = prev->in(idx); // NOT in block of Phi itself
if (cfg->is_top()) { // Use is dead?
_igvn.replace_input_of(use, idx, C->top());
continue;
}
// Walk up until cfg is inside outer_loop; prev is then the last
// control node visited outside of it.
while(!outer_loop->is_member(get_loop(cfg))) {
prev = cfg;
cfg = cfg->_idx >= new_counter ? cfg->in(2) : idom(cfg);
}
// If the use occurs after merging several exits from the loop, then
// old value must have dominated all those exits. Since the same old
// value was used on all those exits we did not need a Phi at this
// merge point. NOW we do need a Phi here. Each loop exit value
// is now merged with the peeled body exit; each exit gets its own
// private Phi and those Phis need to be merged here.
Node *phi;
if( prev->is_Region() ) {
if( idx == 0 ) { // Updating control edge?
phi = prev; // Just use existing control
} else { // Else need a new Phi
phi = PhiNode::make( prev, old );
// Now recursively fix up the new uses of old!
for( uint i = 1; i < prev->req(); i++ ) {
worklist.push(phi); // Onto worklist once for each 'old' input
}
}
} else {
// Get new RegionNode merging old and new loop exits
prev = old_new[prev->_idx];
assert( prev, "just made this in step 7" );
if( idx == 0) { // Updating control edge?
phi = prev; // Just use existing control
} else { // Else need a new Phi
// Make a new Phi merging data values properly
phi = PhiNode::make( prev, old );
phi->set_req( 1, nnn );
}
}
// If inserting a new Phi, check for prior hits
if( idx != 0 ) {
Node *hit = _igvn.hash_find_insert(phi);
if( hit == NULL ) {
_igvn.register_new_node_with_optimizer(phi); // Register new phi
} else { // or
// Remove the new phi from the graph and use the hit
_igvn.remove_dead_node(phi);
phi = hit; // Use existing phi
}
set_ctrl(phi, prev);
}
// Make 'use' use the Phi instead of the old loop body exit value
_igvn.replace_input_of(use, idx, phi);
if( use->_idx >= new_counter ) { // If updating new phis
// Not needed for correctness, but prevents a weak assert
// in AddPNode from tripping (when we end up with different
// base & derived Phis that will become the same after
// IGVN does CSE).
Node *hit = _igvn.hash_find_insert(use);
if( hit ) // Go ahead and re-hash for hits.
_igvn.replace_node( use, hit );
}
// If 'use' was in the loop-exit block, it now needs to be sunk
// below the post-loop merge point.
sink_use( use, prev );
}
}
}
// Second half of cloning a loop head: takes care of the outer strip
// mined loop, if any.
//
// When 'head' is not strip mined, or mode is IgnoreStripMined, only the
// idom of the cloned head is set. Otherwise the exit projection of the
// inner loop end is cloned and then, depending on mode:
//  - CloneIncludesStripMined: the outer loop head, tail, loop end and
//    safepoint are cloned and wired around the cloned inner loop;
//  - any other mode: the cloned inner loop is no longer marked strip
//    mined and its entry control skips the outer loop head.
// Finally, data nodes that have their control in the outer loop and
// are reachable from the safepoint's inputs are cloned; the originals
// are appended to extra_data_nodes and old_new is updated.
void PhaseIdealLoop::clone_outer_loop(LoopNode* head, CloneLoopMode mode, IdealLoopTree *loop,
                                      IdealLoopTree* outer_loop, int dd, Node_List &old_new,
                                      Node_List& extra_data_nodes) {
  if (!head->is_strip_mined() || mode == IgnoreStripMined) {
    // No outer loop to deal with: only the dominator of the cloned
    // head needs to be set.
    Node* cloned_head = old_new[loop->_head->_idx];
    set_idom(cloned_head, cloned_head->in(LoopNode::EntryControl), dd);
    return;
  }

  CountedLoopNode* cl = head->as_CountedLoop();
  Node* l = cl->outer_loop();
  Node* tail = cl->outer_loop_tail();
  IfNode* le = cl->outer_loop_end();
  Node* sfpt = cl->outer_safepoint();
  CountedLoopEndNode* cle = cl->loopexit();
  CountedLoopNode* new_cl = old_new[cl->_idx]->as_CountedLoop();
  CountedLoopEndNode* new_cle = new_cl->as_CountedLoop()->loopexit();
  Node* cle_out = cle->proj_out(false);

  Node* new_sfpt = NULL;
  Node* new_cle_out = cle_out->clone();
  old_new.map(cle_out->_idx, new_cle_out);

  if (mode != CloneIncludesStripMined) {
    // The clone of the inner loop is not strip mined: enter it
    // straight from the control that enters the outer loop.
    Node* cloned_head = old_new[loop->_head->_idx];
    cloned_head->as_Loop()->clear_strip_mined();
    _igvn.replace_input_of(cloned_head, LoopNode::EntryControl,
                           cloned_head->in(LoopNode::EntryControl)->in(LoopNode::EntryControl));
    set_idom(cloned_head, cloned_head->in(LoopNode::EntryControl), dd);
  } else {
    // clone outer loop body
    Node* new_l = l->clone();
    Node* new_tail = tail->clone();
    IfNode* new_le = le->clone()->as_If();
    new_sfpt = sfpt->clone();

    // Loop tree membership and dominators of the cloned control nodes
    set_loop(new_l, outer_loop->_parent);
    set_idom(new_l, new_l->in(LoopNode::EntryControl), dd);
    set_loop(new_cle_out, outer_loop->_parent);
    set_idom(new_cle_out, new_cle, dd);
    set_loop(new_sfpt, outer_loop->_parent);
    set_idom(new_sfpt, new_cle_out, dd);
    set_loop(new_le, outer_loop->_parent);
    set_idom(new_le, new_sfpt, dd);
    set_loop(new_tail, outer_loop->_parent);
    set_idom(new_tail, new_le, dd);
    set_idom(new_cl, new_l, dd);

    old_new.map(l->_idx, new_l);
    old_new.map(tail->_idx, new_tail);
    old_new.map(le->_idx, new_le);
    old_new.map(sfpt->_idx, new_sfpt);

    // Control edges of the cloned outer loop
    new_l->set_req(LoopNode::LoopBackControl, new_tail);
    new_l->set_req(0, new_l);
    new_tail->set_req(0, new_le);
    new_le->set_req(0, new_sfpt);
    new_sfpt->set_req(0, new_cle_out);
    new_cle_out->set_req(0, new_cle);
    new_cl->set_req(LoopNode::EntryControl, new_l);

    _igvn.register_new_node_with_optimizer(new_l);
    _igvn.register_new_node_with_optimizer(new_tail);
    _igvn.register_new_node_with_optimizer(new_le);
  }

  // Look at data node that were assigned a control in the outer
  // loop: they are kept in the outer loop by the safepoint so start
  // from the safepoint node's inputs.
  IdealLoopTree* outer_ilt = get_loop(l);
  Node_Stack stack(2);
  stack.push(sfpt, 1);
  uint new_counter = C->unique();
  while (stack.size() > 0) {
    Node* n = stack.node();
    // Find the next input that is placed in the outer loop and has not
    // been cloned by this walk yet.
    uint idx = stack.index();
    for (; idx < n->req(); idx++) {
      Node* def = n->in(idx);
      if (def == NULL || !has_ctrl(def)) {
        continue;
      }
      if (get_loop(get_ctrl(def)) != outer_ilt) {
        continue;
      }
      Node* def_clone = old_new[def->_idx];
      if (def_clone != NULL && def_clone->_idx >= new_counter) {
        continue;
      }
      break;
    }
    if (idx < n->req()) {
      // Visit that input first, then resume with the following one.
      stack.set_index(idx + 1);
      stack.push(n->in(idx), 0);
      continue;
    }

    // All inputs of n are done: clone n itself.
    assert(old_new[n->_idx] == NULL || n == sfpt || old_new[n->_idx]->_idx < new_counter, "no clone yet");
    Node* m = NULL;
    if (n == sfpt) {
      m = new_sfpt;
    } else {
      m = n->clone();
    }
    if (m == NULL) {
      assert(n == sfpt && mode != CloneIncludesStripMined, "where's the safepoint clone?");
    } else {
      // Redirect the clone's inputs to the clones of its inputs.
      for (uint j = 0; j < n->req(); j++) {
        if (m->in(j) != NULL && old_new[m->in(j)->_idx] != NULL) {
          m->set_req(j, old_new[m->in(j)->_idx]);
        }
      }
    }
    if (n != sfpt) {
      extra_data_nodes.push(n);
      _igvn.register_new_node_with_optimizer(m);
      assert(get_ctrl(n) == cle_out, "what other control?");
      set_ctrl(m, new_cle_out);
      old_new.map(n->_idx, m);
    }
    stack.pop();
  }

  if (mode == CloneIncludesStripMined) {
    _igvn.register_new_node_with_optimizer(new_sfpt);
    _igvn.register_new_node_with_optimizer(new_cle_out);
  }
}
//------------------------------clone_loop-------------------------------------
//
// C L O N E A L O O P B O D Y
@ -1597,7 +1858,10 @@ void PhaseIdealLoop::sink_use( Node *use, Node *post_loop ) {
// dominated by the side_by_side_idom node. Used in construction of
// unswitched loops.
void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd,
Node* side_by_side_idom) {
CloneLoopMode mode, Node* side_by_side_idom) {
LoopNode* head = loop->_head->as_Loop();
head->verify_strip_mined(1);
if (C->do_vector_loop() && PrintOpto) {
const char* mname = C->method()->name()->as_quoted_ascii();
@ -1630,6 +1894,7 @@ void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd
_igvn.register_new_node_with_optimizer(nnn);
}
IdealLoopTree* outer_loop = (head->is_strip_mined() && mode != IgnoreStripMined) ? get_loop(head->as_CountedLoop()->outer_loop()) : loop;
// Step 2: Fix the edges in the new body. If the old input is outside the
// loop use it. If the old input is INside the loop, use the corresponding
@ -1641,7 +1906,7 @@ void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd
if (has_ctrl(old)) {
set_ctrl(nnn, old_new[get_ctrl(old)->_idx]);
} else {
set_loop(nnn, loop->_parent);
set_loop(nnn, outer_loop->_parent);
if (old->outcnt() > 0) {
set_idom( nnn, old_new[idom(old)->_idx], dd );
}
@ -1657,22 +1922,21 @@ void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd
}
_igvn.hash_find_insert(nnn);
}
Node *newhead = old_new[loop->_head->_idx];
set_idom(newhead, newhead->in(LoopNode::EntryControl), dd);
ResourceArea *area = Thread::current()->resource_area();
Node_List extra_data_nodes(area);
clone_outer_loop(head, mode, loop, outer_loop, dd, old_new, extra_data_nodes);
// Step 3: Now fix control uses. Loop varying control uses have already
// been fixed up (as part of all input edges in Step 2). Loop invariant
// control uses must be either an IfFalse or an IfTrue. Make a merge
// point to merge the old and new IfFalse/IfTrue nodes; make the use
// refer to this.
ResourceArea *area = Thread::current()->resource_area();
Node_List worklist(area);
uint new_counter = C->unique();
for( i = 0; i < loop->_body.size(); i++ ) {
Node* old = loop->_body.at(i);
if( !old->is_CFG() ) continue;
Node* nnn = old_new[old->_idx];
// Copy uses to a worklist, so I can munge the def-use info
// with impunity.
@ -1686,9 +1950,29 @@ void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd
if( !loop->is_member( use_loop ) && use->is_CFG() ) {
// Both OLD and USE are CFG nodes here.
assert( use->is_Proj(), "" );
Node* nnn = old_new[old->_idx];
Node* newuse = NULL;
if (head->is_strip_mined() && mode != IgnoreStripMined) {
CountedLoopNode* cl = head->as_CountedLoop();
CountedLoopEndNode* cle = cl->loopexit();
Node* cle_out = cle->proj_out(false);
if (use == cle_out) {
IfNode* le = cl->outer_loop_end();
use = le->proj_out(false);
use_loop = get_loop(use);
if (mode == CloneIncludesStripMined) {
nnn = old_new[le->_idx];
} else {
newuse = old_new[cle_out->_idx];
}
}
}
if (newuse == NULL) {
newuse = use->clone();
}
// Clone the loop exit control projection
Node *newuse = use->clone();
if (C->do_vector_loop()) {
cm.verify_insert_and_clone(use, newuse, cm.clone_idx());
}
@ -1722,6 +2006,10 @@ void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd
if( useuse->in(k) == use ) {
useuse->set_req(k, r);
uses_found++;
if (useuse->is_Loop() && k == LoopNode::EntryControl) {
assert(dom_depth(useuse) > dd_r , "");
set_idom(useuse, r, dom_depth(useuse));
}
}
}
l -= uses_found; // we deleted 1 or more copies of this edge
@ -1745,126 +2033,16 @@ void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd
Node_List *split_cex_set = NULL;
for( i = 0; i < loop->_body.size(); i++ ) {
Node* old = loop->_body.at(i);
Node* nnn = old_new[old->_idx];
// Copy uses to a worklist, so I can munge the def-use info
// with impunity.
for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
worklist.push(old->fast_out(j));
clone_loop_handle_data_uses(old, old_new, loop, outer_loop, split_if_set,
split_bool_set, split_cex_set, worklist, new_counter,
mode);
}
while( worklist.size() ) {
Node *use = worklist.pop();
if (!has_node(use)) continue; // Ignore dead nodes
if (use->in(0) == C->top()) continue;
IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
// Check for data-use outside of loop - at least one of OLD or USE
// must not be a CFG node.
if( !loop->is_member( use_loop ) && (!old->is_CFG() || !use->is_CFG())) {
// If the Data use is an IF, that means we have an IF outside of the
// loop that is switching on a condition that is set inside of the
// loop. Happens if people set a loop-exit flag; then test the flag
// in the loop to break the loop, then test it again outside of the
// loop to determine which way the loop exited.
// Loop predicate If node connects to Bool node through Opaque1 node.
if (use->is_If() || use->is_CMove() || C->is_predicate_opaq(use) || use->Opcode() == Op_Opaque4) {
// Since this code is highly unlikely, we lazily build the worklist
// of such Nodes to go split.
if (!split_if_set) {
split_if_set = new Node_List(area);
}
split_if_set->push(use);
}
if (use->is_Bool()) {
if (!split_bool_set) {
split_bool_set = new Node_List(area);
}
split_bool_set->push(use);
}
if (use->Opcode() == Op_CreateEx) {
if (!split_cex_set) {
split_cex_set = new Node_List(area);
}
split_cex_set->push(use);
}
// Get "block" use is in
uint idx = 0;
while( use->in(idx) != old ) idx++;
Node *prev = use->is_CFG() ? use : get_ctrl(use);
assert( !loop->is_member( get_loop( prev ) ), "" );
Node *cfg = prev->_idx >= new_counter
? prev->in(2)
: idom(prev);
if( use->is_Phi() ) // Phi use is in prior block
cfg = prev->in(idx); // NOT in block of Phi itself
if (cfg->is_top()) { // Use is dead?
_igvn.replace_input_of(use, idx, C->top());
continue;
}
while( !loop->is_member( get_loop( cfg ) ) ) {
prev = cfg;
cfg = cfg->_idx >= new_counter ? cfg->in(2) : idom(cfg);
}
// If the use occurs after merging several exits from the loop, then
// old value must have dominated all those exits. Since the same old
// value was used on all those exits we did not need a Phi at this
// merge point. NOW we do need a Phi here. Each loop exit value
// is now merged with the peeled body exit; each exit gets its own
// private Phi and those Phis need to be merged here.
Node *phi;
if( prev->is_Region() ) {
if( idx == 0 ) { // Updating control edge?
phi = prev; // Just use existing control
} else { // Else need a new Phi
phi = PhiNode::make( prev, old );
// Now recursively fix up the new uses of old!
for( uint i = 1; i < prev->req(); i++ ) {
worklist.push(phi); // Onto worklist once for each 'old' input
}
}
} else {
// Get new RegionNode merging old and new loop exits
prev = old_new[prev->_idx];
assert( prev, "just made this in step 7" );
if( idx == 0 ) { // Updating control edge?
phi = prev; // Just use existing control
} else { // Else need a new Phi
// Make a new Phi merging data values properly
phi = PhiNode::make( prev, old );
phi->set_req( 1, nnn );
}
}
// If inserting a new Phi, check for prior hits
if( idx != 0 ) {
Node *hit = _igvn.hash_find_insert(phi);
if( hit == NULL ) {
_igvn.register_new_node_with_optimizer(phi); // Register new phi
} else { // or
// Remove the new phi from the graph and use the hit
_igvn.remove_dead_node(phi);
phi = hit; // Use existing phi
}
set_ctrl(phi, prev);
}
// Make 'use' use the Phi instead of the old loop body exit value
_igvn.replace_input_of(use, idx, phi);
if( use->_idx >= new_counter ) { // If updating new phis
// Not needed for correctness, but prevents a weak assert
// in AddPNode from tripping (when we end up with different
// base & derived Phis that will become the same after
// IGVN does CSE).
Node *hit = _igvn.hash_find_insert(use);
if( hit ) // Go ahead and re-hash for hits.
_igvn.replace_node( use, hit );
}
// If 'use' was in the loop-exit block, it now needs to be sunk
// below the post-loop merge point.
sink_use( use, prev );
}
}
for (i = 0; i < extra_data_nodes.size(); i++) {
Node* old = extra_data_nodes.at(i);
clone_loop_handle_data_uses(old, old_new, loop, outer_loop, split_if_set,
split_bool_set, split_cex_set, worklist, new_counter,
mode);
}
// Check for IFs that need splitting/cloning. Happens if an IF outside of
@ -2956,7 +3134,7 @@ bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) {
assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition");
clone_loop( loop, old_new, dd );
clone_loop(loop, old_new, dd, IgnoreStripMined);
const uint clone_exit_idx = 1;
const uint orig_exit_idx = 2;

View File

@ -282,7 +282,8 @@ void PhaseMacroExpand::eliminate_card_mark(Node* p2x) {
if (!this_region->in(ind)->is_IfFalse()) {
ind = 2;
}
if (this_region->in(ind)->is_IfFalse()) {
if (this_region->in(ind)->is_IfFalse() &&
this_region->in(ind)->in(0)->Opcode() == Op_If) {
Node* bol = this_region->in(ind)->in(0)->in(1);
assert(bol->is_Bool(), "");
cmpx = bol->in(1);
@ -2660,6 +2661,8 @@ void PhaseMacroExpand::eliminate_macro_nodes() {
break;
case Node::Class_ArrayCopy:
break;
case Node::Class_OuterStripMinedLoop:
break;
default:
assert(n->Opcode() == Op_LoopLimit ||
n->Opcode() == Op_Opaque1 ||
@ -2733,6 +2736,10 @@ bool PhaseMacroExpand::expand_macro_nodes() {
} else if (n->Opcode() == Op_Opaque4) {
_igvn.replace_node(n, n->in(2));
success = true;
} else if (n->Opcode() == Op_OuterStripMinedLoop) {
n->as_OuterStripMinedLoop()->adjust_strip_mined_loop(&_igvn);
C->remove_macro_node(n);
success = true;
}
assert(success == (C->macro_count() < old_macro_count), "elimination reduces macro count");
progress = progress || success;

View File

@ -111,6 +111,8 @@ class MulNode;
class MultiNode;
class MultiBranchNode;
class NeverBranchNode;
class OuterStripMinedLoopNode;
class OuterStripMinedLoopEndNode;
class Node;
class Node_Array;
class Node_List;
@ -623,8 +625,9 @@ public:
DEFINE_CLASS_ID(Catch, PCTable, 0)
DEFINE_CLASS_ID(Jump, PCTable, 1)
DEFINE_CLASS_ID(If, MultiBranch, 1)
DEFINE_CLASS_ID(CountedLoopEnd, If, 0)
DEFINE_CLASS_ID(RangeCheck, If, 1)
DEFINE_CLASS_ID(CountedLoopEnd, If, 0)
DEFINE_CLASS_ID(RangeCheck, If, 1)
DEFINE_CLASS_ID(OuterStripMinedLoopEnd, If, 2)
DEFINE_CLASS_ID(NeverBranch, MultiBranch, 2)
DEFINE_CLASS_ID(Start, Multi, 2)
DEFINE_CLASS_ID(MemBar, Multi, 3)
@ -684,8 +687,9 @@ public:
DEFINE_CLASS_ID(Region, Node, 5)
DEFINE_CLASS_ID(Loop, Region, 0)
DEFINE_CLASS_ID(Root, Loop, 0)
DEFINE_CLASS_ID(CountedLoop, Loop, 1)
DEFINE_CLASS_ID(Root, Loop, 0)
DEFINE_CLASS_ID(CountedLoop, Loop, 1)
DEFINE_CLASS_ID(OuterStripMinedLoop, Loop, 2)
DEFINE_CLASS_ID(Sub, Node, 6)
DEFINE_CLASS_ID(Cmp, Sub, 0)
@ -841,6 +845,8 @@ public:
DEFINE_CLASS_QUERY(Mul)
DEFINE_CLASS_QUERY(Multi)
DEFINE_CLASS_QUERY(MultiBranch)
DEFINE_CLASS_QUERY(OuterStripMinedLoop)
DEFINE_CLASS_QUERY(OuterStripMinedLoopEnd)
DEFINE_CLASS_QUERY(Parm)
DEFINE_CLASS_QUERY(PCTable)
DEFINE_CLASS_QUERY(Phi)

View File

@ -1353,6 +1353,7 @@ bool SuperWord::follow_def_uses(Node_List* p) {
for (DUIterator_Fast jmax, j = s2->fast_outs(jmax); j < jmax; j++) {
Node* t2 = s2->fast_out(j);
if (!in_bb(t2)) continue;
if (t2->Opcode() == Op_AddI && t2 == _lp->as_CountedLoop()->incr()) continue; // don't mess with the iv
if (!opnd_positions_match(s1, t1, s2, t2))
continue;
if (stmts_can_pack(t1, t2, align)) {
@ -3313,7 +3314,7 @@ CountedLoopEndNode* SuperWord::get_pre_loop_end(CountedLoopNode* cl) {
return NULL;
}
Node* p_f = cl->in(LoopNode::EntryControl)->in(0)->in(0);
Node* p_f = cl->skip_strip_mined()->in(LoopNode::EntryControl)->in(0)->in(0);
if (!p_f->is_IfFalse()) return NULL;
if (!p_f->in(0)->is_CountedLoopEnd()) return NULL;
CountedLoopEndNode* pre_end = p_f->in(0)->as_CountedLoopEnd();

View File

@ -2207,6 +2207,21 @@ bool Arguments::check_vm_args_consistency() {
}
FLAG_SET_CMDLINE(bool, PostLoopMultiversioning, false);
}
if (UseCountedLoopSafepoints && LoopStripMiningIter == 0) {
if (!FLAG_IS_DEFAULT(UseCountedLoopSafepoints) || !FLAG_IS_DEFAULT(LoopStripMiningIter)) {
warning("When counted loop safepoints are enabled, LoopStripMiningIter must be at least 1 (a safepoint every 1 iteration): setting it to 1");
}
LoopStripMiningIter = 1;
} else if (!UseCountedLoopSafepoints && LoopStripMiningIter > 0) {
if (!FLAG_IS_DEFAULT(UseCountedLoopSafepoints) || !FLAG_IS_DEFAULT(LoopStripMiningIter)) {
warning("Disabling counted safepoints implies no loop strip mining: setting LoopStripMiningIter to 0");
}
LoopStripMiningIter = 0;
}
if (FLAG_IS_DEFAULT(LoopStripMiningIterShortLoop)) {
// blind guess
LoopStripMiningIterShortLoop = LoopStripMiningIter / 10;
}
#endif
return status;
}

View File

@ -61,7 +61,8 @@ public class UseCountedLoopSafepointsTest {
OutputAnalyzer oa;
try {
oa = ProcessTools.executeTestJvm("-XX:+UnlockDiagnosticVMOptions", "-Xbootclasspath/a:.",
"-XX:" + (enabled ? "+" : "-") + "UseCountedLoopSafepoints", "-XX:+WhiteBoxAPI",
"-XX:" + (enabled ? "+" : "-") + "UseCountedLoopSafepoints",
"-XX:LoopStripMiningIter=" + (enabled ? "1" : "0"), "-XX:+WhiteBoxAPI",
"-XX:-Inline", "-Xbatch", "-XX:+PrintIdeal", "-XX:LoopUnrollLimit=0",
"-XX:CompileOnly=" + UseCountedLoopSafepoints.class.getName() + "::testMethod",
UseCountedLoopSafepoints.class.getName());