8283466: C2: missing skeleton predicates in peeled loop
Reviewed-by: roland, chagedorn
This commit is contained in:
parent
a82417fa19
commit
199832a710
@ -491,24 +491,25 @@ Node* PhaseIdealLoop::skip_loop_predicates(Node* entry) {
|
||||
}
|
||||
|
||||
Node* PhaseIdealLoop::skip_all_loop_predicates(Node* entry) {
|
||||
Node* predicate = NULL;
|
||||
predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
|
||||
if (predicate != NULL) {
|
||||
entry = skip_loop_predicates(entry);
|
||||
}
|
||||
if (UseProfiledLoopPredicate) {
|
||||
predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_profile_predicate);
|
||||
if (predicate != NULL) { // right pattern that can be used by loop predication
|
||||
entry = skip_loop_predicates(entry);
|
||||
Predicates predicates(entry);
|
||||
return predicates.skip_all();
|
||||
}
|
||||
|
||||
//--------------------------next_predicate---------------------------------
|
||||
// Find next related predicate, useful for iterating over all related predicates
|
||||
ProjNode* PhaseIdealLoop::next_predicate(ProjNode* predicate) {
|
||||
IfNode* iff = predicate->in(0)->as_If();
|
||||
ProjNode* uncommon_proj = iff->proj_out(1 - predicate->_con);
|
||||
Node* rgn = uncommon_proj->unique_ctrl_out();
|
||||
assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct");
|
||||
Node* next = iff->in(0);
|
||||
if (next != nullptr && next->is_Proj() && next->in(0)->is_If()) {
|
||||
uncommon_proj = next->in(0)->as_If()->proj_out(1 - next->as_Proj()->_con);
|
||||
if (uncommon_proj->unique_ctrl_out() == rgn) { // lead into same region
|
||||
return next->as_Proj();
|
||||
}
|
||||
}
|
||||
if (UseLoopPredicate) {
|
||||
predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
|
||||
if (predicate != NULL) { // right pattern that can be used by loop predication
|
||||
entry = skip_loop_predicates(entry);
|
||||
}
|
||||
}
|
||||
return entry;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
//--------------------------find_predicate_insertion_point-------------------
|
||||
@ -522,6 +523,28 @@ ProjNode* PhaseIdealLoop::find_predicate_insertion_point(Node* start_c, Deoptimi
|
||||
return NULL;
|
||||
}
|
||||
|
||||
//--------------------------Predicates::Predicates--------------------------
|
||||
// given loop entry, find all predicates above loop
|
||||
PhaseIdealLoop::Predicates::Predicates(Node* entry) {
|
||||
_loop_limit_check = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
|
||||
if (_loop_limit_check != nullptr) {
|
||||
entry = skip_loop_predicates(entry);
|
||||
}
|
||||
if (UseProfiledLoopPredicate) {
|
||||
_profile_predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_profile_predicate);
|
||||
if (_profile_predicate != nullptr) {
|
||||
entry = skip_loop_predicates(entry);
|
||||
}
|
||||
}
|
||||
if (UseLoopPredicate) {
|
||||
_predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
|
||||
if (_predicate != nullptr) {
|
||||
entry = skip_loop_predicates(entry);
|
||||
}
|
||||
}
|
||||
_entry_to_all_predicates = entry;
|
||||
}
|
||||
|
||||
//--------------------------find_predicate------------------------------------
|
||||
// Find a predicate
|
||||
Node* PhaseIdealLoop::find_predicate(Node* entry) {
|
||||
|
@ -614,13 +614,16 @@ void PhaseIdealLoop::peeled_dom_test_elim(IdealLoopTree* loop, Node_List& old_ne
|
||||
// after peel and predicate move
|
||||
//
|
||||
// stmt1
|
||||
// |
|
||||
// v
|
||||
// loop predicate
|
||||
// /
|
||||
// /
|
||||
// clone / orig
|
||||
// /
|
||||
// / +----------+
|
||||
// / | |
|
||||
// / loop predicate |
|
||||
// / | |
|
||||
// / | |
|
||||
// v v |
|
||||
// TOP-->loop clone loop<----+ |
|
||||
@ -647,7 +650,10 @@ void PhaseIdealLoop::peeled_dom_test_elim(IdealLoopTree* loop, Node_List& old_ne
|
||||
//
|
||||
// final graph
|
||||
//
|
||||
// stmt1
|
||||
// stmt1
|
||||
// |
|
||||
// v
|
||||
// loop predicate
|
||||
// |
|
||||
// v
|
||||
// stmt2 clone
|
||||
@ -660,7 +666,7 @@ void PhaseIdealLoop::peeled_dom_test_elim(IdealLoopTree* loop, Node_List& old_ne
|
||||
// false true
|
||||
// | |
|
||||
// | v
|
||||
// | loop predicate
|
||||
// | initialized skeleton predicates
|
||||
// | |
|
||||
// | v
|
||||
// | loop<----+
|
||||
@ -714,7 +720,9 @@ void PhaseIdealLoop::do_peeling(IdealLoopTree *loop, Node_List &old_new) {
|
||||
|
||||
// Step 1: Clone the loop body. The clone becomes the peeled iteration.
|
||||
// The pre-loop illegally has 2 control users (old & new loops).
|
||||
clone_loop(loop, old_new, dom_depth(head->skip_strip_mined()), ControlAroundStripMined);
|
||||
const uint idx_before_clone = Compile::current()->unique();
|
||||
LoopNode* outer_loop_head = head->skip_strip_mined();
|
||||
clone_loop(loop, old_new, dom_depth(outer_loop_head), ControlAroundStripMined);
|
||||
|
||||
// Step 2: Make the old-loop fall-in edges point to the peeled iteration.
|
||||
// Do this by making the old-loop fall-in edges act as if they came
|
||||
@ -723,8 +731,8 @@ void PhaseIdealLoop::do_peeling(IdealLoopTree *loop, Node_List &old_new) {
|
||||
// the pre-loop with only 1 user (the new peeled iteration), but the
|
||||
// peeled-loop backedge has 2 users.
|
||||
Node* new_entry = old_new[head->in(LoopNode::LoopBackControl)->_idx];
|
||||
_igvn.hash_delete(head->skip_strip_mined());
|
||||
head->skip_strip_mined()->set_req(LoopNode::EntryControl, new_entry);
|
||||
_igvn.hash_delete(outer_loop_head);
|
||||
outer_loop_head->set_req(LoopNode::EntryControl, new_entry);
|
||||
for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) {
|
||||
Node* old = head->fast_out(j);
|
||||
if (old->in(0) == loop->_head && old->req() == 3 && old->is_Phi()) {
|
||||
@ -753,16 +761,33 @@ void PhaseIdealLoop::do_peeling(IdealLoopTree *loop, Node_List &old_new) {
|
||||
|
||||
// Step 4: Correct dom-depth info. Set to loop-head depth.
|
||||
|
||||
int dd = dom_depth(head->skip_strip_mined());
|
||||
set_idom(head->skip_strip_mined(), head->skip_strip_mined()->in(LoopNode::EntryControl), dd);
|
||||
int dd_outer_loop_head = dom_depth(outer_loop_head);
|
||||
set_idom(outer_loop_head, outer_loop_head->in(LoopNode::EntryControl), dd_outer_loop_head);
|
||||
for (uint j3 = 0; j3 < loop->_body.size(); j3++) {
|
||||
Node *old = loop->_body.at(j3);
|
||||
Node *nnn = old_new[old->_idx];
|
||||
if (!has_ctrl(nnn)) {
|
||||
set_idom(nnn, idom(nnn), dd-1);
|
||||
set_idom(nnn, idom(nnn), dd_outer_loop_head-1);
|
||||
}
|
||||
}
|
||||
|
||||
// Step 5: skeleton_predicates instantiation
|
||||
if (counted_loop && UseLoopPredicate) {
|
||||
CountedLoopNode *cl_head = head->as_CountedLoop();
|
||||
Node* init = cl_head->init_trip();
|
||||
Node* stride = cl_head->stride();
|
||||
IdealLoopTree* outer_loop = get_loop(outer_loop_head);
|
||||
Predicates predicates(new_head->in(LoopNode::EntryControl));
|
||||
initialize_skeleton_predicates_for_peeled_loop(predicates.predicate(),
|
||||
outer_loop_head, dd_outer_loop_head,
|
||||
init, stride, outer_loop,
|
||||
idx_before_clone, old_new);
|
||||
initialize_skeleton_predicates_for_peeled_loop(predicates.profile_predicate(),
|
||||
outer_loop_head, dd_outer_loop_head,
|
||||
init, stride, outer_loop,
|
||||
idx_before_clone, old_new);
|
||||
}
|
||||
|
||||
// Now force out all loop-invariant dominating tests. The optimizer
|
||||
// finds some, but we _know_ they are all useless.
|
||||
peeled_dom_test_elim(loop,old_new);
|
||||
@ -1318,12 +1343,12 @@ void PhaseIdealLoop::copy_skeleton_predicates_to_main_loop_helper(Node* predicat
|
||||
// Clone the skeleton predicate twice and initialize one with the initial
|
||||
// value of the loop induction variable. Leave the other predicate
|
||||
// to be initialized when increasing the stride during loop unrolling.
|
||||
prev_proj = clone_skeleton_predicate_for_main_or_post_loop(iff, opaque_init, NULL, predicate, uncommon_proj,
|
||||
current_proj, outer_loop, prev_proj);
|
||||
prev_proj = clone_skeleton_predicate_and_initialize(iff, opaque_init, NULL, predicate, uncommon_proj,
|
||||
current_proj, outer_loop, prev_proj);
|
||||
assert(skeleton_predicate_has_opaque(prev_proj->in(0)->as_If()), "");
|
||||
|
||||
prev_proj = clone_skeleton_predicate_for_main_or_post_loop(iff, init, stride, predicate, uncommon_proj,
|
||||
current_proj, outer_loop, prev_proj);
|
||||
prev_proj = clone_skeleton_predicate_and_initialize(iff, init, stride, predicate, uncommon_proj,
|
||||
current_proj, outer_loop, prev_proj);
|
||||
assert(!skeleton_predicate_has_opaque(prev_proj->in(0)->as_If()), "");
|
||||
|
||||
// Rewire any control inputs from the cloned skeleton predicates down to the main and post loop for data nodes that are part of the
|
||||
@ -1476,8 +1501,8 @@ Node* PhaseIdealLoop::clone_skeleton_predicate_bool(Node* iff, Node* new_init, N
|
||||
|
||||
// Clone a skeleton predicate for the main loop. new_init and new_stride are set as new inputs. Since the predicates cannot fail at runtime,
|
||||
// Halt nodes are inserted instead of uncommon traps.
|
||||
Node* PhaseIdealLoop::clone_skeleton_predicate_for_main_or_post_loop(Node* iff, Node* new_init, Node* new_stride, Node* predicate, Node* uncommon_proj,
|
||||
Node* control, IdealLoopTree* outer_loop, Node* input_proj) {
|
||||
Node* PhaseIdealLoop::clone_skeleton_predicate_and_initialize(Node* iff, Node* new_init, Node* new_stride, Node* predicate, Node* uncommon_proj,
|
||||
Node* control, IdealLoopTree* outer_loop, Node* input_proj) {
|
||||
Node* result = clone_skeleton_predicate_bool(iff, new_init, new_stride, control);
|
||||
Node* proj = predicate->clone();
|
||||
Node* other_proj = uncommon_proj->clone();
|
||||
@ -2007,8 +2032,8 @@ void PhaseIdealLoop::update_main_loop_skeleton_predicates(Node* ctrl, CountedLoo
|
||||
_igvn.replace_input_of(iff, 1, iff->in(1)->in(2));
|
||||
} else {
|
||||
// Add back predicates updated for the new stride.
|
||||
prev_proj = clone_skeleton_predicate_for_main_or_post_loop(iff, init, max_value, entry, proj, ctrl, outer_loop,
|
||||
prev_proj);
|
||||
prev_proj = clone_skeleton_predicate_and_initialize(iff, init, max_value, entry, proj, ctrl, outer_loop,
|
||||
prev_proj);
|
||||
assert(!skeleton_predicate_has_opaque(prev_proj->in(0)->as_If()), "unexpected");
|
||||
}
|
||||
}
|
||||
@ -2036,8 +2061,8 @@ void PhaseIdealLoop::copy_skeleton_predicates_to_post_loop(LoopNode* main_loop_h
|
||||
break;
|
||||
}
|
||||
if (iff->in(1)->Opcode() == Op_Opaque4 && skeleton_predicate_has_opaque(iff)) {
|
||||
prev_proj = clone_skeleton_predicate_for_main_or_post_loop(iff, init, stride, ctrl, proj, post_loop_entry,
|
||||
post_loop, prev_proj);
|
||||
prev_proj = clone_skeleton_predicate_and_initialize(iff, init, stride, ctrl, proj, post_loop_entry,
|
||||
post_loop, prev_proj);
|
||||
assert(!skeleton_predicate_has_opaque(prev_proj->in(0)->as_If()), "unexpected");
|
||||
}
|
||||
ctrl = ctrl->in(0)->in(0);
|
||||
@ -2048,6 +2073,52 @@ void PhaseIdealLoop::copy_skeleton_predicates_to_post_loop(LoopNode* main_loop_h
|
||||
}
|
||||
}
|
||||
|
||||
void PhaseIdealLoop::initialize_skeleton_predicates_for_peeled_loop(ProjNode* predicate,
|
||||
LoopNode* outer_loop_head,
|
||||
int dd_outer_loop_head,
|
||||
Node* init,
|
||||
Node* stride,
|
||||
IdealLoopTree* outer_loop,
|
||||
const uint idx_before_clone,
|
||||
const Node_List &old_new) {
|
||||
if (predicate == nullptr) {
|
||||
return;
|
||||
}
|
||||
Node* control = outer_loop_head->in(LoopNode::EntryControl);
|
||||
Node* input_proj = control;
|
||||
|
||||
predicate = next_predicate(predicate);
|
||||
while (predicate != nullptr) {
|
||||
IfNode* iff = predicate->in(0)->as_If();
|
||||
if (iff->in(1)->Opcode() == Op_Opaque4) {
|
||||
assert(skeleton_predicate_has_opaque(iff), "unexpected");
|
||||
ProjNode* uncommon_proj = iff->proj_out(1 - predicate->as_Proj()->_con);
|
||||
input_proj = clone_skeleton_predicate_and_initialize(iff, init, stride, predicate, uncommon_proj, control, outer_loop, input_proj);
|
||||
|
||||
// Rewire any control inputs from the old skeleton predicates above the peeled iteration down to the initialized
|
||||
// skeleton predicates above the peeled loop.
|
||||
for (DUIterator i = predicate->outs(); predicate->has_out(i); i++) {
|
||||
Node* dependent = predicate->out(i);
|
||||
Node* new_node = old_new[dependent->_idx];
|
||||
|
||||
if (!dependent->is_CFG() &&
|
||||
dependent->_idx < idx_before_clone && // old node
|
||||
new_node != nullptr && // cloned
|
||||
new_node->_idx >= idx_before_clone) { // for peeling
|
||||
// The old nodes from the peeled loop still point to the predicate above the peeled loop.
|
||||
// We need to rewire the dependencies to the newly initialized skeleton predicates.
|
||||
_igvn.replace_input_of(dependent, 0, input_proj);
|
||||
--i; // correct for just deleted predicate->out(i)
|
||||
}
|
||||
}
|
||||
}
|
||||
predicate = next_predicate(predicate);
|
||||
}
|
||||
|
||||
_igvn.replace_input_of(outer_loop_head, LoopNode::EntryControl, input_proj);
|
||||
set_idom(outer_loop_head, input_proj, dd_outer_loop_head);
|
||||
}
|
||||
|
||||
//------------------------------do_unroll--------------------------------------
|
||||
// Unroll the loop body one step - make each trip do 2 iterations.
|
||||
void PhaseIdealLoop::do_unroll(IdealLoopTree *loop, Node_List &old_new, bool adjust_min_trip) {
|
||||
|
@ -927,13 +927,16 @@ private:
|
||||
void copy_skeleton_predicates_to_main_loop(CountedLoopNode* pre_head, Node* init, Node* stride, IdealLoopTree* outer_loop, LoopNode* outer_main_head,
|
||||
uint dd_main_head, const uint idx_before_pre_post, const uint idx_after_post_before_pre,
|
||||
Node* zero_trip_guard_proj_main, Node* zero_trip_guard_proj_post, const Node_List &old_new);
|
||||
Node* clone_skeleton_predicate_for_main_or_post_loop(Node* iff, Node* new_init, Node* new_stride, Node* predicate, Node* uncommon_proj, Node* control,
|
||||
IdealLoopTree* outer_loop, Node* input_proj);
|
||||
Node* clone_skeleton_predicate_and_initialize(Node* iff, Node* new_init, Node* new_stride, Node* predicate, Node* uncommon_proj, Node* control,
|
||||
IdealLoopTree* outer_loop, Node* input_proj);
|
||||
Node* clone_skeleton_predicate_bool(Node* iff, Node* new_init, Node* new_stride, Node* control);
|
||||
static bool skeleton_predicate_has_opaque(IfNode* iff);
|
||||
static void get_skeleton_predicates(Node* predicate, Unique_Node_List& list, bool get_opaque = false);
|
||||
void update_main_loop_skeleton_predicates(Node* ctrl, CountedLoopNode* loop_head, Node* init, int stride_con);
|
||||
void copy_skeleton_predicates_to_post_loop(LoopNode* main_loop_head, CountedLoopNode* post_loop_head, Node* init, Node* stride);
|
||||
void initialize_skeleton_predicates_for_peeled_loop(ProjNode* predicate, LoopNode* outer_loop_head, int dd_outer_loop_head,
|
||||
Node* init, Node* stride, IdealLoopTree* outer_loop,
|
||||
const uint idx_before_clone, const Node_List& old_new);
|
||||
void insert_loop_limit_check(ProjNode* limit_check_proj, Node* cmp_limit, Node* bol);
|
||||
#ifdef ASSERT
|
||||
bool only_has_infinite_loops();
|
||||
@ -1328,9 +1331,43 @@ public:
|
||||
|
||||
static Node* skip_all_loop_predicates(Node* entry);
|
||||
static Node* skip_loop_predicates(Node* entry);
|
||||
static ProjNode* next_predicate(ProjNode* predicate);
|
||||
|
||||
// Find a good location to insert a predicate
|
||||
static ProjNode* find_predicate_insertion_point(Node* start_c, Deoptimization::DeoptReason reason);
|
||||
|
||||
class Predicates {
|
||||
public:
|
||||
// given loop entry, find all predicates above loop
|
||||
Predicates(Node* entry);
|
||||
|
||||
// Proj of empty loop limit check predicate
|
||||
ProjNode* loop_limit_check() {
|
||||
return _loop_limit_check;
|
||||
}
|
||||
|
||||
// Proj of empty profile predicate
|
||||
ProjNode* profile_predicate() {
|
||||
return _profile_predicate;
|
||||
}
|
||||
|
||||
// Proj of empty predicate
|
||||
ProjNode* predicate() {
|
||||
return _predicate;
|
||||
}
|
||||
|
||||
// First control node above all predicates
|
||||
Node* skip_all() {
|
||||
return _entry_to_all_predicates;
|
||||
}
|
||||
|
||||
private:
|
||||
ProjNode*_loop_limit_check = nullptr;
|
||||
ProjNode* _profile_predicate = nullptr;
|
||||
ProjNode* _predicate = nullptr;
|
||||
Node* _entry_to_all_predicates = nullptr;
|
||||
};
|
||||
|
||||
// Find a predicate
|
||||
static Node* find_predicate(Node* entry);
|
||||
// Construct a range check for a predicate if
|
||||
|
@ -0,0 +1,80 @@
|
||||
/*
|
||||
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @bug 8283466
|
||||
* @summary When skeleton predicates are not copied into peeled loop and initialized, this can happen:
|
||||
* 1. The rangecheck from a load is hoisted outside of the counted loop.
|
||||
* 2. The counted loop is peeled (we disable unswitching and unrolling with arguments)
|
||||
* 3. The type inside the peeled loop may now be narrower.
|
||||
* 4. The dataflow can die when a type becomes impossible.
|
||||
* 5. The rangecheck is still before the peeling, and is not copied to the peeled loop. Hence
|
||||
* we do not statically realize that the peeled loop can never be entered.
|
||||
*
|
||||
* @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+StressLCM -XX:+StressGCM -XX:+StressCCP -XX:+StressIGVN
|
||||
* -Xcomp -XX:-TieredCompilation
|
||||
* -XX:LoopMaxUnroll=0 -XX:LoopUnrollLimit=0 -XX:-LoopUnswitching
|
||||
* -XX:CompileCommand=compileonly,compiler.loopopts.TestPeelingSkeletonPredicateInitialization::*
|
||||
* compiler.loopopts.TestPeelingSkeletonPredicateInitialization
|
||||
*/
|
||||
|
||||
package compiler.loopopts;
|
||||
|
||||
public class TestPeelingSkeletonPredicateInitialization {
|
||||
int N = 400;
|
||||
int array[] = new int[N];
|
||||
int array2[] = new int[N];
|
||||
void run(int X, int inv, boolean b) {
|
||||
// have the arguments so the values are unknown to C2, cannot optimize things away
|
||||
try {
|
||||
// not sure why needed. maybe has sth to do with div_by_zero below?
|
||||
int tmp = 1 / 0;
|
||||
} catch (ArithmeticException e) {
|
||||
}
|
||||
for(int i = 2; i > X; i-=3) {
|
||||
// potential div_by_zero: somehow the exit is not loop exit but rethrow
|
||||
// also: i-1 only works in peeled iteration
|
||||
// in peeled loop: ConvI2L dies because it knows dataflow is impossible
|
||||
// If skeleton_predicate is missing for peeled loop, then controlflow does not die
|
||||
array[i - 1] /= inv;
|
||||
// loop invariant check that is not hoisted: this becomes reason for peeling
|
||||
array[inv] += 1;
|
||||
if (b) {
|
||||
// seems to be required for the memory phi so that it can be mangled when data flow dies
|
||||
array[inv] += 1;
|
||||
array2[inv] += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
public static void main(String[] strArr) {
|
||||
try {
|
||||
TestPeelingSkeletonPredicateInitialization _instance = new TestPeelingSkeletonPredicateInitialization();
|
||||
for (int i = 0; i < 10000; i++) {
|
||||
_instance.run(100, 3, false);
|
||||
}
|
||||
} catch (Exception ex) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user