8279888: Local variable independently used by multiple loops can interfere with loop optimizations

Co-authored-by: Claes Redestad <redestad@openjdk.org>
Reviewed-by: thartmann, kvn
This commit is contained in:
Roland Westrelin 2022-04-25 09:30:00 +00:00
parent 4c22a9bcf9
commit 32593df392
9 changed files with 970 additions and 315 deletions
src/hotspot/share
test
hotspot/jtreg/compiler/c2/irTests
micro/org/openjdk/bench/vm/compiler

@ -2288,21 +2288,48 @@ ciTypeFlow::Block* ciTypeFlow::clone_loop_head(Loop* lp, StateVector* temp_vecto
assert(!clone->has_pre_order(), "just created");
clone->set_next_pre_order();
// Insert clone after (orig) tail in reverse post order
clone->set_rpo_next(tail->rpo_next());
tail->set_rpo_next(clone);
// tail->head becomes tail->clone
for (SuccIter iter(tail); !iter.done(); iter.next()) {
if (iter.succ() == head) {
iter.set_succ(clone);
// Update predecessor information
head->predecessors()->remove(tail);
clone->predecessors()->append(tail);
// Accumulate profiled count for all backedges that share this loop's head
int total_count = lp->profiled_count();
for (Loop* lp1 = lp->parent(); lp1 != NULL; lp1 = lp1->parent()) {
for (Loop* lp2 = lp1; lp2 != NULL; lp2 = lp2->sibling()) {
if (lp2->head() == head && !lp2->tail()->is_backedge_copy()) {
total_count += lp2->profiled_count();
}
}
}
flow_block(tail, temp_vector, temp_set);
// Have the most frequent ones branch to the clone instead
int count = 0;
int nb = 0;
Block* latest_tail = tail;
bool done = false;
for (Loop* lp1 = lp; lp1 != NULL && !done; lp1 = lp1->parent()) {
for (Loop* lp2 = lp1; lp2 != NULL && !done; lp2 = lp2->sibling()) {
if (lp2->head() == head && !lp2->tail()->is_backedge_copy()) {
count += lp2->profiled_count();
if (lp2->tail()->post_order() < latest_tail->post_order()) {
latest_tail = lp2->tail();
}
nb++;
for (SuccIter iter(lp2->tail()); !iter.done(); iter.next()) {
if (iter.succ() == head) {
iter.set_succ(clone);
// Update predecessor information
head->predecessors()->remove(lp2->tail());
clone->predecessors()->append(lp2->tail());
}
}
flow_block(lp2->tail(), temp_vector, temp_set);
if (total_count == 0 || count > (total_count * .9)) {
done = true;
}
}
}
}
assert(nb >= 1, "at least one new");
clone->set_rpo_next(latest_tail->rpo_next());
latest_tail->set_rpo_next(clone);
if (head == tail) {
assert(nb == 1, "only when the head is not shared");
// For self-loops, clone->head becomes clone->clone
flow_block(clone, temp_vector, temp_set);
for (SuccIter iter(clone); !iter.done(); iter.next()) {
@ -2454,23 +2481,29 @@ void ciTypeFlow::PreorderLoops::next() {
}
// If the tail is a branch to the head, retrieve how many times that path was taken from profiling
int ciTypeFlow::profiled_count(ciTypeFlow::Loop* loop) {
ciMethodData* methodData = method()->method_data();
int ciTypeFlow::Loop::profiled_count() {
if (_profiled_count >= 0) {
return _profiled_count;
}
ciMethodData* methodData = outer()->method()->method_data();
if (!methodData->is_mature()) {
_profiled_count = 0;
return 0;
}
ciTypeFlow::Block* tail = loop->tail();
ciTypeFlow::Block* tail = this->tail();
if (tail->control() == -1 || tail->has_trap()) {
_profiled_count = 0;
return 0;
}
ciProfileData* data = methodData->bci_to_data(tail->control());
if (data == NULL || !data->is_JumpData()) {
_profiled_count = 0;
return 0;
}
ciBytecodeStream iter(method());
ciBytecodeStream iter(outer()->method());
iter.reset_to_bci(tail->control());
bool is_an_if = false;
@ -2509,21 +2542,25 @@ int ciTypeFlow::profiled_count(ciTypeFlow::Loop* loop) {
GrowableArray<ciTypeFlow::Block*>* succs = tail->successors();
if (!is_an_if) {
assert(((wide ? iter.get_far_dest() : iter.get_dest()) == loop->head()->start()) == (succs->at(ciTypeFlow::GOTO_TARGET) == loop->head()), "branch should lead to loop head");
if (succs->at(ciTypeFlow::GOTO_TARGET) == loop->head()) {
return method()->scale_count(data->as_JumpData()->taken());
assert(((wide ? iter.get_far_dest() : iter.get_dest()) == head()->start()) == (succs->at(ciTypeFlow::GOTO_TARGET) == head()), "branch should lead to loop head");
if (succs->at(ciTypeFlow::GOTO_TARGET) == head()) {
_profiled_count = outer()->method()->scale_count(data->as_JumpData()->taken());
return _profiled_count;
}
} else {
assert((iter.get_dest() == loop->head()->start()) == (succs->at(ciTypeFlow::IF_TAKEN) == loop->head()), "bytecode and CFG not consistent");
assert((tail->limit() == loop->head()->start()) == (succs->at(ciTypeFlow::IF_NOT_TAKEN) == loop->head()), "bytecode and CFG not consistent");
if (succs->at(ciTypeFlow::IF_TAKEN) == loop->head()) {
return method()->scale_count(data->as_JumpData()->taken());
} else if (succs->at(ciTypeFlow::IF_NOT_TAKEN) == loop->head()) {
return method()->scale_count(data->as_BranchData()->not_taken());
assert((iter.get_dest() == head()->start()) == (succs->at(ciTypeFlow::IF_TAKEN) == head()), "bytecode and CFG not consistent");
assert((tail->limit() == head()->start()) == (succs->at(ciTypeFlow::IF_NOT_TAKEN) == head()), "bytecode and CFG not consistent");
if (succs->at(ciTypeFlow::IF_TAKEN) == head()) {
_profiled_count = outer()->method()->scale_count(data->as_JumpData()->taken());
return _profiled_count;
} else if (succs->at(ciTypeFlow::IF_NOT_TAKEN) == head()) {
_profiled_count = outer()->method()->scale_count(data->as_BranchData()->not_taken());
return _profiled_count;
}
}
return 0;
_profiled_count = 0;
return _profiled_count;
}
bool ciTypeFlow::Loop::at_insertion_point(Loop* lp, Loop* current) {
@ -2535,8 +2572,8 @@ bool ciTypeFlow::Loop::at_insertion_point(Loop* lp, Loop* current) {
}
// In the case of a shared head, make the most frequent head/tail (as reported by profiling) the inner loop
if (current->head() == lp->head()) {
int lp_count = outer()->profiled_count(lp);
int current_count = outer()->profiled_count(current);
int lp_count = lp->profiled_count();
int current_count = current->profiled_count();
if (current_count < lp_count) {
return true;
} else if (current_count > lp_count) {

@ -716,6 +716,7 @@ public:
Block* _tail; // Tail of loop
bool _irreducible;
LocalSet _def_locals;
int _profiled_count;
ciTypeFlow* outer() const { return head()->outer(); }
bool at_insertion_point(Loop* lp, Loop* current);
@ -724,7 +725,7 @@ public:
Loop(Block* head, Block* tail) :
_parent(NULL), _sibling(NULL), _child(NULL),
_head(head), _tail(tail),
_irreducible(false), _def_locals() {}
_irreducible(false), _def_locals(), _profiled_count(-1) {}
Loop* parent() const { return _parent; }
Loop* sibling() const { return _sibling; }
@ -760,6 +761,8 @@ public:
bool is_root() const { return _tail->pre_order() == max_jint; }
int profiled_count();
void print(outputStream* st = tty, int indent = 0) const PRODUCT_RETURN;
};
@ -916,8 +919,6 @@ private:
// Create the block map, which indexes blocks in pre_order.
void map_blocks();
int profiled_count(ciTypeFlow::Loop* loop);
public:
// Perform type inference flow analysis.
void do_flow();

@ -766,6 +766,14 @@
"for at most jint_max / StressLongCountedLoop") \
range(0, max_juint) \
\
/* Adjacent string literals are concatenated verbatim, so a continued */    \
/* description line must end with a space.                            */    \
product(bool, DuplicateBackedge, true, DIAGNOSTIC,                          \
        "Transform loop with a merge point into 2 loops if inner loop is "  \
        "expected to optimize better")                                      \
\
/* Adjacent string literals are concatenated verbatim, so a continued */    \
/* description line must end with a space.                            */    \
develop(bool, StressDuplicateBackedge, false,                               \
        "Run DuplicateBackedge whenever possible ignoring benefit "         \
        "analysis")                                                         \
\
product(bool, VerifyReceiverTypes, trueInDebug, DIAGNOSTIC, \
"Verify receiver types at runtime") \

@ -1014,192 +1014,165 @@ bool PhaseIdealLoop::loop_predication_should_follow_branches(IdealLoopTree *loop
return follow_branches;
}
// Compute probability of reaching some CFG node from a fixed
// dominating CFG node
class PathFrequency {
private:
Node* _dom; // frequencies are computed relative to this node
Node_Stack _stack;
GrowableArray<float> _freqs_stack; // keep track of intermediate result at regions
GrowableArray<float> _freqs; // cache frequencies
PhaseIdealLoop* _phase;
float check_and_truncate_frequency(float f) {
assert(f >= 0, "Incorrect frequency");
// We do not perform an exact (f <= 1) check
// this would be error prone with rounding of floats.
// Performing a check like (f <= 1+eps) would be of benefit,
// however, it is not evident how to determine such an eps,
// given that an arbitrary number of add/mul operations
// are performed on these frequencies.
return (f > 1) ? 1 : f;
}
public:
PathFrequency(Node* dom, PhaseIdealLoop* phase)
: _dom(dom), _stack(0), _phase(phase) {
}
float to(Node* n) {
// post order walk on the CFG graph from n to _dom
IdealLoopTree* loop = _phase->get_loop(_dom);
Node* c = n;
for (;;) {
assert(_phase->get_loop(c) == loop, "have to be in the same loop");
if (c == _dom || _freqs.at_grow(c->_idx, -1) >= 0) {
float f = c == _dom ? 1 : _freqs.at(c->_idx);
Node* prev = c;
while (_stack.size() > 0 && prev == c) {
Node* n = _stack.node();
if (!n->is_Region()) {
if (_phase->get_loop(n) != _phase->get_loop(n->in(0))) {
// Found an inner loop: compute frequency of reaching this
// exit from the loop head by looking at the number of
// times each loop exit was taken
IdealLoopTree* inner_loop = _phase->get_loop(n->in(0));
LoopNode* inner_head = inner_loop->_head->as_Loop();
assert(_phase->get_loop(n) == loop, "only 1 inner loop");
if (inner_head->is_OuterStripMinedLoop()) {
inner_head->verify_strip_mined(1);
if (n->in(0) == inner_head->in(LoopNode::LoopBackControl)->in(0)) {
n = n->in(0)->in(0)->in(0);
}
inner_loop = inner_loop->_child;
inner_head = inner_loop->_head->as_Loop();
inner_head->verify_strip_mined(1);
float PathFrequency::to(Node* n) {
// post order walk on the CFG graph from n to _dom
IdealLoopTree* loop = _phase->get_loop(_dom);
Node* c = n;
for (;;) {
assert(_phase->get_loop(c) == loop, "have to be in the same loop");
if (c == _dom || _freqs.at_grow(c->_idx, -1) >= 0) {
float f = c == _dom ? 1 : _freqs.at(c->_idx);
Node* prev = c;
while (_stack.size() > 0 && prev == c) {
Node* n = _stack.node();
if (!n->is_Region()) {
if (_phase->get_loop(n) != _phase->get_loop(n->in(0))) {
// Found an inner loop: compute frequency of reaching this
// exit from the loop head by looking at the number of
// times each loop exit was taken
IdealLoopTree* inner_loop = _phase->get_loop(n->in(0));
LoopNode* inner_head = inner_loop->_head->as_Loop();
assert(_phase->get_loop(n) == loop, "only 1 inner loop");
if (inner_head->is_OuterStripMinedLoop()) {
inner_head->verify_strip_mined(1);
if (n->in(0) == inner_head->in(LoopNode::LoopBackControl)->in(0)) {
n = n->in(0)->in(0)->in(0);
}
float loop_exit_cnt = 0.0f;
for (uint i = 0; i < inner_loop->_body.size(); i++) {
Node *n = inner_loop->_body[i];
float c = inner_loop->compute_profile_trip_cnt_helper(n);
loop_exit_cnt += c;
}
float cnt = -1;
if (n->in(0)->is_If()) {
IfNode* iff = n->in(0)->as_If();
float p = n->in(0)->as_If()->_prob;
if (n->Opcode() == Op_IfFalse) {
p = 1 - p;
}
if (p > PROB_MIN) {
cnt = p * iff->_fcnt;
} else {
cnt = 0;
}
} else {
assert(n->in(0)->is_Jump(), "unsupported node kind");
JumpNode* jmp = n->in(0)->as_Jump();
float p = n->in(0)->as_Jump()->_probs[n->as_JumpProj()->_con];
cnt = p * jmp->_fcnt;
}
float this_exit_f = cnt > 0 ? cnt / loop_exit_cnt : 0;
this_exit_f = check_and_truncate_frequency(this_exit_f);
f = f * this_exit_f;
f = check_and_truncate_frequency(f);
} else {
float p = -1;
if (n->in(0)->is_If()) {
p = n->in(0)->as_If()->_prob;
if (n->Opcode() == Op_IfFalse) {
p = 1 - p;
}
} else {
assert(n->in(0)->is_Jump(), "unsupported node kind");
p = n->in(0)->as_Jump()->_probs[n->as_JumpProj()->_con];
}
f = f * p;
f = check_and_truncate_frequency(f);
inner_loop = inner_loop->_child;
inner_head = inner_loop->_head->as_Loop();
inner_head->verify_strip_mined(1);
}
_freqs.at_put_grow(n->_idx, (float)f, -1);
_stack.pop();
} else {
float prev_f = _freqs_stack.pop();
float new_f = f;
f = new_f + prev_f;
float loop_exit_cnt = 0.0f;
for (uint i = 0; i < inner_loop->_body.size(); i++) {
Node *n = inner_loop->_body[i];
float c = inner_loop->compute_profile_trip_cnt_helper(n);
loop_exit_cnt += c;
}
float cnt = -1;
if (n->in(0)->is_If()) {
IfNode* iff = n->in(0)->as_If();
float p = n->in(0)->as_If()->_prob;
if (n->Opcode() == Op_IfFalse) {
p = 1 - p;
}
if (p > PROB_MIN) {
cnt = p * iff->_fcnt;
} else {
cnt = 0;
}
} else {
assert(n->in(0)->is_Jump(), "unsupported node kind");
JumpNode* jmp = n->in(0)->as_Jump();
float p = n->in(0)->as_Jump()->_probs[n->as_JumpProj()->_con];
cnt = p * jmp->_fcnt;
}
float this_exit_f = cnt > 0 ? cnt / loop_exit_cnt : 0;
this_exit_f = check_and_truncate_frequency(this_exit_f);
f = f * this_exit_f;
f = check_and_truncate_frequency(f);
uint i = _stack.index();
if (i < n->req()) {
c = n->in(i);
_stack.set_index(i+1);
_freqs_stack.push(f);
} else {
_freqs.at_put_grow(n->_idx, f, -1);
_stack.pop();
}
}
}
if (_stack.size() == 0) {
return check_and_truncate_frequency(f);
}
} else if (c->is_Loop()) {
ShouldNotReachHere();
c = c->in(LoopNode::EntryControl);
} else if (c->is_Region()) {
_freqs_stack.push(0);
_stack.push(c, 2);
c = c->in(1);
} else {
if (c->is_IfProj()) {
IfNode* iff = c->in(0)->as_If();
if (iff->_prob == PROB_UNKNOWN) {
// assume never taken
_freqs.at_put_grow(c->_idx, 0, -1);
} else if (_phase->get_loop(c) != _phase->get_loop(iff)) {
if (iff->_fcnt == COUNT_UNKNOWN) {
// assume never taken
_freqs.at_put_grow(c->_idx, 0, -1);
} else {
// skip over loop
_stack.push(c, 1);
c = _phase->get_loop(c->in(0))->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl);
}
} else {
_stack.push(c, 1);
c = iff;
}
} else if (c->is_JumpProj()) {
JumpNode* jmp = c->in(0)->as_Jump();
if (_phase->get_loop(c) != _phase->get_loop(jmp)) {
if (jmp->_fcnt == COUNT_UNKNOWN) {
// assume never taken
_freqs.at_put_grow(c->_idx, 0, -1);
float p = -1;
if (n->in(0)->is_If()) {
p = n->in(0)->as_If()->_prob;
if (n->Opcode() == Op_IfFalse) {
p = 1 - p;
}
} else {
// skip over loop
_stack.push(c, 1);
c = _phase->get_loop(c->in(0))->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl);
assert(n->in(0)->is_Jump(), "unsupported node kind");
p = n->in(0)->as_Jump()->_probs[n->as_JumpProj()->_con];
}
} else {
_stack.push(c, 1);
c = jmp;
f = f * p;
f = check_and_truncate_frequency(f);
}
} else if (c->Opcode() == Op_CatchProj &&
c->in(0)->Opcode() == Op_Catch &&
c->in(0)->in(0)->is_Proj() &&
c->in(0)->in(0)->in(0)->is_Call()) {
// assume exceptions are never thrown
uint con = c->as_Proj()->_con;
if (con == CatchProjNode::fall_through_index) {
Node* call = c->in(0)->in(0)->in(0)->in(0);
if (_phase->get_loop(call) != _phase->get_loop(c)) {
_freqs.at_put_grow(c->_idx, 0, -1);
} else {
c = call;
}
} else {
assert(con >= CatchProjNode::catch_all_index, "what else?");
_freqs.at_put_grow(c->_idx, 0, -1);
}
} else if (c->unique_ctrl_out_or_null() == NULL && !c->is_If() && !c->is_Jump()) {
ShouldNotReachHere();
_freqs.at_put_grow(n->_idx, (float)f, -1);
_stack.pop();
} else {
c = c->in(0);
float prev_f = _freqs_stack.pop();
float new_f = f;
f = new_f + prev_f;
f = check_and_truncate_frequency(f);
uint i = _stack.index();
if (i < n->req()) {
c = n->in(i);
_stack.set_index(i+1);
_freqs_stack.push(f);
} else {
_freqs.at_put_grow(n->_idx, f, -1);
_stack.pop();
}
}
}
if (_stack.size() == 0) {
return check_and_truncate_frequency(f);
}
} else if (c->is_Loop()) {
ShouldNotReachHere();
c = c->in(LoopNode::EntryControl);
} else if (c->is_Region()) {
_freqs_stack.push(0);
_stack.push(c, 2);
c = c->in(1);
} else {
if (c->is_IfProj()) {
IfNode* iff = c->in(0)->as_If();
if (iff->_prob == PROB_UNKNOWN) {
// assume never taken
_freqs.at_put_grow(c->_idx, 0, -1);
} else if (_phase->get_loop(c) != _phase->get_loop(iff)) {
if (iff->_fcnt == COUNT_UNKNOWN) {
// assume never taken
_freqs.at_put_grow(c->_idx, 0, -1);
} else {
// skip over loop
_stack.push(c, 1);
c = _phase->get_loop(c->in(0))->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl);
}
} else {
_stack.push(c, 1);
c = iff;
}
} else if (c->is_JumpProj()) {
JumpNode* jmp = c->in(0)->as_Jump();
if (_phase->get_loop(c) != _phase->get_loop(jmp)) {
if (jmp->_fcnt == COUNT_UNKNOWN) {
// assume never taken
_freqs.at_put_grow(c->_idx, 0, -1);
} else {
// skip over loop
_stack.push(c, 1);
c = _phase->get_loop(c->in(0))->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl);
}
} else {
_stack.push(c, 1);
c = jmp;
}
} else if (c->Opcode() == Op_CatchProj &&
c->in(0)->Opcode() == Op_Catch &&
c->in(0)->in(0)->is_Proj() &&
c->in(0)->in(0)->in(0)->is_Call()) {
// assume exceptions are never thrown
uint con = c->as_Proj()->_con;
if (con == CatchProjNode::fall_through_index) {
Node* call = c->in(0)->in(0)->in(0)->in(0);
if (_phase->get_loop(call) != _phase->get_loop(c)) {
_freqs.at_put_grow(c->_idx, 0, -1);
} else {
c = call;
}
} else {
assert(con >= CatchProjNode::catch_all_index, "what else?");
_freqs.at_put_grow(c->_idx, 0, -1);
}
} else if (c->unique_ctrl_out_or_null() == NULL && !c->is_If() && !c->is_Jump()) {
ShouldNotReachHere();
} else {
c = c->in(0);
}
}
ShouldNotReachHere();
return -1;
}
};
ShouldNotReachHere();
return -1;
}
void PhaseIdealLoop::loop_predication_follow_branches(Node *n, IdealLoopTree *loop, float loop_trip_cnt,
PathFrequency& pf, Node_Stack& stack, VectorSet& seen,

@ -3602,6 +3602,8 @@ bool IdealLoopTree::iteration_split_impl(PhaseIdealLoop *phase, Node_List &old_n
} else if (policy_unswitching(phase)) {
phase->do_unswitching(this, old_new);
return false; // need to recalculate idom data
} else if (phase->duplicate_loop_backedge(this, old_new)) {
return false;
} else if (_head->is_LongCountedLoop()) {
phase->create_loop_nest(this, old_new);
}
@ -3630,6 +3632,9 @@ bool IdealLoopTree::iteration_split_impl(PhaseIdealLoop *phase, Node_List &old_n
phase->do_maximally_unroll(this, old_new);
return true;
}
if (StressDuplicateBackedge && phase->duplicate_loop_backedge(this, old_new)) {
return false;
}
}
uint est_peeling = estimate_peeling(phase);

@ -1420,6 +1420,7 @@ public:
// Partially peel loop up through last_peel node.
bool partial_peel( IdealLoopTree *loop, Node_List &old_new );
bool duplicate_loop_backedge(IdealLoopTree *loop, Node_List &old_new);
// Create a scheduled list of nodes control dependent on ctrl set.
void scheduled_nodelist( IdealLoopTree *loop, VectorSet& ctrl, Node_List &sched );
@ -1466,8 +1467,8 @@ public:
// "Nearly" because all Nodes have been cloned from the original in the loop,
// but the fall-in edges to the Cmp are different. Clone bool/Cmp pairs
// through the Phi recursively, and return a Bool.
Node *clone_iff( PhiNode *phi, IdealLoopTree *loop );
CmpNode *clone_bool( PhiNode *phi, IdealLoopTree *loop );
Node* clone_iff(PhiNode* phi);
CmpNode* clone_bool(PhiNode* phi);
// Rework addressing expressions to get the most loop-invariant stuff
@ -1680,6 +1681,20 @@ public:
void push_pinned_nodes_thru_region(IfNode* dom_if, Node* region);
bool try_merge_identical_ifs(Node* n);
void clone_loop_body(const Node_List& body, Node_List &old_new, CloneMap* cm);
void fix_body_edges(const Node_List &body, IdealLoopTree* loop, const Node_List &old_new, int dd,
IdealLoopTree* parent, bool partial);
void fix_ctrl_uses(const Node_List& body, const IdealLoopTree* loop, Node_List &old_new, CloneLoopMode mode,
Node* side_by_side_idom, CloneMap* cm, Node_List &worklist);
void fix_data_uses(Node_List& body, IdealLoopTree* loop, CloneLoopMode mode, IdealLoopTree* outer_loop,
uint new_counter, Node_List& old_new, Node_List& worklist, Node_List*& split_if_set,
Node_List*& split_bool_set, Node_List*& split_cex_set);
void finish_clone_loop(Node_List* split_if_set, Node_List* split_bool_set, Node_List* split_cex_set);
};
@ -1829,4 +1844,33 @@ public:
IdealLoopTree* current() { return _curnt; } // Return current value of iterator.
};
// Computes the probability of reaching a CFG node starting from a fixed
// CFG node that dominates it.
class PathFrequency {
private:
  Node* _dom;                        // reference node: every frequency is relative to it
  Node_Stack _stack;
  GrowableArray<float> _freqs_stack; // intermediate results remembered at regions
  GrowableArray<float> _freqs;       // frequencies that were already computed (cache)
  PhaseIdealLoop* _phase;

  // Clamps a frequency to at most 1; negative input is a programming error.
  //
  // No exact (f <= 1) assert is made here: float rounding would make it
  // error prone. A tolerant check such as (f <= 1 + eps) would be useful,
  // but there is no obvious way to pick eps because an arbitrary number of
  // additions and multiplications feed into these frequencies.
  float check_and_truncate_frequency(float f) {
    assert(f >= 0, "Incorrect frequency");
    if (f > 1) {
      return 1;
    }
    return f;
  }

public:
  PathFrequency(Node* dom, PhaseIdealLoop* phase)
    : _dom(dom), _stack(0), _phase(phase) {
  }

  // Frequency of reaching n, relative to _dom. Defined out of line.
  float to(Node* n);
};
#endif // SHARE_OPTO_LOOPNODE_HPP

@ -1836,14 +1836,14 @@ void PhaseIdealLoop::split_if_with_blocks(VectorSet &visited, Node_Stack &nstack
// "Nearly" because all Nodes have been cloned from the original in the loop,
// but the fall-in edges to the Cmp are different. Clone bool/Cmp pairs
// through the Phi recursively, and return a Bool.
Node* PhaseIdealLoop::clone_iff(PhiNode *phi, IdealLoopTree *loop) {
Node* PhaseIdealLoop::clone_iff(PhiNode* phi) {
// Convert this Phi into a Phi merging Bools
uint i;
for (i = 1; i < phi->req(); i++) {
Node *b = phi->in(i);
if (b->is_Phi()) {
_igvn.replace_input_of(phi, i, clone_iff(b->as_Phi(), loop));
_igvn.replace_input_of(phi, i, clone_iff(b->as_Phi()));
} else {
assert(b->is_Bool() || b->Opcode() == Op_Opaque4, "");
}
@ -1923,13 +1923,13 @@ Node* PhaseIdealLoop::clone_iff(PhiNode *phi, IdealLoopTree *loop) {
// "Nearly" because all Nodes have been cloned from the original in the loop,
// but the fall-in edges to the Cmp are different. Clone bool/Cmp pairs
// through the Phi recursively, and return a Bool.
CmpNode *PhaseIdealLoop::clone_bool( PhiNode *phi, IdealLoopTree *loop ) {
CmpNode*PhaseIdealLoop::clone_bool(PhiNode* phi) {
uint i;
// Convert this Phi into a Phi merging Bools
for( i = 1; i < phi->req(); i++ ) {
Node *b = phi->in(i);
if( b->is_Phi() ) {
_igvn.replace_input_of(phi, i, clone_bool( b->as_Phi(), loop ));
_igvn.replace_input_of(phi, i, clone_bool(b->as_Phi()));
} else {
assert( b->is_Cmp() || b->is_top(), "inputs are all Cmp or TOP" );
}
@ -2268,7 +2268,7 @@ void PhaseIdealLoop::clone_outer_loop(LoopNode* head, CloneLoopMode mode, IdealL
_igvn.register_new_node_with_optimizer(new_sfpt);
_igvn.register_new_node_with_optimizer(new_cle_out);
}
// Some other transformation may have pessimistically assign some
// Some other transformation may have pessimistically assigned some
// data nodes to the outer loop. Set their control so they are out
// of the outer loop.
ResourceMark rm;
@ -2340,7 +2340,6 @@ void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd
}
CloneMap& cm = C->clone_map();
Dict* dict = cm.dict();
if (C->do_vector_loop()) {
cm.set_clone_idx(cm.max_gen()+1);
#ifndef PRODUCT
@ -2352,49 +2351,14 @@ void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd
}
// Step 1: Clone the loop body. Make the old->new mapping.
uint i;
for (i = 0; i < loop->_body.size(); i++) {
Node* old = loop->_body.at(i);
Node* nnn = old->clone();
old_new.map(old->_idx, nnn);
if (old->is_reduction()) {
// Reduction flag is not copied by default. Copy it here when cloning the entire loop body.
nnn->add_flag(Node::Flag_is_reduction);
}
if (C->do_vector_loop()) {
cm.verify_insert_and_clone(old, nnn, cm.clone_idx());
}
_igvn.register_new_node_with_optimizer(nnn);
}
clone_loop_body(loop->_body, old_new, &cm);
IdealLoopTree* outer_loop = (head->is_strip_mined() && mode != IgnoreStripMined) ? get_loop(head->as_CountedLoop()->outer_loop()) : loop;
// Step 2: Fix the edges in the new body. If the old input is outside the
// loop use it. If the old input is INside the loop, use the corresponding
// new node instead.
for( i = 0; i < loop->_body.size(); i++ ) {
Node *old = loop->_body.at(i);
Node *nnn = old_new[old->_idx];
// Fix CFG/Loop controlling the new node
if (has_ctrl(old)) {
set_ctrl(nnn, old_new[get_ctrl(old)->_idx]);
} else {
set_loop(nnn, outer_loop->_parent);
if (old->outcnt() > 0) {
set_idom( nnn, old_new[idom(old)->_idx], dd );
}
}
// Correct edges to the new node
for( uint j = 0; j < nnn->req(); j++ ) {
Node *n = nnn->in(j);
if( n ) {
IdealLoopTree *old_in_loop = get_loop( has_ctrl(n) ? get_ctrl(n) : n );
if( loop->is_member( old_in_loop ) )
nnn->set_req(j, old_new[n->_idx]);
}
}
_igvn.hash_find_insert(nnn);
}
fix_body_edges(loop->_body, loop, old_new, dd, outer_loop->_parent, false);
Node_List extra_data_nodes; // data nodes in the outer strip mined loop
clone_outer_loop(head, mode, loop, outer_loop, dd, old_new, extra_data_nodes);
@ -2406,22 +2370,93 @@ void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd
// refer to this.
Node_List worklist;
uint new_counter = C->unique();
for( i = 0; i < loop->_body.size(); i++ ) {
Node* old = loop->_body.at(i);
fix_ctrl_uses(loop->_body, loop, old_new, mode, side_by_side_idom, &cm, worklist);
// Step 4: If loop-invariant use is not control, it must be dominated by a
// loop exit IfFalse/IfTrue. Find "proper" loop exit. Make a Region
// there if needed. Make a Phi there merging old and new used values.
Node_List *split_if_set = NULL;
Node_List *split_bool_set = NULL;
Node_List *split_cex_set = NULL;
fix_data_uses(loop->_body, loop, mode, outer_loop, new_counter, old_new, worklist, split_if_set, split_bool_set, split_cex_set);
for (uint i = 0; i < extra_data_nodes.size(); i++) {
Node* old = extra_data_nodes.at(i);
clone_loop_handle_data_uses(old, old_new, loop, outer_loop, split_if_set,
split_bool_set, split_cex_set, worklist, new_counter,
mode);
}
// Check for IFs that need splitting/cloning. Happens if an IF outside of
// the loop uses a condition set in the loop. The original IF probably
// takes control from one or more OLD Regions (which in turn get from NEW
// Regions). In any case, there will be a set of Phis for each merge point
// from the IF up to where the original BOOL def exits the loop.
finish_clone_loop(split_if_set, split_bool_set, split_cex_set);
}
// Drains the three optional work lists (each may be NULL) and repairs the
// nodes recorded in them:
//  - split_if_set:   a node whose input 1 is a Phi gets that input replaced
//                    by the result of clone_iff() on the Phi;
//  - split_bool_set: input 1 (asserted to be a Phi) is replaced by the Cmp
//                    returned by clone_bool();
//  - split_cex_set:  the node is passed to split_up() together with its
//                    control input, which is asserted to be a Region.
// Every list is empty once this returns.
void PhaseIdealLoop::finish_clone_loop(Node_List* split_if_set, Node_List* split_bool_set, Node_List* split_cex_set) {
  if (split_if_set != NULL) {
    while (split_if_set->size() > 0) {
      Node* user = split_if_set->pop();
      Node* cond = user->in(1);
      if (!cond->is_Phi()) {
        continue; // nothing to clone for this user
      }
      Node* cloned = clone_iff(cond->as_Phi());
      _igvn.replace_input_of(user, 1, cloned);
    }
  }

  if (split_bool_set != NULL) {
    while (split_bool_set->size() > 0) {
      Node* bol = split_bool_set->pop();
      Node* merged = bol->in(1);
      assert(merged->is_Phi(), "");
      CmpNode* cloned_cmp = clone_bool(static_cast<PhiNode*>(merged));
      _igvn.replace_input_of(bol, 1, cloned_cmp);
    }
  }

  if (split_cex_set != NULL) {
    while (split_cex_set->size() > 0) {
      Node* cex = split_cex_set->pop();
      Node* region = cex->in(0);
      assert(region->is_Region(), "");
      assert(cex->in(1)->is_Phi(), "");
      assert(region->in(0) == cex->in(1)->in(0), "");
      split_up(cex, region, NULL);
    }
  }
}
// Calls clone_loop_handle_data_uses() once for every node of 'body', in list
// order, forwarding all other arguments unchanged. The three split sets are
// passed by reference, so whatever the callee stores in them is visible to
// the caller afterwards.
void PhaseIdealLoop::fix_data_uses(Node_List& body, IdealLoopTree* loop, CloneLoopMode mode, IdealLoopTree* outer_loop,
                                   uint new_counter, Node_List &old_new, Node_List &worklist, Node_List*& split_if_set,
                                   Node_List*& split_bool_set, Node_List*& split_cex_set) {
  uint idx = 0;
  // The size is re-read on every iteration, exactly like a plain indexed loop.
  while (idx < body.size()) {
    Node* original = body.at(idx);
    clone_loop_handle_data_uses(original, old_new, loop, outer_loop,
                                split_if_set, split_bool_set, split_cex_set,
                                worklist, new_counter, mode);
    idx++;
  }
}
void PhaseIdealLoop::fix_ctrl_uses(const Node_List& body, const IdealLoopTree* loop, Node_List &old_new, CloneLoopMode mode,
Node* side_by_side_idom, CloneMap* cm, Node_List &worklist) {
LoopNode* head = loop->_head->as_Loop();
for(uint i = 0; i < body.size(); i++ ) {
Node* old = body.at(i);
if( !old->is_CFG() ) continue;
// Copy uses to a worklist, so I can munge the def-use info
// with impunity.
for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++) {
worklist.push(old->fast_out(j));
}
while( worklist.size() ) { // Visit all uses
while (worklist.size()) { // Visit all uses
Node *use = worklist.pop();
if (!has_node(use)) continue; // Ignore dead nodes
IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
if( !loop->is_member( use_loop ) && use->is_CFG() ) {
IdealLoopTree *use_loop = get_loop(has_ctrl(use) ? get_ctrl(use) : use );
if (!loop->is_member(use_loop) && use->is_CFG()) {
// Both OLD and USE are CFG nodes here.
assert( use->is_Proj(), "" );
assert(use->is_Proj(), "" );
Node* nnn = old_new[old->_idx];
Node* newuse = NULL;
@ -2445,8 +2480,8 @@ void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd
}
// Clone the loop exit control projection
if (C->do_vector_loop()) {
cm.verify_insert_and_clone(use, newuse, cm.clone_idx());
if (C->do_vector_loop() && cm != NULL) {
cm->verify_insert_and_clone(use, newuse, cm->clone_idx());
}
newuse->set_req(0,nnn);
_igvn.register_new_node_with_optimizer(newuse);
@ -2458,18 +2493,18 @@ void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd
RegionNode *r = new RegionNode(3);
// Map the old use to the new merge point
old_new.map( use->_idx, r );
uint dd_r = MIN2(dom_depth(newuse),dom_depth(use));
assert( dd_r >= dom_depth(dom_lca(newuse,use)), "" );
uint dd_r = MIN2(dom_depth(newuse), dom_depth(use));
assert(dd_r >= dom_depth(dom_lca(newuse, use)), "" );
// The original user of 'use' uses 'r' instead.
for (DUIterator_Last lmin, l = use->last_outs(lmin); l >= lmin;) {
Node* useuse = use->last_out(l);
_igvn.rehash_node_delayed(useuse);
uint uses_found = 0;
if( useuse->in(0) == use ) {
if (useuse->in(0) == use) {
useuse->set_req(0, r);
uses_found++;
if( useuse->is_CFG() ) {
if (useuse->is_CFG()) {
// This is not a dom_depth > dd_r because when new
// control flow is constructed by a loop opt, a node and
// its dominator can end up at the same dom_depth
@ -2477,7 +2512,7 @@ void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd
set_idom(useuse, r, dom_depth(useuse));
}
}
for( uint k = 1; k < useuse->req(); k++ ) {
for (uint k = 1; k < useuse->req(); k++) {
if( useuse->in(k) == use ) {
useuse->set_req(k, r);
uses_found++;
@ -2494,68 +2529,67 @@ void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd
}
// Now finish up 'r'
r->set_req( 1, newuse );
r->set_req( 2, use );
r->set_req(1, newuse);
r->set_req(2, use);
_igvn.register_new_node_with_optimizer(r);
set_loop(r, use_loop);
set_idom(r, !side_by_side_idom ? newuse->in(0) : side_by_side_idom, dd_r);
set_idom(r, (side_by_side_idom == NULL) ? newuse->in(0) : side_by_side_idom, dd_r);
} // End of if a loop-exit test
}
}
}
// Step 4: If loop-invariant use is not control, it must be dominated by a
// loop exit IfFalse/IfTrue. Find "proper" loop exit. Make a Region
// there if needed. Make a Phi there merging old and new used values.
Node_List *split_if_set = NULL;
Node_List *split_bool_set = NULL;
Node_List *split_cex_set = NULL;
for( i = 0; i < loop->_body.size(); i++ ) {
Node* old = loop->_body.at(i);
clone_loop_handle_data_uses(old, old_new, loop, outer_loop, split_if_set,
split_bool_set, split_cex_set, worklist, new_counter,
mode);
}
for (i = 0; i < extra_data_nodes.size(); i++) {
Node* old = extra_data_nodes.at(i);
clone_loop_handle_data_uses(old, old_new, loop, outer_loop, split_if_set,
split_bool_set, split_cex_set, worklist, new_counter,
mode);
}
// Check for IFs that need splitting/cloning. Happens if an IF outside of
// the loop uses a condition set in the loop. The original IF probably
// takes control from one or more OLD Regions (which in turn get from NEW
// Regions). In any case, there will be a set of Phis for each merge point
// from the IF up to where the original BOOL def exists the loop.
if (split_if_set) {
while (split_if_set->size()) {
Node *iff = split_if_set->pop();
if (iff->in(1)->is_Phi()) {
Node *b = clone_iff(iff->in(1)->as_Phi(), loop);
_igvn.replace_input_of(iff, 1, b);
// Wire up the clones of the nodes in 'body'. For every node 'old' in 'body', its clone is
// looked up in 'old_new' (indexed by old->_idx) and:
// - if has_ctrl(old) is true, the clone's control is set to the clone of old's control;
// - otherwise the clone is assigned to the 'parent' loop and, when 'old' has outputs and its
//   immediate dominator has a clone, that clone becomes the new node's idom at depth 'dd';
// - every input of the clone that belongs to 'loop' and has a clone is redirected to that clone.
// 'partial' states that only part of the loop was cloned: in that case an input that is in
// 'loop' but has no clone is tolerated (checked by the asserts below).
//
// NOTE(review): this span comes from a diff view. The statements below that reference
// split_bool_set / split_cex_set use names that are not parameters or locals of this function
// and look like residue of the removed tail of clone_loop -- confirm against the real file
// before relying on this text.
void PhaseIdealLoop::fix_body_edges(const Node_List &body, IdealLoopTree* loop, const Node_List &old_new, int dd,
IdealLoopTree* parent, bool partial) {
for(uint i = 0; i < body.size(); i++ ) {
Node *old = body.at(i);
// nnn is the clone that was recorded for 'old'
Node *nnn = old_new[old->_idx];
// Fix CFG/Loop controlling the new node
if (has_ctrl(old)) {
set_ctrl(nnn, old_new[get_ctrl(old)->_idx]);
} else {
set_loop(nnn, parent);
if (old->outcnt() > 0) {
Node* dom = idom(old);
// Only set the idom of the clone when the dominator itself was cloned
if (old_new[dom->_idx] != NULL) {
dom = old_new[dom->_idx];
set_idom(nnn, dom, dd );
}
}
}
}
if (split_bool_set) {
while (split_bool_set->size()) {
Node *b = split_bool_set->pop();
Node *phi = b->in(1);
assert(phi->is_Phi(), "");
CmpNode *cmp = clone_bool((PhiNode*)phi, loop);
_igvn.replace_input_of(b, 1, cmp);
}
}
if (split_cex_set) {
while (split_cex_set->size()) {
Node *b = split_cex_set->pop();
assert(b->in(0)->is_Region(), "");
assert(b->in(1)->is_Phi(), "");
assert(b->in(0)->in(0) == b->in(1)->in(0), "");
split_up(b, b->in(0), NULL);
// Correct edges to the new node
for (uint j = 0; j < nnn->req(); j++) {
Node *n = nnn->in(j);
if (n != NULL) {
// Loop of the input: taken from its control when it has one, from the node itself otherwise
IdealLoopTree *old_in_loop = get_loop(has_ctrl(n) ? get_ctrl(n) : n);
if (loop->is_member(old_in_loop)) {
if (old_new[n->_idx] != NULL) {
nnn->set_req(j, old_new[n->_idx]);
} else {
// An input inside the loop without a clone is only expected for a partial clone
assert(!body.contains(n), "");
assert(partial, "node not cloned");
}
}
}
}
_igvn.hash_find_insert(nnn);
}
}
void PhaseIdealLoop::clone_loop_body(const Node_List& body, Node_List &old_new, CloneMap* cm) {
for (uint i = 0; i < body.size(); i++) {
Node* old = body.at(i);
Node* nnn = old->clone();
old_new.map(old->_idx, nnn);
if (old->is_reduction()) {
// Reduction flag is not copied by default. Copy it here when cloning the entire loop body.
nnn->add_flag(Node::Flag_is_reduction);
}
if (C->do_vector_loop() && cm != NULL) {
cm->verify_insert_and_clone(old, nnn, cm->clone_idx());
}
_igvn.register_new_node_with_optimizer(nnn);
}
}
@ -3770,6 +3804,285 @@ bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) {
return true;
}
// Transform:
//
// loop<-----------------+
//  |                    |
// stmt1 stmt2 .. stmtn  |
//  |     |        |     |
//  \     |       /      |
//    v   v     v        |
//       region          |
//         |             |
//     shared_stmt       |
//         |             |
//         v             |
//         if            |
//         / \           |
//        |   -----------+
//        v
//
// into:
//
//    loop<-------------------+
//     |                      |
//     v                      |
// +->loop                    |
// |   |                      |
// |  stmt1 stmt2 .. stmtn    |
// |   |     |        |       |
// |   |      \       /       |
// |   |       v     v        |
// |   |        region1       |
// |   |           |          |
// |  shared_stmt shared_stmt |
// |   |           |          |
// |   v           v          |
// |   if          if         |
// |   /\          / \        |
// +--   |         |   -------+
//       \         /
//        v       v
//         region2
//
// (region2 is shown as merging mirrored projections of the loop exit
// ifs to make the diagram clearer, but it really merges the same
// projection of both ifs)
//
// Conditions for this transformation to trigger:
// - the path through stmt1 is frequent enough
// - the inner loop will be turned into a counted loop after transformation
// Try to split the backedge of 'loop' in two by cloning the control (and dependent data)
// nodes between a merge region and the backedge, and by inserting a new outer LoopNode above
// the existing head. The existing head keeps only the region input selected as 'inner'; all
// other region inputs reach the new outer head through the clones.
//
// 'old_new' receives the old -> clone mapping for every node that is cloned.
//
// Returns false, leaving the graph untouched, when:
// - DuplicateBackedge is off, the head is not a LoopNode or the node budget would be exceeded;
// - no suitable region / frequent-enough path is found (or, under StressDuplicateBackedge,
//   the head is strip mined or there is no region on the idom chain from the backedge);
// - the region's immediate dominator is a Catch;
// - a trailing membar would be cloned without its leading membar.
// Returns true after the transformation; major progress is then set on the compilation.
bool PhaseIdealLoop::duplicate_loop_backedge(IdealLoopTree *loop, Node_List &old_new) {
  if (!DuplicateBackedge) {
    return false;
  }

  assert(!loop->_head->is_CountedLoop() || StressDuplicateBackedge, "Non-counted loop only");
  if (!loop->_head->is_Loop()) {
    return false;
  }

  uint estimate = loop->est_loop_clone_sz(1);
  if (exceeding_node_budget(estimate)) {
    return false;
  }

  LoopNode *head = loop->_head->as_Loop();

  Node* region = NULL;
  IfNode* exit_test = NULL;
  uint inner;
  // Frequency of the path selected as 'inner'. Only assigned on the non-stress path and only
  // read when exit_test != NULL; initialized so that it never holds an indeterminate value.
  float f = 0;
  if (StressDuplicateBackedge) {
    if (head->is_strip_mined()) {
      return false;
    }
    // Walk the idom chain from the backedge up to the head: the last region seen
    // (i.e. the one closest to the head) is the one that gets used.
    Node* c = head->in(LoopNode::LoopBackControl);

    while (c != head) {
      if (c->is_Region()) {
        region = c;
      }
      c = idom(c);
    }

    if (region == NULL) {
      return false;
    }

    inner = 1;
  } else {
    // Is the shape of the loop that of a counted loop...
    Node* back_control = loop_exit_control(head, loop);
    if (back_control == NULL) {
      return false;
    }

    BoolTest::mask bt = BoolTest::illegal;
    float cl_prob = 0;
    Node* incr = NULL;
    Node* limit = NULL;
    Node* cmp = loop_exit_test(back_control, loop, incr, limit, bt, cl_prob);
    if (cmp == NULL || cmp->Opcode() != Op_CmpI) {
      return false;
    }

    // With an extra phi for the candidate iv?
    if (!incr->is_Phi()) {
      return false;
    }

    PathFrequency pf(head, this);
    region = incr->in(0);

    // Go over all paths for the extra phi's region and see if that
    // path is frequent enough and would match the expected iv shape
    // if the extra phi is removed
    inner = 0;
    for (uint i = 1; i < incr->req(); ++i) {
      Node* in = incr->in(i);
      Node* trunc1 = NULL;
      Node* trunc2 = NULL;
      const TypeInteger* iv_trunc_t = NULL;
      if (!(in = CountedLoopNode::match_incr_with_optional_truncation(in, &trunc1, &trunc2, &iv_trunc_t, T_INT))) {
        continue;
      }
      assert(in->Opcode() == Op_AddI, "wrong increment code");
      Node* xphi = NULL;
      Node* stride = loop_iv_stride(in, loop, xphi);

      if (stride == NULL) {
        continue;
      }

      PhiNode* phi = loop_iv_phi(xphi, NULL, head, loop);
      if (phi == NULL ||
          (trunc1 == NULL && phi->in(LoopNode::LoopBackControl) != incr) ||
          (trunc1 != NULL && phi->in(LoopNode::LoopBackControl) != trunc1)) {
        return false;
      }

      // Keep the first path whose frequency is above one half
      f = pf.to(region->in(i));
      if (f > 0.5) {
        inner = i;
        break;
      }
    }

    if (inner == 0) {
      return false;
    }

    exit_test = back_control->in(0)->as_If();
  }

  if (idom(region)->is_Catch()) {
    return false;
  }

  // Collect all control nodes that need to be cloned (shared_stmt in the diagram)
  Unique_Node_List wq;
  wq.push(head->in(LoopNode::LoopBackControl));
  for (uint i = 0; i < wq.size(); i++) {
    Node* c = wq.at(i);
    assert(get_loop(c) == loop, "not in the right loop?");
    if (c->is_Region()) {
      // The walk stops at 'region': its inputs are not followed
      if (c != region) {
        for (uint j = 1; j < c->req(); ++j) {
          wq.push(c->in(j));
        }
      }
    } else {
      wq.push(c->in(0));
    }
    assert(!is_dominator(c, region) || c == region, "shouldn't go above region");
  }

  // NOTE(review): region_dom is not read anywhere below; kept because its initializer is a
  // call to idom() whose definition is not visible here -- confirm it can be dropped.
  Node* region_dom = idom(region);

  // Can't do the transformation if this would cause a membar pair to
  // be split
  for (uint i = 0; i < wq.size(); i++) {
    Node* c = wq.at(i);
    if (c->is_MemBar() && (c->as_MemBar()->trailing_store() || c->as_MemBar()->trailing_load_store())) {
      assert(c->as_MemBar()->leading_membar()->trailing_membar() == c, "bad membar pair");
      if (!wq.member(c->as_MemBar()->leading_membar())) {
        return false;
      }
    }
  }

  // Collect data nodes that need to be cloned as well
  int dd = dom_depth(head);

  for (uint i = 0; i < loop->_body.size(); ++i) {
    Node* n = loop->_body.at(i);
    if (has_ctrl(n)) {
      Node* c = get_ctrl(n);
      if (wq.member(c)) {
        wq.push(n);
      }
    } else {
      // Nodes without a ctrl entry keep their idom but are re-registered at depth 'dd'
      set_idom(n, idom(n), dd);
    }
  }

  // clone shared_stmt
  clone_loop_body(wq, old_new, NULL);

  // The cloned region loses the 'inner' path; the original region keeps only that path
  // (its other inputs are replaced by top further down).
  Node* region_clone = old_new[region->_idx];
  region_clone->set_req(inner, C->top());
  set_idom(region, region->in(inner), dd);

  // Prepare the outer loop
  Node* outer_head = new LoopNode(head->in(LoopNode::EntryControl), old_new[head->in(LoopNode::LoopBackControl)->_idx]);
  register_control(outer_head, loop->_parent, outer_head->in(LoopNode::EntryControl));
  _igvn.replace_input_of(head, LoopNode::EntryControl, outer_head);
  set_idom(head, outer_head, dd);

  fix_body_edges(wq, loop, old_new, dd, loop->_parent, true);

  // Make one of the shared_stmt copies only reachable from stmt1, the
  // other only from stmt2..stmtn.
  Node* dom = NULL;
  for (uint i = 1; i < region->req(); ++i) {
    if (i != inner) {
      _igvn.replace_input_of(region, i, C->top());
    }
    Node* in = region_clone->in(i);
    if (in->is_top()) {
      continue;
    }
    // The idom of the cloned region is the dom_lca of all its live inputs
    if (dom == NULL) {
      dom = in;
    } else {
      dom = dom_lca(dom, in);
    }
  }

  set_idom(region_clone, dom, dd);

  // Set up the outer loop
  for (uint i = 0; i < head->outcnt(); i++) {
    Node* u = head->raw_out(i);
    if (u->is_Phi()) {
      Node* outer_phi = u->clone();
      outer_phi->set_req(0, outer_head);
      // Use the clone of the backedge value when there is one, the original value otherwise
      Node* backedge = old_new[u->in(LoopNode::LoopBackControl)->_idx];
      if (backedge == NULL) {
        backedge = u->in(LoopNode::LoopBackControl);
      }
      outer_phi->set_req(LoopNode::LoopBackControl, backedge);
      register_new_node(outer_phi, outer_head);
      _igvn.replace_input_of(u, LoopNode::EntryControl, outer_phi);
    }
  }

  // create control and data nodes for out of loop uses (including region2)
  Node_List worklist;
  uint new_counter = C->unique();
  fix_ctrl_uses(wq, loop, old_new, ControlAroundStripMined, outer_head, NULL, worklist);

  Node_List *split_if_set = NULL;
  Node_List *split_bool_set = NULL;
  Node_List *split_cex_set = NULL;
  fix_data_uses(wq, loop, ControlAroundStripMined, head->is_strip_mined() ? loop->_parent : loop, new_counter, old_new, worklist, split_if_set, split_bool_set, split_cex_set);

  finish_clone_loop(split_if_set, split_bool_set, split_cex_set);

  // Split the profiled count of the exit test between the original (weight f)
  // and its clone (weight 1 - f)
  if (exit_test != NULL) {
    float cnt = exit_test->_fcnt;
    if (cnt != COUNT_UNKNOWN) {
      exit_test->_fcnt = cnt * f;
      old_new[exit_test->_idx]->as_If()->_fcnt = cnt * (1 - f);
    }
  }

  C->set_major_progress();

  return true;
}
//------------------------------reorg_offsets----------------------------------
// Reorganize offset computations to lower register pressure. Mostly
// prevent loop-fallout uses of the pre-incremented trip counter (which are

@ -0,0 +1,64 @@
/*
* Copyright (c) 2022, Red Hat, Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package compiler.c2.irTests;
import compiler.lib.ir_framework.*;
import java.util.Objects;
/*
* @test
* @bug 8279888
* @summary Local variable independently used by multiple loops can interfere with loop optimizations
* @library /test/lib /
* @run driver compiler.c2.irTests.TestDuplicateBackedge
*/
// IR test for a loop whose induction variable is updated on two different paths.
// The IR rules below check which loop nodes are present depending on the value of the
// DuplicateBackedge flag.
public class TestDuplicateBackedge {
// Runs the test twice with loop unrolling limited (LoopMaxUnroll=1): once with the
// default flags and once with DuplicateBackedge explicitly switched off.
public static void main(String[] args) {
TestFramework.runWithFlags("-XX:LoopMaxUnroll=1");
TestFramework.runWithFlags("-XX:LoopMaxUnroll=1", "-XX:-DuplicateBackedge");
}
// Rules:
// - DuplicateBackedge on: counts of "1" for both IRNode.LOOP and IRNode.COUNTEDLOOP.
// - DuplicateBackedge off: count of "1" for IRNode.LOOP and no IRNode.COUNTEDLOOP at all.
// NOTE(review): the exact source shape of the loop below is what the rules match against;
// do not restructure it.
@Test
@IR(applyIf = { "DuplicateBackedge", "true" }, counts = { IRNode.LOOP, "1", IRNode.COUNTEDLOOP, "1" })
@IR(applyIf = { "DuplicateBackedge", "false" }, counts = { IRNode.LOOP, "1" })
@IR(applyIf = { "DuplicateBackedge", "false" }, failOn = { IRNode.COUNTEDLOOP })
public static float test() {
float res = 1;
for (int i = 1;;) {
// Two ways of updating i: a multiply-and-add when i is a multiple of 10 ...
if (i % 10 == 0) {
i = (i * 2) + 1;
res /= 42;
} else {
// ... and a plain increment otherwise
i++;
res *= 42;
}
// Single exit test shared by both update paths
if (i >= 1000) {
break;
}
}
return res;
}
}

@ -0,0 +1,210 @@
/*
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package org.openjdk.bench.vm.compiler;
import org.openjdk.jmh.annotations.*;
import java.util.Arrays;
import java.util.Random;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
/**
* Examine issues with (potentially) uninitialized locals interfering with
* loop optimizations.
*
* Two benchmark methods run the same pair of loops over {@code val}; they differ only in
* whether the local {@code c} is declared once and shared by both loops or declared
* separately inside each loop.
*/
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Fork(value = 3)
@Warmup(iterations = 5, time = 2)
@Measurement(iterations = 5, time = 3)
@State(Scope.Thread)
public class LoopLocals {
// Input whose first char is the non-ASCII U+FF11 (despite the name, this is a char[], not bytes)
public char[] bytesStartingWithNegative = """
\uFF11
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aliquam ac sem eu
urna egestas placerat. Etiam finibus ipsum nulla, non mattis dolor cursus a.
Nulla nec nisl consectetur, lacinia neque id, accumsan ante. Curabitur et
sapien in magna porta ultricies. Sed vel pellentesque nibh. Pellentesque dictum
dignissim diam eu ultricies. Class aptent taciti sociosqu ad litora torquent
per conubia nostra, per inceptos himenaeos. Suspendisse erat diam, fringilla
sed massa sed, posuere viverra orci. Suspendisse tempor libero non gravida
efficitur. Vivamus lacinia risus non orci viverra, at consectetur odio laoreet.
Suspendisse potenti.
Phasellus vel nisi iaculis, accumsan quam sed, bibendum eros. Sed venenatis
nulla tortor, et eleifend urna sodales id. Nullam tempus ac metus sit amet
sollicitudin. Nam sed ex diam. Praesent vitae eros et neque condimentum
consectetur eget non tortor. Praesent bibendum vel felis nec dignissim.
Maecenas a enim diam. Suspendisse quis ligula at nisi accumsan lacinia id
hendrerit sapien. Donec aliquam mattis lectus eu ultrices. Duis eu nisl
euismod, blandit mauris vel, placerat urna. Etiam malesuada enim purus,
tristique mollis odio blandit quis. Vivamus posuere.
""".toCharArray();
// Same text with the non-ASCII U+FF11 placed at the end of the last line instead
public char[] bytesEndingWithNegative = """
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aliquam ac sem eu
urna egestas placerat. Etiam finibus ipsum nulla, non mattis dolor cursus a.
Nulla nec nisl consectetur, lacinia neque id, accumsan ante. Curabitur et
sapien in magna porta ultricies. Sed vel pellentesque nibh. Pellentesque dictum
dignissim diam eu ultricies. Class aptent taciti sociosqu ad litora torquent
per conubia nostra, per inceptos himenaeos. Suspendisse erat diam, fringilla
sed massa sed, posuere viverra orci. Suspendisse tempor libero non gravida
efficitur. Vivamus lacinia risus non orci viverra, at consectetur odio laoreet.
Suspendisse potenti.
Phasellus vel nisi iaculis, accumsan quam sed, bibendum eros. Sed venenatis
nulla tortor, et eleifend urna sodales id. Nullam tempus ac metus sit amet
sollicitudin. Nam sed ex diam. Praesent vitae eros et neque condimentum
consectetur eget non tortor. Praesent bibendum vel felis nec dignissim.
Maecenas a enim diam. Suspendisse quis ligula at nisi accumsan lacinia id
hendrerit sapien. Donec aliquam mattis lectus eu ultrices. Duis eu nisl
euismod, blandit mauris vel, placerat urna. Etiam malesuada enim purus,
tristique mollis odio blandit quis. Vivamus posuere. \uFF11
""".toCharArray();
// Benchmark parameter selecting which input setup() installs in 'val'
@Param({"startNonASCII", "endNonASCII", "mixed"})
private String variant;
// Input actually read by the benchmark methods
private char[] val;
/**
* Selects the input for the current {@code variant}. For "mixed", a copy of
* {@code bytesEndingWithNegative} is modified with a {@code Random} seeded with 0:
* each char is, when {@code nextInt(100) < 30}, increased by {@code nextInt(0x2F00)}.
* Any other variant name is rejected with a RuntimeException.
*/
@Setup
public void setup() {
val = switch (variant) {
case "startNonASCII" -> bytesStartingWithNegative;
case "endNonASCII" -> bytesEndingWithNegative;
case "mixed" -> {
char[] chars = bytesEndingWithNegative.clone();
// Fixed seed keeps the generated input identical from run to run
var random = new Random(0L);
for (int i = 0; i < chars.length; i++) {
if (random.nextInt(100) < 30) {
chars[i] = (char)(chars[i] + random.nextInt(0x2F00));
}
}
yield chars;
}
default -> throw new RuntimeException("Unknown variant: " + variant);
};
}
/**
* Encodes chars of {@code val} into a byte array using a UTF-8 style scheme (1 byte below
* 0x80, 2 bytes below 0x800, 4 bytes for a surrogate pair, '?' for an unpaired surrogate,
* 3 bytes otherwise). The local {@code c} is declared once and used by both loops.
*
* @return the destination array itself when completely filled, otherwise a copy
* truncated to the number of bytes written
*/
@Benchmark
public byte[] loopsWithSharedLocal() {
int dp = 0;
int sp = 0;
// NOTE(review): sl is half of val.length, so only indices below val.length / 2 are read;
// the trailing U+FF11 of bytesEndingWithNegative lies beyond that -- confirm this is intended.
int sl = val.length >> 1;
byte[] dst = new byte[sl * 3];
// Shared local: assigned in the condition of the first loop and again in the second loop
char c;
// First loop: copy chars below U+0080 one to one until a larger char or the end is hit
while (sp < sl && (c = getChar(val, sp)) < '\u0080') {
dst[dp++] = (byte)c;
sp++;
}
// Second loop: general encoding of the remaining chars
while (sp < sl) {
c = getChar(val, sp++);
if (c < 0x80) {
dst[dp++] = (byte)c;
} else if (c < 0x800) {
dst[dp++] = (byte)(0xc0 | (c >> 6));
dst[dp++] = (byte)(0x80 | (c & 0x3f));
} else if (Character.isSurrogate(c)) {
// uc stays negative unless a valid high/low surrogate pair is found
int uc = -1;
char c2;
if (Character.isHighSurrogate(c) && sp < sl &&
Character.isLowSurrogate(c2 = getChar(val, sp))) {
uc = Character.toCodePoint(c, c2);
}
if (uc < 0) {
dst[dp++] = '?';
} else {
dst[dp++] = (byte)(0xf0 | ((uc >> 18)));
dst[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
dst[dp++] = (byte)(0x80 | ((uc >> 6) & 0x3f));
dst[dp++] = (byte)(0x80 | (uc & 0x3f));
sp++; // 2 chars
}
} else {
// 3 bytes, 16 bits
dst[dp++] = (byte)(0xe0 | ((c >> 12)));
dst[dp++] = (byte)(0x80 | ((c >> 6) & 0x3f));
dst[dp++] = (byte)(0x80 | (c & 0x3f));
}
}
if (dp == dst.length) {
return dst;
}
return Arrays.copyOf(dst, dp);
}
/**
* Same encoding as {@link #loopsWithSharedLocal()}, but each loop declares its own
* {@code c} inside the loop body instead of sharing one local across both loops.
*
* @return the destination array itself when completely filled, otherwise a copy
* truncated to the number of bytes written
*/
@Benchmark
public byte[] loopsWithScopedLocal() {
int dp = 0;
int sp = 0;
// NOTE(review): as above, only the first half of val is scanned -- confirm this is intended.
int sl = val.length >> 1;
byte[] dst = new byte[sl * 3];
while (sp < sl) {
// ascii fast loop;
char c = getChar(val, sp);
if (c >= '\u0080') {
break;
}
dst[dp++] = (byte)c;
sp++;
}
// Second loop: general encoding of the remaining chars, with its own scoped 'c'
while (sp < sl) {
char c = getChar(val, sp++);
if (c < 0x80) {
dst[dp++] = (byte)c;
} else if (c < 0x800) {
dst[dp++] = (byte)(0xc0 | (c >> 6));
dst[dp++] = (byte)(0x80 | (c & 0x3f));
} else if (Character.isSurrogate(c)) {
// uc stays negative unless a valid high/low surrogate pair is found
int uc = -1;
char c2;
if (Character.isHighSurrogate(c) && sp < sl &&
Character.isLowSurrogate(c2 = getChar(val, sp))) {
uc = Character.toCodePoint(c, c2);
}
if (uc < 0) {
dst[dp++] = '?';
} else {
dst[dp++] = (byte)(0xf0 | ((uc >> 18)));
dst[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
dst[dp++] = (byte)(0x80 | ((uc >> 6) & 0x3f));
dst[dp++] = (byte)(0x80 | (uc & 0x3f));
sp++; // 2 chars
}
} else {
// 3 bytes, 16 bits
dst[dp++] = (byte)(0xe0 | ((c >> 12)));
dst[dp++] = (byte)(0x80 | ((c >> 6) & 0x3f));
dst[dp++] = (byte)(0x80 | (c & 0x3f));
}
}
if (dp == dst.length) {
return dst;
}
return Arrays.copyOf(dst, dp);
}
// Plain array read, kept as a separate static helper used by both benchmark methods
static char getChar(char[] val, int index) {
return val[index];
}
}