8279888: Local variable independently used by multiple loops can interfere with loop optimizations
Co-authored-by: Claes Redestad <redestad@openjdk.org> Reviewed-by: thartmann, kvn
This commit is contained in:
parent
4c22a9bcf9
commit
32593df392
src/hotspot/share
ci
opto
test
hotspot/jtreg/compiler/c2/irTests
micro/org/openjdk/bench/vm/compiler
@ -2288,21 +2288,48 @@ ciTypeFlow::Block* ciTypeFlow::clone_loop_head(Loop* lp, StateVector* temp_vecto
|
||||
assert(!clone->has_pre_order(), "just created");
|
||||
clone->set_next_pre_order();
|
||||
|
||||
// Insert clone after (orig) tail in reverse post order
|
||||
clone->set_rpo_next(tail->rpo_next());
|
||||
tail->set_rpo_next(clone);
|
||||
|
||||
// tail->head becomes tail->clone
|
||||
for (SuccIter iter(tail); !iter.done(); iter.next()) {
|
||||
if (iter.succ() == head) {
|
||||
iter.set_succ(clone);
|
||||
// Update predecessor information
|
||||
head->predecessors()->remove(tail);
|
||||
clone->predecessors()->append(tail);
|
||||
// Accumulate profiled count for all backedges that share this loop's head
|
||||
int total_count = lp->profiled_count();
|
||||
for (Loop* lp1 = lp->parent(); lp1 != NULL; lp1 = lp1->parent()) {
|
||||
for (Loop* lp2 = lp1; lp2 != NULL; lp2 = lp2->sibling()) {
|
||||
if (lp2->head() == head && !lp2->tail()->is_backedge_copy()) {
|
||||
total_count += lp2->profiled_count();
|
||||
}
|
||||
}
|
||||
}
|
||||
flow_block(tail, temp_vector, temp_set);
|
||||
// Have the most frequent ones branch to the clone instead
|
||||
int count = 0;
|
||||
int nb = 0;
|
||||
Block* latest_tail = tail;
|
||||
bool done = false;
|
||||
for (Loop* lp1 = lp; lp1 != NULL && !done; lp1 = lp1->parent()) {
|
||||
for (Loop* lp2 = lp1; lp2 != NULL && !done; lp2 = lp2->sibling()) {
|
||||
if (lp2->head() == head && !lp2->tail()->is_backedge_copy()) {
|
||||
count += lp2->profiled_count();
|
||||
if (lp2->tail()->post_order() < latest_tail->post_order()) {
|
||||
latest_tail = lp2->tail();
|
||||
}
|
||||
nb++;
|
||||
for (SuccIter iter(lp2->tail()); !iter.done(); iter.next()) {
|
||||
if (iter.succ() == head) {
|
||||
iter.set_succ(clone);
|
||||
// Update predecessor information
|
||||
head->predecessors()->remove(lp2->tail());
|
||||
clone->predecessors()->append(lp2->tail());
|
||||
}
|
||||
}
|
||||
flow_block(lp2->tail(), temp_vector, temp_set);
|
||||
if (total_count == 0 || count > (total_count * .9)) {
|
||||
done = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
assert(nb >= 1, "at least one new");
|
||||
clone->set_rpo_next(latest_tail->rpo_next());
|
||||
latest_tail->set_rpo_next(clone);
|
||||
if (head == tail) {
|
||||
assert(nb == 1, "only when the head is not shared");
|
||||
// For self-loops, clone->head becomes clone->clone
|
||||
flow_block(clone, temp_vector, temp_set);
|
||||
for (SuccIter iter(clone); !iter.done(); iter.next()) {
|
||||
@ -2454,23 +2481,29 @@ void ciTypeFlow::PreorderLoops::next() {
|
||||
}
|
||||
|
||||
// If the tail is a branch to the head, retrieve how many times that path was taken from profiling
|
||||
int ciTypeFlow::profiled_count(ciTypeFlow::Loop* loop) {
|
||||
ciMethodData* methodData = method()->method_data();
|
||||
int ciTypeFlow::Loop::profiled_count() {
|
||||
if (_profiled_count >= 0) {
|
||||
return _profiled_count;
|
||||
}
|
||||
ciMethodData* methodData = outer()->method()->method_data();
|
||||
if (!methodData->is_mature()) {
|
||||
_profiled_count = 0;
|
||||
return 0;
|
||||
}
|
||||
ciTypeFlow::Block* tail = loop->tail();
|
||||
ciTypeFlow::Block* tail = this->tail();
|
||||
if (tail->control() == -1 || tail->has_trap()) {
|
||||
_profiled_count = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
ciProfileData* data = methodData->bci_to_data(tail->control());
|
||||
|
||||
if (data == NULL || !data->is_JumpData()) {
|
||||
_profiled_count = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
ciBytecodeStream iter(method());
|
||||
ciBytecodeStream iter(outer()->method());
|
||||
iter.reset_to_bci(tail->control());
|
||||
|
||||
bool is_an_if = false;
|
||||
@ -2509,21 +2542,25 @@ int ciTypeFlow::profiled_count(ciTypeFlow::Loop* loop) {
|
||||
GrowableArray<ciTypeFlow::Block*>* succs = tail->successors();
|
||||
|
||||
if (!is_an_if) {
|
||||
assert(((wide ? iter.get_far_dest() : iter.get_dest()) == loop->head()->start()) == (succs->at(ciTypeFlow::GOTO_TARGET) == loop->head()), "branch should lead to loop head");
|
||||
if (succs->at(ciTypeFlow::GOTO_TARGET) == loop->head()) {
|
||||
return method()->scale_count(data->as_JumpData()->taken());
|
||||
assert(((wide ? iter.get_far_dest() : iter.get_dest()) == head()->start()) == (succs->at(ciTypeFlow::GOTO_TARGET) == head()), "branch should lead to loop head");
|
||||
if (succs->at(ciTypeFlow::GOTO_TARGET) == head()) {
|
||||
_profiled_count = outer()->method()->scale_count(data->as_JumpData()->taken());
|
||||
return _profiled_count;
|
||||
}
|
||||
} else {
|
||||
assert((iter.get_dest() == loop->head()->start()) == (succs->at(ciTypeFlow::IF_TAKEN) == loop->head()), "bytecode and CFG not consistent");
|
||||
assert((tail->limit() == loop->head()->start()) == (succs->at(ciTypeFlow::IF_NOT_TAKEN) == loop->head()), "bytecode and CFG not consistent");
|
||||
if (succs->at(ciTypeFlow::IF_TAKEN) == loop->head()) {
|
||||
return method()->scale_count(data->as_JumpData()->taken());
|
||||
} else if (succs->at(ciTypeFlow::IF_NOT_TAKEN) == loop->head()) {
|
||||
return method()->scale_count(data->as_BranchData()->not_taken());
|
||||
assert((iter.get_dest() == head()->start()) == (succs->at(ciTypeFlow::IF_TAKEN) == head()), "bytecode and CFG not consistent");
|
||||
assert((tail->limit() == head()->start()) == (succs->at(ciTypeFlow::IF_NOT_TAKEN) == head()), "bytecode and CFG not consistent");
|
||||
if (succs->at(ciTypeFlow::IF_TAKEN) == head()) {
|
||||
_profiled_count = outer()->method()->scale_count(data->as_JumpData()->taken());
|
||||
return _profiled_count;
|
||||
} else if (succs->at(ciTypeFlow::IF_NOT_TAKEN) == head()) {
|
||||
_profiled_count = outer()->method()->scale_count(data->as_BranchData()->not_taken());
|
||||
return _profiled_count;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
_profiled_count = 0;
|
||||
return _profiled_count;
|
||||
}
|
||||
|
||||
bool ciTypeFlow::Loop::at_insertion_point(Loop* lp, Loop* current) {
|
||||
@ -2535,8 +2572,8 @@ bool ciTypeFlow::Loop::at_insertion_point(Loop* lp, Loop* current) {
|
||||
}
|
||||
// In the case of a shared head, make the most frequent head/tail (as reported by profiling) the inner loop
|
||||
if (current->head() == lp->head()) {
|
||||
int lp_count = outer()->profiled_count(lp);
|
||||
int current_count = outer()->profiled_count(current);
|
||||
int lp_count = lp->profiled_count();
|
||||
int current_count = current->profiled_count();
|
||||
if (current_count < lp_count) {
|
||||
return true;
|
||||
} else if (current_count > lp_count) {
|
||||
|
@ -716,6 +716,7 @@ public:
|
||||
Block* _tail; // Tail of loop
|
||||
bool _irreducible;
|
||||
LocalSet _def_locals;
|
||||
int _profiled_count;
|
||||
|
||||
ciTypeFlow* outer() const { return head()->outer(); }
|
||||
bool at_insertion_point(Loop* lp, Loop* current);
|
||||
@ -724,7 +725,7 @@ public:
|
||||
Loop(Block* head, Block* tail) :
|
||||
_parent(NULL), _sibling(NULL), _child(NULL),
|
||||
_head(head), _tail(tail),
|
||||
_irreducible(false), _def_locals() {}
|
||||
_irreducible(false), _def_locals(), _profiled_count(-1) {}
|
||||
|
||||
Loop* parent() const { return _parent; }
|
||||
Loop* sibling() const { return _sibling; }
|
||||
@ -760,6 +761,8 @@ public:
|
||||
|
||||
bool is_root() const { return _tail->pre_order() == max_jint; }
|
||||
|
||||
int profiled_count();
|
||||
|
||||
void print(outputStream* st = tty, int indent = 0) const PRODUCT_RETURN;
|
||||
};
|
||||
|
||||
@ -916,8 +919,6 @@ private:
|
||||
// Create the block map, which indexes blocks in pre_order.
|
||||
void map_blocks();
|
||||
|
||||
int profiled_count(ciTypeFlow::Loop* loop);
|
||||
|
||||
public:
|
||||
// Perform type inference flow analysis.
|
||||
void do_flow();
|
||||
|
@ -766,6 +766,14 @@
|
||||
"for at most jint_max / StressLongCountedLoop") \
|
||||
range(0, max_juint) \
|
||||
\
|
||||
/* Adjacent string literals are pasted together verbatim, so the first  */ \
/* piece must end with a space to keep "is" and "expected" apart.       */ \
product(bool, DuplicateBackedge, true, DIAGNOSTIC,                         \
        "Transform loop with a merge point into 2 loops if inner loop is " \
        "expected to optimize better")                                     \
|
||||
\
|
||||
/* Adjacent string literals are pasted together verbatim, so the first  */ \
/* piece must end with a space to keep "benefit" and "analysis" apart.  */ \
develop(bool, StressDuplicateBackedge, false,                              \
        "Run DuplicateBackedge whenever possible ignoring benefit "        \
        "analysis")                                                        \
|
||||
\
|
||||
product(bool, VerifyReceiverTypes, trueInDebug, DIAGNOSTIC, \
|
||||
"Verify receiver types at runtime") \
|
||||
|
||||
|
@ -1014,192 +1014,165 @@ bool PhaseIdealLoop::loop_predication_should_follow_branches(IdealLoopTree *loop
|
||||
return follow_branches;
|
||||
}
|
||||
|
||||
// Compute probability of reaching some CFG node from a fixed
|
||||
// dominating CFG node
|
||||
class PathFrequency {
|
||||
private:
|
||||
Node* _dom; // frequencies are computed relative to this node
|
||||
Node_Stack _stack;
|
||||
GrowableArray<float> _freqs_stack; // keep track of intermediate result at regions
|
||||
GrowableArray<float> _freqs; // cache frequencies
|
||||
PhaseIdealLoop* _phase;
|
||||
|
||||
float check_and_truncate_frequency(float f) {
|
||||
assert(f >= 0, "Incorrect frequency");
|
||||
// We do not perform an exact (f <= 1) check
|
||||
// this would be error prone with rounding of floats.
|
||||
// Performing a check like (f <= 1+eps) would be of benefit,
|
||||
// however, it is not evident how to determine such an eps,
|
||||
// given that an arbitrary number of add/mul operations
|
||||
// are performed on these frequencies.
|
||||
return (f > 1) ? 1 : f;
|
||||
}
|
||||
|
||||
public:
|
||||
PathFrequency(Node* dom, PhaseIdealLoop* phase)
|
||||
: _dom(dom), _stack(0), _phase(phase) {
|
||||
}
|
||||
|
||||
float to(Node* n) {
|
||||
// post order walk on the CFG graph from n to _dom
|
||||
IdealLoopTree* loop = _phase->get_loop(_dom);
|
||||
Node* c = n;
|
||||
for (;;) {
|
||||
assert(_phase->get_loop(c) == loop, "have to be in the same loop");
|
||||
if (c == _dom || _freqs.at_grow(c->_idx, -1) >= 0) {
|
||||
float f = c == _dom ? 1 : _freqs.at(c->_idx);
|
||||
Node* prev = c;
|
||||
while (_stack.size() > 0 && prev == c) {
|
||||
Node* n = _stack.node();
|
||||
if (!n->is_Region()) {
|
||||
if (_phase->get_loop(n) != _phase->get_loop(n->in(0))) {
|
||||
// Found an inner loop: compute frequency of reaching this
|
||||
// exit from the loop head by looking at the number of
|
||||
// times each loop exit was taken
|
||||
IdealLoopTree* inner_loop = _phase->get_loop(n->in(0));
|
||||
LoopNode* inner_head = inner_loop->_head->as_Loop();
|
||||
assert(_phase->get_loop(n) == loop, "only 1 inner loop");
|
||||
if (inner_head->is_OuterStripMinedLoop()) {
|
||||
inner_head->verify_strip_mined(1);
|
||||
if (n->in(0) == inner_head->in(LoopNode::LoopBackControl)->in(0)) {
|
||||
n = n->in(0)->in(0)->in(0);
|
||||
}
|
||||
inner_loop = inner_loop->_child;
|
||||
inner_head = inner_loop->_head->as_Loop();
|
||||
inner_head->verify_strip_mined(1);
|
||||
float PathFrequency::to(Node* n) {
|
||||
// post order walk on the CFG graph from n to _dom
|
||||
IdealLoopTree* loop = _phase->get_loop(_dom);
|
||||
Node* c = n;
|
||||
for (;;) {
|
||||
assert(_phase->get_loop(c) == loop, "have to be in the same loop");
|
||||
if (c == _dom || _freqs.at_grow(c->_idx, -1) >= 0) {
|
||||
float f = c == _dom ? 1 : _freqs.at(c->_idx);
|
||||
Node* prev = c;
|
||||
while (_stack.size() > 0 && prev == c) {
|
||||
Node* n = _stack.node();
|
||||
if (!n->is_Region()) {
|
||||
if (_phase->get_loop(n) != _phase->get_loop(n->in(0))) {
|
||||
// Found an inner loop: compute frequency of reaching this
|
||||
// exit from the loop head by looking at the number of
|
||||
// times each loop exit was taken
|
||||
IdealLoopTree* inner_loop = _phase->get_loop(n->in(0));
|
||||
LoopNode* inner_head = inner_loop->_head->as_Loop();
|
||||
assert(_phase->get_loop(n) == loop, "only 1 inner loop");
|
||||
if (inner_head->is_OuterStripMinedLoop()) {
|
||||
inner_head->verify_strip_mined(1);
|
||||
if (n->in(0) == inner_head->in(LoopNode::LoopBackControl)->in(0)) {
|
||||
n = n->in(0)->in(0)->in(0);
|
||||
}
|
||||
float loop_exit_cnt = 0.0f;
|
||||
for (uint i = 0; i < inner_loop->_body.size(); i++) {
|
||||
Node *n = inner_loop->_body[i];
|
||||
float c = inner_loop->compute_profile_trip_cnt_helper(n);
|
||||
loop_exit_cnt += c;
|
||||
}
|
||||
float cnt = -1;
|
||||
if (n->in(0)->is_If()) {
|
||||
IfNode* iff = n->in(0)->as_If();
|
||||
float p = n->in(0)->as_If()->_prob;
|
||||
if (n->Opcode() == Op_IfFalse) {
|
||||
p = 1 - p;
|
||||
}
|
||||
if (p > PROB_MIN) {
|
||||
cnt = p * iff->_fcnt;
|
||||
} else {
|
||||
cnt = 0;
|
||||
}
|
||||
} else {
|
||||
assert(n->in(0)->is_Jump(), "unsupported node kind");
|
||||
JumpNode* jmp = n->in(0)->as_Jump();
|
||||
float p = n->in(0)->as_Jump()->_probs[n->as_JumpProj()->_con];
|
||||
cnt = p * jmp->_fcnt;
|
||||
}
|
||||
float this_exit_f = cnt > 0 ? cnt / loop_exit_cnt : 0;
|
||||
this_exit_f = check_and_truncate_frequency(this_exit_f);
|
||||
f = f * this_exit_f;
|
||||
f = check_and_truncate_frequency(f);
|
||||
} else {
|
||||
float p = -1;
|
||||
if (n->in(0)->is_If()) {
|
||||
p = n->in(0)->as_If()->_prob;
|
||||
if (n->Opcode() == Op_IfFalse) {
|
||||
p = 1 - p;
|
||||
}
|
||||
} else {
|
||||
assert(n->in(0)->is_Jump(), "unsupported node kind");
|
||||
p = n->in(0)->as_Jump()->_probs[n->as_JumpProj()->_con];
|
||||
}
|
||||
f = f * p;
|
||||
f = check_and_truncate_frequency(f);
|
||||
inner_loop = inner_loop->_child;
|
||||
inner_head = inner_loop->_head->as_Loop();
|
||||
inner_head->verify_strip_mined(1);
|
||||
}
|
||||
_freqs.at_put_grow(n->_idx, (float)f, -1);
|
||||
_stack.pop();
|
||||
} else {
|
||||
float prev_f = _freqs_stack.pop();
|
||||
float new_f = f;
|
||||
f = new_f + prev_f;
|
||||
float loop_exit_cnt = 0.0f;
|
||||
for (uint i = 0; i < inner_loop->_body.size(); i++) {
|
||||
Node *n = inner_loop->_body[i];
|
||||
float c = inner_loop->compute_profile_trip_cnt_helper(n);
|
||||
loop_exit_cnt += c;
|
||||
}
|
||||
float cnt = -1;
|
||||
if (n->in(0)->is_If()) {
|
||||
IfNode* iff = n->in(0)->as_If();
|
||||
float p = n->in(0)->as_If()->_prob;
|
||||
if (n->Opcode() == Op_IfFalse) {
|
||||
p = 1 - p;
|
||||
}
|
||||
if (p > PROB_MIN) {
|
||||
cnt = p * iff->_fcnt;
|
||||
} else {
|
||||
cnt = 0;
|
||||
}
|
||||
} else {
|
||||
assert(n->in(0)->is_Jump(), "unsupported node kind");
|
||||
JumpNode* jmp = n->in(0)->as_Jump();
|
||||
float p = n->in(0)->as_Jump()->_probs[n->as_JumpProj()->_con];
|
||||
cnt = p * jmp->_fcnt;
|
||||
}
|
||||
float this_exit_f = cnt > 0 ? cnt / loop_exit_cnt : 0;
|
||||
this_exit_f = check_and_truncate_frequency(this_exit_f);
|
||||
f = f * this_exit_f;
|
||||
f = check_and_truncate_frequency(f);
|
||||
uint i = _stack.index();
|
||||
if (i < n->req()) {
|
||||
c = n->in(i);
|
||||
_stack.set_index(i+1);
|
||||
_freqs_stack.push(f);
|
||||
} else {
|
||||
_freqs.at_put_grow(n->_idx, f, -1);
|
||||
_stack.pop();
|
||||
}
|
||||
}
|
||||
}
|
||||
if (_stack.size() == 0) {
|
||||
return check_and_truncate_frequency(f);
|
||||
}
|
||||
} else if (c->is_Loop()) {
|
||||
ShouldNotReachHere();
|
||||
c = c->in(LoopNode::EntryControl);
|
||||
} else if (c->is_Region()) {
|
||||
_freqs_stack.push(0);
|
||||
_stack.push(c, 2);
|
||||
c = c->in(1);
|
||||
} else {
|
||||
if (c->is_IfProj()) {
|
||||
IfNode* iff = c->in(0)->as_If();
|
||||
if (iff->_prob == PROB_UNKNOWN) {
|
||||
// assume never taken
|
||||
_freqs.at_put_grow(c->_idx, 0, -1);
|
||||
} else if (_phase->get_loop(c) != _phase->get_loop(iff)) {
|
||||
if (iff->_fcnt == COUNT_UNKNOWN) {
|
||||
// assume never taken
|
||||
_freqs.at_put_grow(c->_idx, 0, -1);
|
||||
} else {
|
||||
// skip over loop
|
||||
_stack.push(c, 1);
|
||||
c = _phase->get_loop(c->in(0))->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl);
|
||||
}
|
||||
} else {
|
||||
_stack.push(c, 1);
|
||||
c = iff;
|
||||
}
|
||||
} else if (c->is_JumpProj()) {
|
||||
JumpNode* jmp = c->in(0)->as_Jump();
|
||||
if (_phase->get_loop(c) != _phase->get_loop(jmp)) {
|
||||
if (jmp->_fcnt == COUNT_UNKNOWN) {
|
||||
// assume never taken
|
||||
_freqs.at_put_grow(c->_idx, 0, -1);
|
||||
float p = -1;
|
||||
if (n->in(0)->is_If()) {
|
||||
p = n->in(0)->as_If()->_prob;
|
||||
if (n->Opcode() == Op_IfFalse) {
|
||||
p = 1 - p;
|
||||
}
|
||||
} else {
|
||||
// skip over loop
|
||||
_stack.push(c, 1);
|
||||
c = _phase->get_loop(c->in(0))->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl);
|
||||
assert(n->in(0)->is_Jump(), "unsupported node kind");
|
||||
p = n->in(0)->as_Jump()->_probs[n->as_JumpProj()->_con];
|
||||
}
|
||||
} else {
|
||||
_stack.push(c, 1);
|
||||
c = jmp;
|
||||
f = f * p;
|
||||
f = check_and_truncate_frequency(f);
|
||||
}
|
||||
} else if (c->Opcode() == Op_CatchProj &&
|
||||
c->in(0)->Opcode() == Op_Catch &&
|
||||
c->in(0)->in(0)->is_Proj() &&
|
||||
c->in(0)->in(0)->in(0)->is_Call()) {
|
||||
// assume exceptions are never thrown
|
||||
uint con = c->as_Proj()->_con;
|
||||
if (con == CatchProjNode::fall_through_index) {
|
||||
Node* call = c->in(0)->in(0)->in(0)->in(0);
|
||||
if (_phase->get_loop(call) != _phase->get_loop(c)) {
|
||||
_freqs.at_put_grow(c->_idx, 0, -1);
|
||||
} else {
|
||||
c = call;
|
||||
}
|
||||
} else {
|
||||
assert(con >= CatchProjNode::catch_all_index, "what else?");
|
||||
_freqs.at_put_grow(c->_idx, 0, -1);
|
||||
}
|
||||
} else if (c->unique_ctrl_out_or_null() == NULL && !c->is_If() && !c->is_Jump()) {
|
||||
ShouldNotReachHere();
|
||||
_freqs.at_put_grow(n->_idx, (float)f, -1);
|
||||
_stack.pop();
|
||||
} else {
|
||||
c = c->in(0);
|
||||
float prev_f = _freqs_stack.pop();
|
||||
float new_f = f;
|
||||
f = new_f + prev_f;
|
||||
f = check_and_truncate_frequency(f);
|
||||
uint i = _stack.index();
|
||||
if (i < n->req()) {
|
||||
c = n->in(i);
|
||||
_stack.set_index(i+1);
|
||||
_freqs_stack.push(f);
|
||||
} else {
|
||||
_freqs.at_put_grow(n->_idx, f, -1);
|
||||
_stack.pop();
|
||||
}
|
||||
}
|
||||
}
|
||||
if (_stack.size() == 0) {
|
||||
return check_and_truncate_frequency(f);
|
||||
}
|
||||
} else if (c->is_Loop()) {
|
||||
ShouldNotReachHere();
|
||||
c = c->in(LoopNode::EntryControl);
|
||||
} else if (c->is_Region()) {
|
||||
_freqs_stack.push(0);
|
||||
_stack.push(c, 2);
|
||||
c = c->in(1);
|
||||
} else {
|
||||
if (c->is_IfProj()) {
|
||||
IfNode* iff = c->in(0)->as_If();
|
||||
if (iff->_prob == PROB_UNKNOWN) {
|
||||
// assume never taken
|
||||
_freqs.at_put_grow(c->_idx, 0, -1);
|
||||
} else if (_phase->get_loop(c) != _phase->get_loop(iff)) {
|
||||
if (iff->_fcnt == COUNT_UNKNOWN) {
|
||||
// assume never taken
|
||||
_freqs.at_put_grow(c->_idx, 0, -1);
|
||||
} else {
|
||||
// skip over loop
|
||||
_stack.push(c, 1);
|
||||
c = _phase->get_loop(c->in(0))->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl);
|
||||
}
|
||||
} else {
|
||||
_stack.push(c, 1);
|
||||
c = iff;
|
||||
}
|
||||
} else if (c->is_JumpProj()) {
|
||||
JumpNode* jmp = c->in(0)->as_Jump();
|
||||
if (_phase->get_loop(c) != _phase->get_loop(jmp)) {
|
||||
if (jmp->_fcnt == COUNT_UNKNOWN) {
|
||||
// assume never taken
|
||||
_freqs.at_put_grow(c->_idx, 0, -1);
|
||||
} else {
|
||||
// skip over loop
|
||||
_stack.push(c, 1);
|
||||
c = _phase->get_loop(c->in(0))->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl);
|
||||
}
|
||||
} else {
|
||||
_stack.push(c, 1);
|
||||
c = jmp;
|
||||
}
|
||||
} else if (c->Opcode() == Op_CatchProj &&
|
||||
c->in(0)->Opcode() == Op_Catch &&
|
||||
c->in(0)->in(0)->is_Proj() &&
|
||||
c->in(0)->in(0)->in(0)->is_Call()) {
|
||||
// assume exceptions are never thrown
|
||||
uint con = c->as_Proj()->_con;
|
||||
if (con == CatchProjNode::fall_through_index) {
|
||||
Node* call = c->in(0)->in(0)->in(0)->in(0);
|
||||
if (_phase->get_loop(call) != _phase->get_loop(c)) {
|
||||
_freqs.at_put_grow(c->_idx, 0, -1);
|
||||
} else {
|
||||
c = call;
|
||||
}
|
||||
} else {
|
||||
assert(con >= CatchProjNode::catch_all_index, "what else?");
|
||||
_freqs.at_put_grow(c->_idx, 0, -1);
|
||||
}
|
||||
} else if (c->unique_ctrl_out_or_null() == NULL && !c->is_If() && !c->is_Jump()) {
|
||||
ShouldNotReachHere();
|
||||
} else {
|
||||
c = c->in(0);
|
||||
}
|
||||
}
|
||||
ShouldNotReachHere();
|
||||
return -1;
|
||||
}
|
||||
};
|
||||
ShouldNotReachHere();
|
||||
return -1;
|
||||
}
|
||||
|
||||
void PhaseIdealLoop::loop_predication_follow_branches(Node *n, IdealLoopTree *loop, float loop_trip_cnt,
|
||||
PathFrequency& pf, Node_Stack& stack, VectorSet& seen,
|
||||
|
@ -3602,6 +3602,8 @@ bool IdealLoopTree::iteration_split_impl(PhaseIdealLoop *phase, Node_List &old_n
|
||||
} else if (policy_unswitching(phase)) {
|
||||
phase->do_unswitching(this, old_new);
|
||||
return false; // need to recalculate idom data
|
||||
} else if (phase->duplicate_loop_backedge(this, old_new)) {
|
||||
return false;
|
||||
} else if (_head->is_LongCountedLoop()) {
|
||||
phase->create_loop_nest(this, old_new);
|
||||
}
|
||||
@ -3630,6 +3632,9 @@ bool IdealLoopTree::iteration_split_impl(PhaseIdealLoop *phase, Node_List &old_n
|
||||
phase->do_maximally_unroll(this, old_new);
|
||||
return true;
|
||||
}
|
||||
if (StressDuplicateBackedge && phase->duplicate_loop_backedge(this, old_new)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
uint est_peeling = estimate_peeling(phase);
|
||||
|
@ -1420,6 +1420,7 @@ public:
|
||||
|
||||
// Partially peel loop up through last_peel node.
|
||||
bool partial_peel( IdealLoopTree *loop, Node_List &old_new );
|
||||
bool duplicate_loop_backedge(IdealLoopTree *loop, Node_List &old_new);
|
||||
|
||||
// Create a scheduled list of nodes control dependent on ctrl set.
|
||||
void scheduled_nodelist( IdealLoopTree *loop, VectorSet& ctrl, Node_List &sched );
|
||||
@ -1466,8 +1467,8 @@ public:
|
||||
// "Nearly" because all Nodes have been cloned from the original in the loop,
|
||||
// but the fall-in edges to the Cmp are different. Clone bool/Cmp pairs
|
||||
// through the Phi recursively, and return a Bool.
|
||||
Node *clone_iff( PhiNode *phi, IdealLoopTree *loop );
|
||||
CmpNode *clone_bool( PhiNode *phi, IdealLoopTree *loop );
|
||||
Node* clone_iff(PhiNode* phi);
|
||||
CmpNode* clone_bool(PhiNode* phi);
|
||||
|
||||
|
||||
// Rework addressing expressions to get the most loop-invariant stuff
|
||||
@ -1680,6 +1681,20 @@ public:
|
||||
void push_pinned_nodes_thru_region(IfNode* dom_if, Node* region);
|
||||
|
||||
bool try_merge_identical_ifs(Node* n);
|
||||
|
||||
void clone_loop_body(const Node_List& body, Node_List &old_new, CloneMap* cm);
|
||||
|
||||
void fix_body_edges(const Node_List &body, IdealLoopTree* loop, const Node_List &old_new, int dd,
|
||||
IdealLoopTree* parent, bool partial);
|
||||
|
||||
void fix_ctrl_uses(const Node_List& body, const IdealLoopTree* loop, Node_List &old_new, CloneLoopMode mode,
|
||||
Node* side_by_side_idom, CloneMap* cm, Node_List &worklist);
|
||||
|
||||
void fix_data_uses(Node_List& body, IdealLoopTree* loop, CloneLoopMode mode, IdealLoopTree* outer_loop,
|
||||
uint new_counter, Node_List& old_new, Node_List& worklist, Node_List*& split_if_set,
|
||||
Node_List*& split_bool_set, Node_List*& split_cex_set);
|
||||
|
||||
void finish_clone_loop(Node_List* split_if_set, Node_List* split_bool_set, Node_List* split_cex_set);
|
||||
};
|
||||
|
||||
|
||||
@ -1829,4 +1844,33 @@ public:
|
||||
IdealLoopTree* current() { return _curnt; } // Return current value of iterator.
|
||||
};
|
||||
|
||||
// Compute probability of reaching some CFG node from a fixed
|
||||
// dominating CFG node
|
||||
class PathFrequency {
|
||||
private:
|
||||
Node* _dom; // frequencies are computed relative to this node
|
||||
Node_Stack _stack;
|
||||
GrowableArray<float> _freqs_stack; // keep track of intermediate result at regions
|
||||
GrowableArray<float> _freqs; // cache frequencies
|
||||
PhaseIdealLoop* _phase;
|
||||
|
||||
float check_and_truncate_frequency(float f) {
|
||||
assert(f >= 0, "Incorrect frequency");
|
||||
// We do not perform an exact (f <= 1) check
|
||||
// this would be error prone with rounding of floats.
|
||||
// Performing a check like (f <= 1+eps) would be of benefit,
|
||||
// however, it is not evident how to determine such an eps,
|
||||
// given that an arbitrary number of add/mul operations
|
||||
// are performed on these frequencies.
|
||||
return (f > 1) ? 1 : f;
|
||||
}
|
||||
|
||||
public:
|
||||
PathFrequency(Node* dom, PhaseIdealLoop* phase)
|
||||
: _dom(dom), _stack(0), _phase(phase) {
|
||||
}
|
||||
|
||||
float to(Node* n);
|
||||
};
|
||||
|
||||
#endif // SHARE_OPTO_LOOPNODE_HPP
|
||||
|
@ -1836,14 +1836,14 @@ void PhaseIdealLoop::split_if_with_blocks(VectorSet &visited, Node_Stack &nstack
|
||||
// "Nearly" because all Nodes have been cloned from the original in the loop,
|
||||
// but the fall-in edges to the Cmp are different. Clone bool/Cmp pairs
|
||||
// through the Phi recursively, and return a Bool.
|
||||
Node* PhaseIdealLoop::clone_iff(PhiNode *phi, IdealLoopTree *loop) {
|
||||
Node* PhaseIdealLoop::clone_iff(PhiNode* phi) {
|
||||
|
||||
// Convert this Phi into a Phi merging Bools
|
||||
uint i;
|
||||
for (i = 1; i < phi->req(); i++) {
|
||||
Node *b = phi->in(i);
|
||||
if (b->is_Phi()) {
|
||||
_igvn.replace_input_of(phi, i, clone_iff(b->as_Phi(), loop));
|
||||
_igvn.replace_input_of(phi, i, clone_iff(b->as_Phi()));
|
||||
} else {
|
||||
assert(b->is_Bool() || b->Opcode() == Op_Opaque4, "");
|
||||
}
|
||||
@ -1923,13 +1923,13 @@ Node* PhaseIdealLoop::clone_iff(PhiNode *phi, IdealLoopTree *loop) {
|
||||
// "Nearly" because all Nodes have been cloned from the original in the loop,
|
||||
// but the fall-in edges to the Cmp are different. Clone bool/Cmp pairs
|
||||
// through the Phi recursively, and return a Bool.
|
||||
CmpNode *PhaseIdealLoop::clone_bool( PhiNode *phi, IdealLoopTree *loop ) {
|
||||
CmpNode*PhaseIdealLoop::clone_bool(PhiNode* phi) {
|
||||
uint i;
|
||||
// Convert this Phi into a Phi merging Bools
|
||||
for( i = 1; i < phi->req(); i++ ) {
|
||||
Node *b = phi->in(i);
|
||||
if( b->is_Phi() ) {
|
||||
_igvn.replace_input_of(phi, i, clone_bool( b->as_Phi(), loop ));
|
||||
_igvn.replace_input_of(phi, i, clone_bool(b->as_Phi()));
|
||||
} else {
|
||||
assert( b->is_Cmp() || b->is_top(), "inputs are all Cmp or TOP" );
|
||||
}
|
||||
@ -2268,7 +2268,7 @@ void PhaseIdealLoop::clone_outer_loop(LoopNode* head, CloneLoopMode mode, IdealL
|
||||
_igvn.register_new_node_with_optimizer(new_sfpt);
|
||||
_igvn.register_new_node_with_optimizer(new_cle_out);
|
||||
}
|
||||
// Some other transformation may have pessimistically assign some
|
||||
// Some other transformation may have pessimistically assigned some
|
||||
// data nodes to the outer loop. Set their control so they are out
|
||||
// of the outer loop.
|
||||
ResourceMark rm;
|
||||
@ -2340,7 +2340,6 @@ void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd
|
||||
}
|
||||
|
||||
CloneMap& cm = C->clone_map();
|
||||
Dict* dict = cm.dict();
|
||||
if (C->do_vector_loop()) {
|
||||
cm.set_clone_idx(cm.max_gen()+1);
|
||||
#ifndef PRODUCT
|
||||
@ -2352,49 +2351,14 @@ void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd
|
||||
}
|
||||
|
||||
// Step 1: Clone the loop body. Make the old->new mapping.
|
||||
uint i;
|
||||
for (i = 0; i < loop->_body.size(); i++) {
|
||||
Node* old = loop->_body.at(i);
|
||||
Node* nnn = old->clone();
|
||||
old_new.map(old->_idx, nnn);
|
||||
if (old->is_reduction()) {
|
||||
// Reduction flag is not copied by default. Copy it here when cloning the entire loop body.
|
||||
nnn->add_flag(Node::Flag_is_reduction);
|
||||
}
|
||||
if (C->do_vector_loop()) {
|
||||
cm.verify_insert_and_clone(old, nnn, cm.clone_idx());
|
||||
}
|
||||
_igvn.register_new_node_with_optimizer(nnn);
|
||||
}
|
||||
clone_loop_body(loop->_body, old_new, &cm);
|
||||
|
||||
IdealLoopTree* outer_loop = (head->is_strip_mined() && mode != IgnoreStripMined) ? get_loop(head->as_CountedLoop()->outer_loop()) : loop;
|
||||
|
||||
// Step 2: Fix the edges in the new body. If the old input is outside the
|
||||
// loop use it. If the old input is INside the loop, use the corresponding
|
||||
// new node instead.
|
||||
for( i = 0; i < loop->_body.size(); i++ ) {
|
||||
Node *old = loop->_body.at(i);
|
||||
Node *nnn = old_new[old->_idx];
|
||||
// Fix CFG/Loop controlling the new node
|
||||
if (has_ctrl(old)) {
|
||||
set_ctrl(nnn, old_new[get_ctrl(old)->_idx]);
|
||||
} else {
|
||||
set_loop(nnn, outer_loop->_parent);
|
||||
if (old->outcnt() > 0) {
|
||||
set_idom( nnn, old_new[idom(old)->_idx], dd );
|
||||
}
|
||||
}
|
||||
// Correct edges to the new node
|
||||
for( uint j = 0; j < nnn->req(); j++ ) {
|
||||
Node *n = nnn->in(j);
|
||||
if( n ) {
|
||||
IdealLoopTree *old_in_loop = get_loop( has_ctrl(n) ? get_ctrl(n) : n );
|
||||
if( loop->is_member( old_in_loop ) )
|
||||
nnn->set_req(j, old_new[n->_idx]);
|
||||
}
|
||||
}
|
||||
_igvn.hash_find_insert(nnn);
|
||||
}
|
||||
fix_body_edges(loop->_body, loop, old_new, dd, outer_loop->_parent, false);
|
||||
|
||||
Node_List extra_data_nodes; // data nodes in the outer strip mined loop
|
||||
clone_outer_loop(head, mode, loop, outer_loop, dd, old_new, extra_data_nodes);
|
||||
@ -2406,22 +2370,93 @@ void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd
|
||||
// refer to this.
|
||||
Node_List worklist;
|
||||
uint new_counter = C->unique();
|
||||
for( i = 0; i < loop->_body.size(); i++ ) {
|
||||
Node* old = loop->_body.at(i);
|
||||
fix_ctrl_uses(loop->_body, loop, old_new, mode, side_by_side_idom, &cm, worklist);
|
||||
|
||||
// Step 4: If loop-invariant use is not control, it must be dominated by a
|
||||
// loop exit IfFalse/IfTrue. Find "proper" loop exit. Make a Region
|
||||
// there if needed. Make a Phi there merging old and new used values.
|
||||
Node_List *split_if_set = NULL;
|
||||
Node_List *split_bool_set = NULL;
|
||||
Node_List *split_cex_set = NULL;
|
||||
fix_data_uses(loop->_body, loop, mode, outer_loop, new_counter, old_new, worklist, split_if_set, split_bool_set, split_cex_set);
|
||||
|
||||
for (uint i = 0; i < extra_data_nodes.size(); i++) {
|
||||
Node* old = extra_data_nodes.at(i);
|
||||
clone_loop_handle_data_uses(old, old_new, loop, outer_loop, split_if_set,
|
||||
split_bool_set, split_cex_set, worklist, new_counter,
|
||||
mode);
|
||||
}
|
||||
|
||||
// Check for IFs that need splitting/cloning. Happens if an IF outside of
|
||||
// the loop uses a condition set in the loop. The original IF probably
|
||||
// takes control from one or more OLD Regions (which in turn get from NEW
|
||||
// Regions). In any case, there will be a set of Phis for each merge point
|
||||
// from the IF up to where the original BOOL def exits the loop.
|
||||
finish_clone_loop(split_if_set, split_bool_set, split_cex_set);
|
||||
|
||||
}
|
||||
|
||||
// Drains the three optional work lists handed in by the caller. Each list
// may be NULL, in which case it is skipped; otherwise it is emptied here.
//   split_if_set:   nodes whose input 1 may be a Phi; when it is, that input
//                   is replaced by the result of clone_iff() on the Phi.
//   split_bool_set: nodes whose input 1 must be a Phi; that input is replaced
//                   by the result of clone_bool() on the Phi.
//   split_cex_set:  nodes that are passed to split_up() together with their
//                   control input, which must be a Region.
void PhaseIdealLoop::finish_clone_loop(Node_List* split_if_set, Node_List* split_bool_set, Node_List* split_cex_set) {
  if (split_if_set != NULL) {
    while (split_if_set->size() > 0) {
      Node* iff = split_if_set->pop();
      Node* cond = iff->in(1);
      if (!cond->is_Phi()) {
        continue; // nothing to do for this entry
      }
      Node* new_cond = clone_iff(cond->as_Phi());
      _igvn.replace_input_of(iff, 1, new_cond);
    }
  }

  if (split_bool_set != NULL) {
    while (split_bool_set->size() > 0) {
      Node* bol = split_bool_set->pop();
      Node* phi = bol->in(1);
      assert(phi->is_Phi(), "");
      CmpNode* new_cmp = clone_bool(phi->as_Phi());
      _igvn.replace_input_of(bol, 1, new_cmp);
    }
  }

  if (split_cex_set != NULL) {
    while (split_cex_set->size() > 0) {
      Node* cex = split_cex_set->pop();
      // The node is expected to hang off a Region, with a Phi on input 1
      // that is controlled by the same node as that Region.
      assert(cex->in(0)->is_Region(), "");
      assert(cex->in(1)->is_Phi(), "");
      assert(cex->in(0)->in(0) == cex->in(1)->in(0), "");
      split_up(cex, cex->in(0), NULL);
    }
  }
}
|
||||
|
||||
// Hands every node of 'body', in list order, to clone_loop_handle_data_uses().
// All remaining arguments are forwarded unchanged. The three split_* list
// pointers are taken by reference, so whatever the callee stores in them is
// visible to the caller afterwards (e.g. for a later finish_clone_loop()).
void PhaseIdealLoop::fix_data_uses(Node_List& body, IdealLoopTree* loop, CloneLoopMode mode, IdealLoopTree* outer_loop,
                                   uint new_counter, Node_List &old_new, Node_List &worklist, Node_List*& split_if_set,
                                   Node_List*& split_bool_set, Node_List*& split_cex_set) {
  uint idx = 0;
  // The size is re-read on every iteration, exactly like a plain index loop.
  while (idx < body.size()) {
    clone_loop_handle_data_uses(body.at(idx), old_new, loop, outer_loop,
                                split_if_set, split_bool_set, split_cex_set,
                                worklist, new_counter, mode);
    ++idx;
  }
}
|
||||
|
||||
void PhaseIdealLoop::fix_ctrl_uses(const Node_List& body, const IdealLoopTree* loop, Node_List &old_new, CloneLoopMode mode,
|
||||
Node* side_by_side_idom, CloneMap* cm, Node_List &worklist) {
|
||||
LoopNode* head = loop->_head->as_Loop();
|
||||
for(uint i = 0; i < body.size(); i++ ) {
|
||||
Node* old = body.at(i);
|
||||
if( !old->is_CFG() ) continue;
|
||||
|
||||
// Copy uses to a worklist, so I can munge the def-use info
|
||||
// with impunity.
|
||||
for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
|
||||
for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++) {
|
||||
worklist.push(old->fast_out(j));
|
||||
}
|
||||
|
||||
while( worklist.size() ) { // Visit all uses
|
||||
while (worklist.size()) { // Visit all uses
|
||||
Node *use = worklist.pop();
|
||||
if (!has_node(use)) continue; // Ignore dead nodes
|
||||
IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
|
||||
if( !loop->is_member( use_loop ) && use->is_CFG() ) {
|
||||
IdealLoopTree *use_loop = get_loop(has_ctrl(use) ? get_ctrl(use) : use );
|
||||
if (!loop->is_member(use_loop) && use->is_CFG()) {
|
||||
// Both OLD and USE are CFG nodes here.
|
||||
assert( use->is_Proj(), "" );
|
||||
assert(use->is_Proj(), "" );
|
||||
Node* nnn = old_new[old->_idx];
|
||||
|
||||
Node* newuse = NULL;
|
||||
@ -2445,8 +2480,8 @@ void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd
|
||||
}
|
||||
|
||||
// Clone the loop exit control projection
|
||||
if (C->do_vector_loop()) {
|
||||
cm.verify_insert_and_clone(use, newuse, cm.clone_idx());
|
||||
if (C->do_vector_loop() && cm != NULL) {
|
||||
cm->verify_insert_and_clone(use, newuse, cm->clone_idx());
|
||||
}
|
||||
newuse->set_req(0,nnn);
|
||||
_igvn.register_new_node_with_optimizer(newuse);
|
||||
@ -2458,18 +2493,18 @@ void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd
|
||||
RegionNode *r = new RegionNode(3);
|
||||
// Map the old use to the new merge point
|
||||
old_new.map( use->_idx, r );
|
||||
uint dd_r = MIN2(dom_depth(newuse),dom_depth(use));
|
||||
assert( dd_r >= dom_depth(dom_lca(newuse,use)), "" );
|
||||
uint dd_r = MIN2(dom_depth(newuse), dom_depth(use));
|
||||
assert(dd_r >= dom_depth(dom_lca(newuse, use)), "" );
|
||||
|
||||
// The original user of 'use' uses 'r' instead.
|
||||
for (DUIterator_Last lmin, l = use->last_outs(lmin); l >= lmin;) {
|
||||
Node* useuse = use->last_out(l);
|
||||
_igvn.rehash_node_delayed(useuse);
|
||||
uint uses_found = 0;
|
||||
if( useuse->in(0) == use ) {
|
||||
if (useuse->in(0) == use) {
|
||||
useuse->set_req(0, r);
|
||||
uses_found++;
|
||||
if( useuse->is_CFG() ) {
|
||||
if (useuse->is_CFG()) {
|
||||
// This is not a dom_depth > dd_r because when new
|
||||
// control flow is constructed by a loop opt, a node and
|
||||
// its dominator can end up at the same dom_depth
|
||||
@ -2477,7 +2512,7 @@ void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd
|
||||
set_idom(useuse, r, dom_depth(useuse));
|
||||
}
|
||||
}
|
||||
for( uint k = 1; k < useuse->req(); k++ ) {
|
||||
for (uint k = 1; k < useuse->req(); k++) {
|
||||
if( useuse->in(k) == use ) {
|
||||
useuse->set_req(k, r);
|
||||
uses_found++;
|
||||
@ -2494,68 +2529,67 @@ void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd
|
||||
}
|
||||
|
||||
// Now finish up 'r'
|
||||
r->set_req( 1, newuse );
|
||||
r->set_req( 2, use );
|
||||
r->set_req(1, newuse);
|
||||
r->set_req(2, use);
|
||||
_igvn.register_new_node_with_optimizer(r);
|
||||
set_loop(r, use_loop);
|
||||
set_idom(r, !side_by_side_idom ? newuse->in(0) : side_by_side_idom, dd_r);
|
||||
set_idom(r, (side_by_side_idom == NULL) ? newuse->in(0) : side_by_side_idom, dd_r);
|
||||
} // End of if a loop-exit test
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Step 4: If loop-invariant use is not control, it must be dominated by a
|
||||
// loop exit IfFalse/IfTrue. Find "proper" loop exit. Make a Region
|
||||
// there if needed. Make a Phi there merging old and new used values.
|
||||
Node_List *split_if_set = NULL;
|
||||
Node_List *split_bool_set = NULL;
|
||||
Node_List *split_cex_set = NULL;
|
||||
for( i = 0; i < loop->_body.size(); i++ ) {
|
||||
Node* old = loop->_body.at(i);
|
||||
clone_loop_handle_data_uses(old, old_new, loop, outer_loop, split_if_set,
|
||||
split_bool_set, split_cex_set, worklist, new_counter,
|
||||
mode);
|
||||
}
|
||||
|
||||
for (i = 0; i < extra_data_nodes.size(); i++) {
|
||||
Node* old = extra_data_nodes.at(i);
|
||||
clone_loop_handle_data_uses(old, old_new, loop, outer_loop, split_if_set,
|
||||
split_bool_set, split_cex_set, worklist, new_counter,
|
||||
mode);
|
||||
}
|
||||
|
||||
// Check for IFs that need splitting/cloning. Happens if an IF outside of
|
||||
// the loop uses a condition set in the loop. The original IF probably
|
||||
// takes control from one or more OLD Regions (which in turn get from NEW
|
||||
// Regions). In any case, there will be a set of Phis for each merge point
|
||||
// from the IF up to where the original BOOL def exists the loop.
|
||||
if (split_if_set) {
|
||||
while (split_if_set->size()) {
|
||||
Node *iff = split_if_set->pop();
|
||||
if (iff->in(1)->is_Phi()) {
|
||||
Node *b = clone_iff(iff->in(1)->as_Phi(), loop);
|
||||
_igvn.replace_input_of(iff, 1, b);
|
||||
void PhaseIdealLoop::fix_body_edges(const Node_List &body, IdealLoopTree* loop, const Node_List &old_new, int dd,
|
||||
IdealLoopTree* parent, bool partial) {
|
||||
for(uint i = 0; i < body.size(); i++ ) {
|
||||
Node *old = body.at(i);
|
||||
Node *nnn = old_new[old->_idx];
|
||||
// Fix CFG/Loop controlling the new node
|
||||
if (has_ctrl(old)) {
|
||||
set_ctrl(nnn, old_new[get_ctrl(old)->_idx]);
|
||||
} else {
|
||||
set_loop(nnn, parent);
|
||||
if (old->outcnt() > 0) {
|
||||
Node* dom = idom(old);
|
||||
if (old_new[dom->_idx] != NULL) {
|
||||
dom = old_new[dom->_idx];
|
||||
set_idom(nnn, dom, dd );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (split_bool_set) {
|
||||
while (split_bool_set->size()) {
|
||||
Node *b = split_bool_set->pop();
|
||||
Node *phi = b->in(1);
|
||||
assert(phi->is_Phi(), "");
|
||||
CmpNode *cmp = clone_bool((PhiNode*)phi, loop);
|
||||
_igvn.replace_input_of(b, 1, cmp);
|
||||
}
|
||||
}
|
||||
if (split_cex_set) {
|
||||
while (split_cex_set->size()) {
|
||||
Node *b = split_cex_set->pop();
|
||||
assert(b->in(0)->is_Region(), "");
|
||||
assert(b->in(1)->is_Phi(), "");
|
||||
assert(b->in(0)->in(0) == b->in(1)->in(0), "");
|
||||
split_up(b, b->in(0), NULL);
|
||||
// Correct edges to the new node
|
||||
for (uint j = 0; j < nnn->req(); j++) {
|
||||
Node *n = nnn->in(j);
|
||||
if (n != NULL) {
|
||||
IdealLoopTree *old_in_loop = get_loop(has_ctrl(n) ? get_ctrl(n) : n);
|
||||
if (loop->is_member(old_in_loop)) {
|
||||
if (old_new[n->_idx] != NULL) {
|
||||
nnn->set_req(j, old_new[n->_idx]);
|
||||
} else {
|
||||
assert(!body.contains(n), "");
|
||||
assert(partial, "node not cloned");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
_igvn.hash_find_insert(nnn);
|
||||
}
|
||||
}
|
||||
|
||||
void PhaseIdealLoop::clone_loop_body(const Node_List& body, Node_List &old_new, CloneMap* cm) {
|
||||
for (uint i = 0; i < body.size(); i++) {
|
||||
Node* old = body.at(i);
|
||||
Node* nnn = old->clone();
|
||||
old_new.map(old->_idx, nnn);
|
||||
if (old->is_reduction()) {
|
||||
// Reduction flag is not copied by default. Copy it here when cloning the entire loop body.
|
||||
nnn->add_flag(Node::Flag_is_reduction);
|
||||
}
|
||||
if (C->do_vector_loop() && cm != NULL) {
|
||||
cm->verify_insert_and_clone(old, nnn, cm->clone_idx());
|
||||
}
|
||||
_igvn.register_new_node_with_optimizer(nnn);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -3770,6 +3804,285 @@ bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Transform:
|
||||
//
|
||||
// loop<-----------------+
|
||||
// | |
|
||||
// stmt1 stmt2 .. stmtn |
|
||||
// | | | |
|
||||
// \ | / |
|
||||
// v v v |
|
||||
// region |
|
||||
// | |
|
||||
// shared_stmt |
|
||||
// | |
|
||||
// v |
|
||||
// if |
|
||||
// / \ |
|
||||
// | -----------+
|
||||
// v
|
||||
//
|
||||
// into:
|
||||
//
|
||||
// loop<-------------------+
|
||||
// | |
|
||||
// v |
|
||||
// +->loop |
|
||||
// | | |
|
||||
// | stmt1 stmt2 .. stmtn |
|
||||
// | | | | |
|
||||
// | | \ / |
|
||||
// | | v v |
|
||||
// | | region1 |
|
||||
// | | | |
|
||||
// | shared_stmt shared_stmt |
|
||||
// | | | |
|
||||
// | v v |
|
||||
// | if if |
|
||||
// | /\ / \ |
|
||||
// +-- | | -------+
|
||||
// \ /
|
||||
// v v
|
||||
// region2
|
||||
//
|
||||
// (region2 is shown to merge mirrored projections of the loop exit
|
||||
// ifs to make the diagram clearer but they really merge the same
|
||||
// projection)
|
||||
//
|
||||
// Conditions for this transformation to trigger:
|
||||
// - the path through stmt1 is frequent enough
|
||||
// - the inner loop will be turned into a counted loop after transformation
|
||||
// Attempts the transformation drawn in the diagram that precedes this
// function: the control flow shared by all paths below 'region' (shared_stmt
// and the exit test) is duplicated so that one selected path into the region
// keeps the original loop head as an inner loop, while the remaining paths go
// through the cloned copy and branch back to a newly created outer loop head.
//
//   loop    - candidate loop. Its head must be a LoopNode; counted loops are
//             only expected here when StressDuplicateBackedge is set.
//   old_new - filled by clone_loop_body() with the clone of every duplicated
//             node, indexed by the original node's _idx.
//
// Returns true if the graph was transformed (major progress is then recorded
// on the compilation), false if the transformation was not applied.
bool PhaseIdealLoop::duplicate_loop_backedge(IdealLoopTree *loop, Node_List &old_new) {
  if (!DuplicateBackedge) {
    return false;
  }
  assert(!loop->_head->is_CountedLoop() || StressDuplicateBackedge, "Non-counted loop only");
  if (!loop->_head->is_Loop()) {
    return false;
  }

  uint estimate = loop->est_loop_clone_sz(1);
  if (exceeding_node_budget(estimate)) {
    return false;
  }

  LoopNode *head = loop->_head->as_Loop();

  Node* region = NULL;       // merge point above the shared statements
  IfNode* exit_test = NULL;  // loop exit test; only set on the non-stress path
  uint inner = 0;            // index of the region input that stays in the inner loop
  float f = 0;               // frequency of that input; only meaningful when exit_test != NULL
  if (StressDuplicateBackedge) {
    if (head->is_strip_mined()) {
      return false;
    }
    Node* c = head->in(LoopNode::LoopBackControl);

    // Walk up the dominator chain from the backedge to the loop head and keep
    // the last Region encountered, i.e. the one closest to the head.
    while (c != head) {
      if (c->is_Region()) {
        region = c;
      }
      c = idom(c);
    }

    if (region == NULL) {
      return false;
    }

    inner = 1;
  } else {
    // Is the shape of the loop that of a counted loop...
    Node* back_control = loop_exit_control(head, loop);
    if (back_control == NULL) {
      return false;
    }

    BoolTest::mask bt = BoolTest::illegal;
    float cl_prob = 0;
    Node* incr = NULL;
    Node* limit = NULL;
    Node* cmp = loop_exit_test(back_control, loop, incr, limit, bt, cl_prob);
    if (cmp == NULL || cmp->Opcode() != Op_CmpI) {
      return false;
    }

    // With an extra phi for the candidate iv?
    if (!incr->is_Phi()) {
      return false;
    }

    PathFrequency pf(head, this);
    region = incr->in(0);

    // Go over all paths for the extra phi's region and see if that
    // path is frequent enough and would match the expected iv shape
    // if the extra phi is removed
    inner = 0;
    for (uint i = 1; i < incr->req(); ++i) {
      Node* in = incr->in(i);
      Node* trunc1 = NULL;
      Node* trunc2 = NULL;
      const TypeInteger* iv_trunc_t = NULL;
      if (!(in = CountedLoopNode::match_incr_with_optional_truncation(in, &trunc1, &trunc2, &iv_trunc_t, T_INT))) {
        continue;
      }
      assert(in->Opcode() == Op_AddI, "wrong increment code");
      Node* xphi = NULL;
      Node* stride = loop_iv_stride(in, loop, xphi);

      if (stride == NULL) {
        continue;
      }

      PhiNode* phi = loop_iv_phi(xphi, NULL, head, loop);
      if (phi == NULL ||
          (trunc1 == NULL && phi->in(LoopNode::LoopBackControl) != incr) ||
          (trunc1 != NULL && phi->in(LoopNode::LoopBackControl) != trunc1)) {
        return false;
      }

      // Only a path taken more than half of the time is worth an inner loop
      f = pf.to(region->in(i));
      if (f > 0.5) {
        inner = i;
        break;
      }
    }

    if (inner == 0) {
      return false;
    }

    exit_test = back_control->in(0)->as_If();
  }

  if (idom(region)->is_Catch()) {
    return false;
  }

  // Collect all control nodes that need to be cloned (shared_stmt in the diagram)
  Unique_Node_List wq;
  wq.push(head->in(LoopNode::LoopBackControl));
  for (uint i = 0; i < wq.size(); i++) {
    Node* c = wq.at(i);
    assert(get_loop(c) == loop, "not in the right loop?");
    if (c->is_Region()) {
      // 'region' itself is collected but the walk does not continue above it
      if (c != region) {
        for (uint j = 1; j < c->req(); ++j) {
          wq.push(c->in(j));
        }
      }
    } else {
      wq.push(c->in(0));
    }
    assert(!is_dominator(c, region) || c == region, "shouldn't go above region");
  }

  // Can't do the transformation if this would cause a membar pair to
  // be split
  for (uint i = 0; i < wq.size(); i++) {
    Node* c = wq.at(i);
    if (c->is_MemBar() && (c->as_MemBar()->trailing_store() || c->as_MemBar()->trailing_load_store())) {
      assert(c->as_MemBar()->leading_membar()->trailing_membar() == c, "bad membar pair");
      if (!wq.member(c->as_MemBar()->leading_membar())) {
        return false;
      }
    }
  }

  // Collect data nodes that need to be cloned as well
  int dd = dom_depth(head);

  for (uint i = 0; i < loop->_body.size(); ++i) {
    Node* n = loop->_body.at(i);
    if (has_ctrl(n)) {
      // Data node: clone it if its control is one of the collected CFG nodes
      Node* c = get_ctrl(n);
      if (wq.member(c)) {
        wq.push(n);
      }
    } else {
      // CFG node: keep its dominator but record it at the depth of the loop head
      set_idom(n, idom(n), dd);
    }
  }

  // clone shared_stmt
  clone_loop_body(wq, old_new, NULL);

  // The clone of the region must not be reachable from the inner path, and the
  // original region is now dominated by that single path.
  Node* region_clone = old_new[region->_idx];
  region_clone->set_req(inner, C->top());
  set_idom(region, region->in(inner), dd);

  // Prepare the outer loop
  Node* outer_head = new LoopNode(head->in(LoopNode::EntryControl), old_new[head->in(LoopNode::LoopBackControl)->_idx]);
  register_control(outer_head, loop->_parent, outer_head->in(LoopNode::EntryControl));
  _igvn.replace_input_of(head, LoopNode::EntryControl, outer_head);
  set_idom(head, outer_head, dd);

  fix_body_edges(wq, loop, old_new, dd, loop->_parent, true);

  // Make one of the shared_stmt copies only reachable from stmt1, the
  // other only from stmt2..stmtn.
  Node* dom = NULL;
  for (uint i = 1; i < region->req(); ++i) {
    if (i != inner) {
      _igvn.replace_input_of(region, i, C->top());
    }
    Node* in = region_clone->in(i);
    if (in->is_top()) {
      continue;
    }
    // The clone's dominator is the common dominator of all its live inputs
    if (dom == NULL) {
      dom = in;
    } else {
      dom = dom_lca(dom, in);
    }
  }

  set_idom(region_clone, dom, dd);

  // Set up the outer loop
  for (uint i = 0; i < head->outcnt(); i++) {
    Node* u = head->raw_out(i);
    if (u->is_Phi()) {
      Node* outer_phi = u->clone();
      outer_phi->set_req(0, outer_head);
      // Use the cloned backedge value if there is one, the original otherwise
      Node* backedge = old_new[u->in(LoopNode::LoopBackControl)->_idx];
      if (backedge == NULL) {
        backedge = u->in(LoopNode::LoopBackControl);
      }
      outer_phi->set_req(LoopNode::LoopBackControl, backedge);
      register_new_node(outer_phi, outer_head);
      _igvn.replace_input_of(u, LoopNode::EntryControl, outer_phi);
    }
  }

  // create control and data nodes for out of loop uses (including region2)
  Node_List worklist;
  uint new_counter = C->unique();
  fix_ctrl_uses(wq, loop, old_new, ControlAroundStripMined, outer_head, NULL, worklist);

  Node_List *split_if_set = NULL;
  Node_List *split_bool_set = NULL;
  Node_List *split_cex_set = NULL;
  fix_data_uses(wq, loop, ControlAroundStripMined, head->is_strip_mined() ? loop->_parent : loop, new_counter, old_new, worklist, split_if_set, split_bool_set, split_cex_set);

  finish_clone_loop(split_if_set, split_bool_set, split_cex_set);

  // Split the profiled count of the exit test between the two copies
  // according to the frequency of the path that stays in the inner loop.
  if (exit_test != NULL) {
    float cnt = exit_test->_fcnt;
    if (cnt != COUNT_UNKNOWN) {
      exit_test->_fcnt = cnt * f;
      old_new[exit_test->_idx]->as_If()->_fcnt = cnt * (1 - f);
    }
  }

  C->set_major_progress();

  return true;
}
|
||||
|
||||
//------------------------------reorg_offsets----------------------------------
|
||||
// Reorganize offset computations to lower register pressure. Mostly
|
||||
// prevent loop-fallout uses of the pre-incremented trip counter (which are
|
||||
|
@ -0,0 +1,64 @@
|
||||
/*
|
||||
* Copyright (c) 2022, Red Hat, Inc. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package compiler.c2.irTests;
|
||||
|
||||
import compiler.lib.ir_framework.*;
|
||||
import java.util.Objects;
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @bug 8279888
|
||||
* @summary Local variable independently used by multiple loops can interfere with loop optimizations
|
||||
* @library /test/lib /
|
||||
* @run driver compiler.c2.irTests.TestDuplicateBackedge
|
||||
*/
|
||||
|
||||
/**
 * IR test for bug 8279888: checks which loop nodes are present in the
 * compiled code of {@link #test()} depending on the DuplicateBackedge flag.
 */
public class TestDuplicateBackedge {
|
||||
// Runs the IR framework twice with -XX:LoopMaxUnroll=1: once with no other
// flag and once with DuplicateBackedge explicitly switched off.
public static void main(String[] args) {
|
||||
TestFramework.runWithFlags("-XX:LoopMaxUnroll=1");
|
||||
TestFramework.runWithFlags("-XX:LoopMaxUnroll=1", "-XX:-DuplicateBackedge");
|
||||
}
|
||||
|
||||
/**
 * Loop whose variable {@code i} is updated in two different ways before a
 * common exit check: {@code (i * 2) + 1} when {@code i} is a multiple of 10,
 * {@code i++} otherwise.
 *
 * IR expectations: with DuplicateBackedge enabled exactly one Loop and one
 * CountedLoop; with it disabled exactly one Loop and no CountedLoop.
 *
 * NOTE: the exact shape of this loop is what the rules check; do not
 * restructure it.
 */
@Test
|
||||
@IR(applyIf = { "DuplicateBackedge", "true" }, counts = { IRNode.LOOP, "1", IRNode.COUNTEDLOOP, "1" })
|
||||
@IR(applyIf = { "DuplicateBackedge", "false" }, counts = { IRNode.LOOP, "1" })
|
||||
@IR(applyIf = { "DuplicateBackedge", "false" }, failOn = { IRNode.COUNTEDLOOP })
|
||||
public static float test() {
|
||||
float res = 1;
|
||||
// No condition and no update in the loop header: both live in the body
for (int i = 1;;) {
|
||||
if (i % 10 == 0) {
|
||||
// Infrequent path: non unit-stride update of i
i = (i * 2) + 1;
|
||||
res /= 42;
|
||||
} else {
|
||||
// Frequent path: plain increment of i
i++;
|
||||
res *= 42;
|
||||
}
|
||||
// Single exit check shared by both paths
if (i >= 1000) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
}
|
210
test/micro/org/openjdk/bench/vm/compiler/LoopLocals.java
Normal file
210
test/micro/org/openjdk/bench/vm/compiler/LoopLocals.java
Normal file
@ -0,0 +1,210 @@
|
||||
/*
|
||||
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
package org.openjdk.bench.vm.compiler;
|
||||
|
||||
import org.openjdk.jmh.annotations.*;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Random;
|
||||
import java.util.concurrent.ThreadLocalRandom;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
 * Examine issues with (potentially) uninitialized locals interfering with
 * loop optimizations.
 *
 * Two benchmark methods run the same two-loop encoding routine over a char
 * array; they differ only in where the local {@code c} is declared: once for
 * both loops ({@link #loopsWithSharedLocal()}) or separately inside each loop
 * ({@link #loopsWithScopedLocal()}).
 */
|
||||
@BenchmarkMode(Mode.AverageTime)
|
||||
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
||||
@Fork(value = 3)
|
||||
@Warmup(iterations = 5, time = 2)
|
||||
@Measurement(iterations = 5, time = 3)
|
||||
@State(Scope.Thread)
|
||||
public class LoopLocals {
|
||||
|
||||
// Sample text with the character U+FF11 on its first line, followed by
// ASCII-only text. Despite the "bytes" in the name this is a char[].
public char[] bytesStartingWithNegative = """
|
||||
\uFF11
|
||||
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aliquam ac sem eu
|
||||
urna egestas placerat. Etiam finibus ipsum nulla, non mattis dolor cursus a.
|
||||
Nulla nec nisl consectetur, lacinia neque id, accumsan ante. Curabitur et
|
||||
sapien in magna porta ultricies. Sed vel pellentesque nibh. Pellentesque dictum
|
||||
dignissim diam eu ultricies. Class aptent taciti sociosqu ad litora torquent
|
||||
per conubia nostra, per inceptos himenaeos. Suspendisse erat diam, fringilla
|
||||
sed massa sed, posuere viverra orci. Suspendisse tempor libero non gravida
|
||||
efficitur. Vivamus lacinia risus non orci viverra, at consectetur odio laoreet.
|
||||
Suspendisse potenti.
|
||||
|
||||
Phasellus vel nisi iaculis, accumsan quam sed, bibendum eros. Sed venenatis
|
||||
nulla tortor, et eleifend urna sodales id. Nullam tempus ac metus sit amet
|
||||
sollicitudin. Nam sed ex diam. Praesent vitae eros et neque condimentum
|
||||
consectetur eget non tortor. Praesent bibendum vel felis nec dignissim.
|
||||
Maecenas a enim diam. Suspendisse quis ligula at nisi accumsan lacinia id
|
||||
hendrerit sapien. Donec aliquam mattis lectus eu ultrices. Duis eu nisl
|
||||
euismod, blandit mauris vel, placerat urna. Etiam malesuada enim purus,
|
||||
tristique mollis odio blandit quis. Vivamus posuere.
|
||||
""".toCharArray();
|
||||
|
||||
// Same sample text, but with the character U+FF11 at the end of its last
// line instead of at the start.
public char[] bytesEndingWithNegative = """
|
||||
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aliquam ac sem eu
|
||||
urna egestas placerat. Etiam finibus ipsum nulla, non mattis dolor cursus a.
|
||||
Nulla nec nisl consectetur, lacinia neque id, accumsan ante. Curabitur et
|
||||
sapien in magna porta ultricies. Sed vel pellentesque nibh. Pellentesque dictum
|
||||
dignissim diam eu ultricies. Class aptent taciti sociosqu ad litora torquent
|
||||
per conubia nostra, per inceptos himenaeos. Suspendisse erat diam, fringilla
|
||||
sed massa sed, posuere viverra orci. Suspendisse tempor libero non gravida
|
||||
efficitur. Vivamus lacinia risus non orci viverra, at consectetur odio laoreet.
|
||||
Suspendisse potenti.
|
||||
|
||||
Phasellus vel nisi iaculis, accumsan quam sed, bibendum eros. Sed venenatis
|
||||
nulla tortor, et eleifend urna sodales id. Nullam tempus ac metus sit amet
|
||||
sollicitudin. Nam sed ex diam. Praesent vitae eros et neque condimentum
|
||||
consectetur eget non tortor. Praesent bibendum vel felis nec dignissim.
|
||||
Maecenas a enim diam. Suspendisse quis ligula at nisi accumsan lacinia id
|
||||
hendrerit sapien. Donec aliquam mattis lectus eu ultrices. Duis eu nisl
|
||||
euismod, blandit mauris vel, placerat urna. Etiam malesuada enim purus,
|
||||
tristique mollis odio blandit quis. Vivamus posuere. \uFF11
|
||||
""".toCharArray();
|
||||
|
||||
// Benchmark parameter selecting which input setup() assigns to 'val'.
@Param({"startNonASCII", "endNonASCII", "mixed"})
|
||||
private String variant;
|
||||
// Input processed by the benchmark methods; assigned in setup().
private char[] val;
|
||||
// Picks the input for the selected variant. Throws RuntimeException for a
// variant name that is not one of the three handled below.
@Setup
|
||||
public void setup() {
|
||||
val = switch (variant) {
|
||||
case "startNonASCII" -> bytesStartingWithNegative;
|
||||
case "endNonASCII" -> bytesEndingWithNegative;
|
||||
case "mixed" -> {
|
||||
// Work on a copy so that the shared sample text is left unmodified
char[] chars = bytesEndingWithNegative.clone();
|
||||
// Fixed seed: the generated input is the same on every run
var random = new Random(0L);
|
||||
for (int i = 0; i < chars.length; i++) {
|
||||
// Roughly 30% of the chars get a random offset below 0x2F00 added
if (random.nextInt(100) < 30) {
|
||||
chars[i] = (char)(chars[i] + random.nextInt(0x2F00));
|
||||
}
|
||||
}
|
||||
yield chars;
|
||||
}
|
||||
default -> throw new RuntimeException("Unknown variant: " + variant);
|
||||
};
|
||||
}
|
||||
|
||||
// Encodes chars of 'val' into 1 to 4 bytes each, using a fast first loop that
// stops at the first char >= 0x80 and a general second loop for the rest.
// The local 'c' is declared once and shared by both loops.
// Returns 'dst' itself when it was filled completely, a trimmed copy otherwise.
@Benchmark
|
||||
public byte[] loopsWithSharedLocal() {
|
||||
int dp = 0;
|
||||
int sp = 0;
|
||||
// NOTE(review): sl is half of val.length, so only indices below that are
// read by the loops although val is a char[] here - confirm this is intended.
int sl = val.length >> 1;
|
||||
// Room for three bytes per processed char
byte[] dst = new byte[sl * 3];
|
||||
// Shared by both loops below; this is the shape under test
char c;
|
||||
// Fast loop: copies chars below 0x80 as single bytes
while (sp < sl && (c = getChar(val, sp)) < '\u0080') {
|
||||
dst[dp++] = (byte)c;
|
||||
sp++;
|
||||
}
|
||||
// General loop: continues where the fast loop stopped
while (sp < sl) {
|
||||
c = getChar(val, sp++);
|
||||
if (c < 0x80) {
|
||||
dst[dp++] = (byte)c;
|
||||
} else if (c < 0x800) {
|
||||
// 2 bytes
dst[dp++] = (byte)(0xc0 | (c >> 6));
|
||||
dst[dp++] = (byte)(0x80 | (c & 0x3f));
|
||||
} else if (Character.isSurrogate(c)) {
|
||||
// uc stays negative unless c and the next char form a surrogate pair
int uc = -1;
|
||||
char c2;
|
||||
if (Character.isHighSurrogate(c) && sp < sl &&
|
||||
Character.isLowSurrogate(c2 = getChar(val, sp))) {
|
||||
uc = Character.toCodePoint(c, c2);
|
||||
}
|
||||
if (uc < 0) {
|
||||
// Unpaired surrogate: emit '?'
dst[dp++] = '?';
|
||||
} else {
|
||||
// 4 bytes
dst[dp++] = (byte)(0xf0 | ((uc >> 18)));
|
||||
dst[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
|
||||
dst[dp++] = (byte)(0x80 | ((uc >> 6) & 0x3f));
|
||||
dst[dp++] = (byte)(0x80 | (uc & 0x3f));
|
||||
sp++; // 2 chars
|
||||
}
|
||||
} else {
|
||||
// 3 bytes, 16 bits
|
||||
dst[dp++] = (byte)(0xe0 | ((c >> 12)));
|
||||
dst[dp++] = (byte)(0x80 | ((c >> 6) & 0x3f));
|
||||
dst[dp++] = (byte)(0x80 | (c & 0x3f));
|
||||
}
|
||||
}
|
||||
if (dp == dst.length) {
|
||||
return dst;
|
||||
}
|
||||
return Arrays.copyOf(dst, dp);
|
||||
}
|
||||
|
||||
// Same encoding as loopsWithSharedLocal(), but the local 'c' is declared
// separately inside each of the two loops.
// Returns 'dst' itself when it was filled completely, a trimmed copy otherwise.
@Benchmark
|
||||
public byte[] loopsWithScopedLocal() {
|
||||
int dp = 0;
|
||||
int sp = 0;
|
||||
// NOTE(review): sl is half of val.length, so only indices below that are
// read by the loops although val is a char[] here - confirm this is intended.
int sl = val.length >> 1;
|
||||
// Room for three bytes per processed char
byte[] dst = new byte[sl * 3];
|
||||
while (sp < sl) {
|
||||
// ascii fast loop;
|
||||
// 'c' is local to this loop; this is the shape under test
char c = getChar(val, sp);
|
||||
if (c >= '\u0080') {
|
||||
break;
|
||||
}
|
||||
dst[dp++] = (byte)c;
|
||||
sp++;
|
||||
}
|
||||
// General loop: continues where the fast loop stopped
while (sp < sl) {
|
||||
char c = getChar(val, sp++);
|
||||
if (c < 0x80) {
|
||||
dst[dp++] = (byte)c;
|
||||
} else if (c < 0x800) {
|
||||
// 2 bytes
dst[dp++] = (byte)(0xc0 | (c >> 6));
|
||||
dst[dp++] = (byte)(0x80 | (c & 0x3f));
|
||||
} else if (Character.isSurrogate(c)) {
|
||||
// uc stays negative unless c and the next char form a surrogate pair
int uc = -1;
|
||||
char c2;
|
||||
if (Character.isHighSurrogate(c) && sp < sl &&
|
||||
Character.isLowSurrogate(c2 = getChar(val, sp))) {
|
||||
uc = Character.toCodePoint(c, c2);
|
||||
}
|
||||
if (uc < 0) {
|
||||
// Unpaired surrogate: emit '?'
dst[dp++] = '?';
|
||||
} else {
|
||||
// 4 bytes
dst[dp++] = (byte)(0xf0 | ((uc >> 18)));
|
||||
dst[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
|
||||
dst[dp++] = (byte)(0x80 | ((uc >> 6) & 0x3f));
|
||||
dst[dp++] = (byte)(0x80 | (uc & 0x3f));
|
||||
sp++; // 2 chars
|
||||
}
|
||||
} else {
|
||||
// 3 bytes, 16 bits
|
||||
dst[dp++] = (byte)(0xe0 | ((c >> 12)));
|
||||
dst[dp++] = (byte)(0x80 | ((c >> 6) & 0x3f));
|
||||
dst[dp++] = (byte)(0x80 | (c & 0x3f));
|
||||
}
|
||||
}
|
||||
if (dp == dst.length) {
|
||||
return dst;
|
||||
}
|
||||
return Arrays.copyOf(dst, dp);
|
||||
}
|
||||
|
||||
// Returns the char stored at 'index' in 'val'.
static char getChar(char[] val, int index) {
|
||||
return val[index];
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user