8314307: Improve loop handling
Co-authored-by: Christian Hagedorn <chagedorn@openjdk.org>
Co-authored-by: Roland Westrelin <rwestrel@redhat.com>
Co-authored-by: Emanuel Peter <epeter@openjdk.org>
Reviewed-by: mschoene, rhalade, thartmann, epeter
parent 1cc6fbd9b0
commit c1a568c9c4
@@ -1898,6 +1898,46 @@ Node* RangeCheckNode::Ideal(PhaseGVN *phase, bool can_reshape) {
// then we are guaranteed to fail, so just start interpreting there.
// We 'expand' the top 3 range checks to include all post-dominating
// checks.
//
// Example:
// a[i+x] // (1) 1 < x < 6
// a[i+3] // (2)
// a[i+4] // (3)
// a[i+6] // max = max of all constants
// a[i+2]
// a[i+1] // min = min of all constants
//
// If x < 3:
// (1) a[i+x]: Leave unchanged
// (2) a[i+3]: Replace with a[i+max] = a[i+6]: i+x < i+3 <= i+6 -> (2) is covered
// (3) a[i+4]: Replace with a[i+min] = a[i+1]: i+1 < i+4 <= i+6 -> (3) and all following checks are covered
// Remove all other a[i+c] checks
//
// If x >= 3:
// (1) a[i+x]: Leave unchanged
// (2) a[i+3]: Replace with a[i+min] = a[i+1]: i+1 < i+3 <= i+x -> (2) is covered
// (3) a[i+4]: Replace with a[i+max] = a[i+6]: i+1 < i+4 <= i+6 -> (3) and all following checks are covered
// Remove all other a[i+c] checks
//
// We only need the top 2 range checks if x is the min or max of all constants.
//
// This, however, only works if the interval [i+min,i+max] is not larger than max_int (i.e. abs(max - min) < max_int):
// The theoretical max size of an array is max_int with:
// - Valid index space: [0,max_int-1]
// - Invalid index space: [max_int,-1] // max_int, min_int, min_int - 1 ..., -1
//
// The size of the consecutive valid index space is smaller than the size of the consecutive invalid index space.
// If we choose min and max in such a way that:
// - abs(max - min) < max_int
// - i+max and i+min are inside the valid index space
// then all indices [i+min,i+max] must be in the valid index space. Otherwise, the invalid index space must be
// smaller than the valid index space which is never the case for any array size.
//
// Choosing a smaller array size only makes the valid index space smaller and the invalid index space larger and
// the argument above still holds.
//
// Note that the same optimization with the same maximal accepted interval size can also be found in C1.
const jlong maximum_number_of_min_max_interval_indices = (jlong)max_jint;

// The top 3 range checks seen
const int NRC = 3;
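To make the covering argument above concrete, here is a small standalone sketch (illustrative only, not part of the patch; the helper name and use of std::vector are assumptions): if the span of constant offsets fits into a jint, then passing the checks for the minimum and maximum offset implies that every intermediate offset is in bounds.

#include <algorithm>
#include <cstdint>
#include <vector>

// All dominated checks a[i+c] with min_off <= c <= max_off are covered by checking
// a[i+min_off] and a[i+max_off], provided the interval is smaller than max_jint.
bool covered_by_min_max_checks(int64_t i, const std::vector<int32_t>& offsets, int64_t array_length) {
  const int64_t min_off = *std::min_element(offsets.begin(), offsets.end());
  const int64_t max_off = *std::max_element(offsets.begin(), offsets.end());
  if (max_off - min_off >= INT32_MAX) {
    return false; // Interval too large: the reasoning above does not apply.
  }
  // Both extreme indices lie in the valid index space [0, array_length), so every
  // index in [i+min_off, i+max_off] does as well.
  return (i + min_off) >= 0 && (i + max_off) < array_length;
}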
@@ -1932,13 +1972,18 @@ Node* RangeCheckNode::Ideal(PhaseGVN *phase, bool can_reshape) {
found_immediate_dominator = true;
break;
}
// Gather expanded bounds
off_lo = MIN2(off_lo,offset2);
off_hi = MAX2(off_hi,offset2);
// Record top NRC range checks
prev_checks[nb_checks%NRC].ctl = prev_dom->as_IfProj();
prev_checks[nb_checks%NRC].off = offset2;
nb_checks++;

// "x - y" -> must add one to the difference for number of elements in [x,y]
const jlong diff = (jlong)MIN2(offset2, off_lo) - (jlong)MAX2(offset2, off_hi);
if (ABS(diff) < maximum_number_of_min_max_interval_indices) {
// Gather expanded bounds
off_lo = MIN2(off_lo, offset2);
off_hi = MAX2(off_hi, offset2);
// Record top NRC range checks
prev_checks[nb_checks % NRC].ctl = prev_dom->as_IfProj();
prev_checks[nb_checks % NRC].off = offset2;
nb_checks++;
}
}
}
prev_dom = dom;
@@ -850,9 +850,10 @@ BoolNode* PhaseIdealLoop::rc_predicate(IdealLoopTree* loop, Node* ctrl, int scal
// Check if (scale * max_idx_expr) may overflow
const TypeInt* scale_type = TypeInt::make(scale);
MulINode* mul = new MulINode(max_idx_expr, con_scale);
idx_type = (TypeInt*)mul->mul_ring(idx_type, scale_type);
if (overflow || TypeInt::INT->higher_equal(idx_type)) {

if (overflow || MulINode::does_overflow(idx_type, scale_type)) {
// May overflow
idx_type = TypeInt::INT;
mul->destruct(&_igvn);
if (!overflow) {
max_idx_expr = new ConvI2LNode(max_idx_expr);
@@ -865,6 +866,7 @@ BoolNode* PhaseIdealLoop::rc_predicate(IdealLoopTree* loop, Node* ctrl, int scal
} else {
// No overflow possible
max_idx_expr = mul;
idx_type = (TypeInt*)mul->mul_ring(idx_type, scale_type);
}
register_new_node(max_idx_expr, ctrl);
}
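The hunk above replaces a type-based overflow test with MulINode::does_overflow. A hedged sketch of the underlying idea, using plain 64-bit arithmetic instead of the HotSpot type system (function and parameter names are illustrative assumptions):

#include <cstdint>
#include <limits>

// Does scaling any index in [idx_lo, idx_hi] by the constant 'scale' leave the jint range?
// The product is monotonic in the index for a fixed scale, so checking the two endpoints is enough.
bool scaled_index_may_overflow(int32_t idx_lo, int32_t idx_hi, int32_t scale) {
  const int64_t products[] = { (int64_t)idx_lo * scale, (int64_t)idx_hi * scale };
  for (int64_t p : products) {
    if (p < std::numeric_limits<int32_t>::min() || p > std::numeric_limits<int32_t>::max()) {
      return true; // (scale * max_idx_expr) may overflow; fall back to the long-based check.
    }
  }
  return false;
}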
@@ -491,19 +491,19 @@ PhiNode* PhaseIdealLoop::loop_iv_phi(Node* xphi, Node* phi_incr, Node* x, IdealL
return phi;
}

static int check_stride_overflow(jlong stride_con, const TypeInteger* limit_t, BasicType bt) {
if (stride_con > 0) {
if (limit_t->lo_as_long() > (max_signed_integer(bt) - stride_con)) {
static int check_stride_overflow(jlong final_correction, const TypeInteger* limit_t, BasicType bt) {
if (final_correction > 0) {
if (limit_t->lo_as_long() > (max_signed_integer(bt) - final_correction)) {
return -1;
}
if (limit_t->hi_as_long() > (max_signed_integer(bt) - stride_con)) {
if (limit_t->hi_as_long() > (max_signed_integer(bt) - final_correction)) {
return 1;
}
} else {
if (limit_t->hi_as_long() < (min_signed_integer(bt) - stride_con)) {
if (limit_t->hi_as_long() < (min_signed_integer(bt) - final_correction)) {
return -1;
}
if (limit_t->lo_as_long() < (min_signed_integer(bt) - stride_con)) {
if (limit_t->lo_as_long() < (min_signed_integer(bt) - final_correction)) {
return 1;
}
}
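A hedged restatement of the contract of check_stride_overflow for the int case with a positive correction (the helper name and the worked numbers are illustrative, not from the patch): -1 means overflow is certain for every possible limit, 1 means it is possible for some limit, 0 means it can never happen.

#include <cstdint>

// Mirrors the decision above for bt == T_INT and final_correction > 0, where
// limit_lo/limit_hi stand in for limit_t->lo_as_long()/hi_as_long().
int check_stride_overflow_int(int64_t final_correction, int64_t limit_lo, int64_t limit_hi) {
  const int64_t max_jint = INT32_MAX;
  if (limit_lo > max_jint - final_correction) return -1; // Certain overflow: give up on the counted loop.
  if (limit_hi > max_jint - final_correction) return 1;  // Possible overflow: emit a Loop Limit Check Predicate.
  return 0;                                              // No overflow possible, e.g. limit is an array length.
}

// Example: final_correction = 3 and limit in [0, max_jint - 2] returns 1, so the
// predicate "limit <= max_jint - 3" is added before the loop.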
@@ -1773,49 +1773,204 @@ bool PhaseIdealLoop::is_counted_loop(Node* x, IdealLoopTree*&loop, BasicType iv_
C->print_method(PHASE_BEFORE_CLOOPS, 3);

// ===================================================
// Generate loop limit check to avoid integer overflow
// in cases like next (cyclic loops):
// We can only convert this loop to a counted loop if we can guarantee that the iv phi will never overflow at runtime.
// This is an implicit assumption taken by some loop optimizations. We therefore must ensure this property at all cost.
// At this point, we've already excluded some trivial cases where an overflow could have been proven statically.
// But even though we cannot prove that an overflow will *not* happen, we still want to speculatively convert this loop
// to a counted loop. This can be achieved by adding additional iv phi overflow checks before the loop. If they fail,
// we trap and resume execution before the loop without having executed any iteration of the loop yet.
//
// for (i=0; i <= max_jint; i++) {}
// for (i=0; i < max_jint; i+=2) {}
// These additional iv phi overflow checks can be inserted as Loop Limit Check Predicates above the Loop Limit Check
// Parse Predicate which captures a JVM state just before the entry of the loop. If there is no such Parse Predicate,
// we cannot generate a Loop Limit Check Predicate and thus cannot speculatively convert the loop to a counted loop.
//
// In the following, we only focus on int loops with stride > 0 to keep things simple. The argument and proof
// for stride < 0 are analogous. For long loops, we would replace max_int with max_long.
//
//
// Limit check predicate depends on the loop test:
// The loop to be converted does not always need to have the often-used shape:
//
// for(;i != limit; i++) --> limit <= (max_jint)
// for(;i < limit; i+=stride) --> limit <= (max_jint - stride + 1)
// for(;i <= limit; i+=stride) --> limit <= (max_jint - stride )
// i = init
// i = init loop:
// do { ...
// // ... equivalent i+=stride
// i+=stride <==> if (i < limit)
// } while (i < limit); goto loop
// exit:
// ...
//
// where the loop exit check uses the post-incremented iv phi and a '<'-operator.
//
// We could also have '<='-operator (or '>='-operator for negative strides) or use the pre-incremented iv phi value
// in the loop exit check:
//
// i = init
// loop:
// ...
// if (i <= limit)
// i+=stride
// goto loop
// exit:
// ...
//
// Let's define the following terms:
// - iv_pre_i: The pre-incremented iv phi before the i-th iteration.
// - iv_post_i: The post-incremented iv phi after the i-th iteration.
//
// The iv_pre_i and iv_post_i have the following relation:
// iv_pre_i + stride = iv_post_i
//
// When converting a loop to a counted loop, we want to have a canonicalized loop exit check of the form:
// iv_post_i < adjusted_limit
//
// If that is not the case, we need to canonicalize the loop exit check by using different values for adjusted_limit:
// (LE1) iv_post_i < limit: Already canonicalized. We can directly use limit as adjusted_limit.
// -> adjusted_limit = limit.
// (LE2) iv_post_i <= limit:
// iv_post_i < limit + 1
// -> adjusted_limit = limit + 1
// (LE3) iv_pre_i < limit:
// iv_pre_i + stride < limit + stride
// iv_post_i < limit + stride
// -> adjusted_limit = limit + stride
// (LE4) iv_pre_i <= limit:
// iv_pre_i < limit + 1
// iv_pre_i + stride < limit + stride + 1
// iv_post_i < limit + stride + 1
// -> adjusted_limit = limit + stride + 1
//
// Note that:
// (AL) limit <= adjusted_limit.
//
// The following loop invariant has to hold for counted loops with n iterations (i.e. loop exit check true after n-th
// loop iteration) and a canonicalized loop exit check to guarantee that no iv_post_i over- or underflows:
// (INV) For i = 1..n, min_int <= iv_post_i <= max_int
//
// To prove (INV), we require the following two conditions/assumptions:
// (i): adjusted_limit - 1 + stride <= max_int
// (ii): init < limit
//
// If we can prove (INV), we know that there can be no over- or underflow of any iv phi value. We prove (INV) by
// induction by assuming (i) and (ii).
//
// Proof by Induction
// ------------------
// > Base case (i = 1): We show that (INV) holds after the first iteration:
// min_int <= iv_post_1 = init + stride <= max_int
// Proof:
// First, we note that (ii) implies
// (iii) init <= limit - 1
// max_int >= adjusted_limit - 1 + stride [using (i)]
// >= limit - 1 + stride [using (AL)]
// >= init + stride [using (iii)]
// >= min_int [using stride > 0, no underflow]
// Thus, no overflow happens after the first iteration and (INV) holds for i = 1.
//
// Note that to prove the base case we need (i) and (ii).
//
// > Induction Hypothesis (i = j, j > 1): Assume that (INV) holds after the j-th iteration:
// min_int <= iv_post_j <= max_int
// > Step case (i = j + 1): We show that (INV) also holds after the j+1-th iteration:
// min_int <= iv_post_{j+1} = iv_post_j + stride <= max_int
// Proof:
// If iv_post_j >= adjusted_limit:
// We exit the loop after the j-th iteration, and we don't execute the j+1-th iteration anymore. Thus, there is
// also no iv_{j+1}. Since (INV) holds for iv_j, there is nothing left to prove.
// If iv_post_j < adjusted_limit:
// First, we note that:
// (iv) iv_post_j <= adjusted_limit - 1
// max_int >= adjusted_limit - 1 + stride [using (i)]
// >= iv_post_j + stride [using (iv)]
// >= min_int [using stride > 0, no underflow]
//
// Note that to prove the step case we only need (i).
//
// Thus, by assuming (i) and (ii), we proved (INV).
//
//
// It is therefore enough to add the following two Loop Limit Check Predicates to check assumptions (i) and (ii):
//
// (1) Loop Limit Check Predicate for (i):
// Using (i): adjusted_limit - 1 + stride <= max_int
//
// This condition is now restated to use limit instead of adjusted_limit:
//
// To prevent an overflow of adjusted_limit - 1 + stride itself, we rewrite this check to
// max_int - stride + 1 >= adjusted_limit
// We can merge the two constants into
// canonicalized_correction = stride - 1
// which gives us
// max_int - canonicalized_correction >= adjusted_limit
//
// To directly use limit instead of adjusted_limit in the predicate condition, we split adjusted_limit into:
// adjusted_limit = limit + limit_correction
// Since stride > 0 and limit_correction <= stride + 1, we can restate this with no over- or underflow into:
// max_int - canonicalized_correction - limit_correction >= limit
// Since canonicalized_correction and limit_correction are both constants, we can replace them with a new constant:
// final_correction = canonicalized_correction + limit_correction
// which gives us:
//
// Final predicate condition:
// max_int - final_correction >= limit
//
// (2) Loop Limit Check Predicate for (ii):
// Using (ii): init < limit
//
// This Loop Limit Check Predicate is not required if we can prove at compile time that either:
// (2.1) type(init) < type(limit)
// In this case, we know:
// all possible values of init < all possible values of limit
// and we can skip the predicate.
//
// (2.2) init < limit is already checked before (i.e. found as a dominating check)
// In this case, we do not need to re-check the condition and can skip the predicate.
// This is often found for while- and for-loops which have the following shape:
//
// if (init < limit) { // Dominating test. Do not need the Loop Limit Check Predicate below.
// i = init;
// if (init >= limit) { trap(); } // Here we would insert the Loop Limit Check Predicate
// do {
// i += stride;
// } while (i < limit);
// }
//
// (2.3) init + stride <= max_int
// In this case, there is no overflow of the iv phi after the first loop iteration.
// In the proof of the base case above we showed that init + stride <= max_int by using assumption (ii):
// init < limit
// In the proof of the step case above, we did not need (ii) anymore. Therefore, if we already know at
// compile time that init + stride <= max_int then we have trivially proven the base case and that
// there is no overflow of the iv phi after the first iteration. In this case, we don't need to check (ii)
// again and can skip the predicate.
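As a worked instance of predicate (1) above (the values are chosen for illustration, not taken from the patch): for an int loop with stride 2 whose exit check is 'iv_post_i <= limit', case (LE2) gives limit_correction = 1, and the predicate right-hand side works out as follows.

#include <cstdint>

int64_t loop_limit_check_rhs() {
  const int64_t max_jint = INT32_MAX;
  const int64_t stride_con = 2;
  const int64_t limit_correction = 1;                        // (LE2): adjusted_limit = limit + 1
  const int64_t canonicalized_correction = stride_con - 1;   // stride - 1
  const int64_t final_correction = canonicalized_correction + limit_correction;
  // Predicate (1) becomes: limit <= max_jint - final_correction, i.e. limit <= max_jint - 2,
  // which is exactly (i): adjusted_limit - 1 + stride = limit + 2 <= max_jint.
  return max_jint - final_correction;
}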
// Check if limit is excluded to do a more precise int overflow check.
bool incl_limit = (bt == BoolTest::le || bt == BoolTest::ge);
jlong stride_m = stride_con - (incl_limit ? 0 : (stride_con > 0 ? 1 : -1));

// If the compare points directly to the phi, we need to adjust
// the compare so that it points to the incr. The limit has
// to be adjusted to keep the trip count the same, and the
// adjusted limit should be checked for int overflow.
Node* adjusted_limit = limit;
if (phi_incr != nullptr) {
stride_m += stride_con;
}
// Accounting for (LE3) and (LE4) where we use pre-incremented phis in the loop exit check.
const jlong limit_correction_for_pre_iv_exit_check = (phi_incr != nullptr) ? stride_con : 0;

Node *init_control = x->in(LoopNode::EntryControl);
// Accounting for (LE2) and (LE4) where we use <= or >= in the loop exit check.
const bool includes_limit = (bt == BoolTest::le || bt == BoolTest::ge);
const jlong limit_correction_for_le_ge_exit_check = (includes_limit ? (stride_con > 0 ? 1 : -1) : 0);

const jlong limit_correction = limit_correction_for_pre_iv_exit_check + limit_correction_for_le_ge_exit_check;
const jlong canonicalized_correction = stride_con + (stride_con > 0 ? -1 : 1);
const jlong final_correction = canonicalized_correction + limit_correction;

int sov = check_stride_overflow(final_correction, limit_t, iv_bt);
Node* init_control = x->in(LoopNode::EntryControl);

int sov = check_stride_overflow(stride_m, limit_t, iv_bt);
// If sov==0, limit's type always satisfies the condition, for
// example, when it is an array length.
if (sov != 0) {
if (sov < 0) {
return false; // Bailout: integer overflow is certain.
}
// (1) Loop Limit Check Predicate is required because we could not statically prove that
// limit + final_correction = adjusted_limit - 1 + stride <= max_int
assert(!x->as_Loop()->is_loop_nest_inner_loop(), "loop was transformed");
// Generate loop's limit check.
// Loop limit check predicate should be near the loop.
const Predicates predicates(init_control);
const PredicateBlock* loop_limit_check_predicate_block = predicates.loop_limit_check_predicate_block();
if (!loop_limit_check_predicate_block->has_parse_predicate()) {
// The limit check predicate is not generated if this method trapped here before.
// The Loop Limit Check Parse Predicate is not generated if this method trapped here before.
#ifdef ASSERT
if (TraceLoopLimitCheck) {
tty->print("Missing Loop Limit Check Parse Predicate:");
@@ -1835,67 +1990,81 @@ bool PhaseIdealLoop::is_counted_loop(Node* x, IdealLoopTree*&loop, BasicType iv_
Node* bol;

if (stride_con > 0) {
cmp_limit = CmpNode::make(limit, _igvn.integercon(max_signed_integer(iv_bt) - stride_m, iv_bt), iv_bt);
cmp_limit = CmpNode::make(limit, _igvn.integercon(max_signed_integer(iv_bt) - final_correction, iv_bt), iv_bt);
bol = new BoolNode(cmp_limit, BoolTest::le);
} else {
cmp_limit = CmpNode::make(limit, _igvn.integercon(min_signed_integer(iv_bt) - stride_m, iv_bt), iv_bt);
cmp_limit = CmpNode::make(limit, _igvn.integercon(min_signed_integer(iv_bt) - final_correction, iv_bt), iv_bt);
bol = new BoolNode(cmp_limit, BoolTest::ge);
}

insert_loop_limit_check_predicate(init_control->as_IfTrue(), cmp_limit, bol);
}

// Now we need to canonicalize the loop condition.
if (bt == BoolTest::ne) {
assert(stride_con == 1 || stride_con == -1, "simple increment only");
if (stride_con > 0 && init_t->hi_as_long() < limit_t->lo_as_long()) {
// 'ne' can be replaced with 'lt' only when init < limit.
bt = BoolTest::lt;
} else if (stride_con < 0 && init_t->lo_as_long() > limit_t->hi_as_long()) {
// 'ne' can be replaced with 'gt' only when init > limit.
bt = BoolTest::gt;
} else {
const Predicates predicates(init_control);
const PredicateBlock* loop_limit_check_predicate_block = predicates.loop_limit_check_predicate_block();
if (!loop_limit_check_predicate_block->has_parse_predicate()) {
// The limit check predicate is not generated if this method trapped here before.
// (2.3)
const bool init_plus_stride_could_overflow =
(stride_con > 0 && init_t->hi_as_long() > max_signed_integer(iv_bt) - stride_con) ||
(stride_con < 0 && init_t->lo_as_long() < min_signed_integer(iv_bt) - stride_con);
// (2.1)
const bool init_gte_limit = (stride_con > 0 && init_t->hi_as_long() >= limit_t->lo_as_long()) ||
(stride_con < 0 && init_t->lo_as_long() <= limit_t->hi_as_long());

if (init_gte_limit && // (2.1)
((bt == BoolTest::ne || init_plus_stride_could_overflow) && // (2.3)
!has_dominating_loop_limit_check(init_trip, limit, stride_con, iv_bt, init_control))) { // (2.2)
// (2) Iteration Loop Limit Check Predicate is required because neither (2.1), (2.2), nor (2.3) holds.
// We use the following condition:
// - stride > 0: init < limit
// - stride < 0: init > limit
//
// This predicate is always required if we have a non-equal-operator in the loop exit check (where stride = 1 is
// a requirement). We transform the loop exit check by using a less-than-operator. By doing so, we must always
// check that init < limit. Otherwise, we could have a different number of iterations at runtime.

const Predicates predicates(init_control);
const PredicateBlock* loop_limit_check_predicate_block = predicates.loop_limit_check_predicate_block();
if (!loop_limit_check_predicate_block->has_parse_predicate()) {
// The Loop Limit Check Parse Predicate is not generated if this method trapped here before.
#ifdef ASSERT
if (TraceLoopLimitCheck) {
tty->print("Missing Loop Limit Check Parse Predicate:");
loop->dump_head();
x->dump(1);
}
if (TraceLoopLimitCheck) {
tty->print("Missing Loop Limit Check Parse Predicate:");
loop->dump_head();
x->dump(1);
}
#endif
return false;
}
return false;
}

ParsePredicateNode* loop_limit_check_parse_predicate = loop_limit_check_predicate_block->parse_predicate();
Node* parse_predicate_entry = loop_limit_check_parse_predicate->in(0);
if (!is_dominator(get_ctrl(limit), parse_predicate_entry) ||
!is_dominator(get_ctrl(init_trip), parse_predicate_entry)) {
return false;
}
ParsePredicateNode* loop_limit_check_parse_predicate = loop_limit_check_predicate_block->parse_predicate();
Node* parse_predicate_entry = loop_limit_check_parse_predicate->in(0);
if (!is_dominator(get_ctrl(limit), parse_predicate_entry) ||
!is_dominator(get_ctrl(init_trip), parse_predicate_entry)) {
return false;
}

Node* cmp_limit;
Node* bol;
Node* cmp_limit;
Node* bol;

if (stride_con > 0) {
cmp_limit = CmpNode::make(init_trip, limit, iv_bt);
bol = new BoolNode(cmp_limit, BoolTest::lt);
} else {
cmp_limit = CmpNode::make(init_trip, limit, iv_bt);
bol = new BoolNode(cmp_limit, BoolTest::gt);
}
if (stride_con > 0) {
cmp_limit = CmpNode::make(init_trip, limit, iv_bt);
bol = new BoolNode(cmp_limit, BoolTest::lt);
} else {
cmp_limit = CmpNode::make(init_trip, limit, iv_bt);
bol = new BoolNode(cmp_limit, BoolTest::gt);
}

insert_loop_limit_check_predicate(init_control->as_IfTrue(), cmp_limit, bol);
insert_loop_limit_check_predicate(init_control->as_IfTrue(), cmp_limit, bol);
}

if (stride_con > 0) {
// 'ne' can be replaced with 'lt' only when init < limit.
bt = BoolTest::lt;
} else if (stride_con < 0) {
// 'ne' can be replaced with 'gt' only when init > limit.
bt = BoolTest::gt;
}
if (bt == BoolTest::ne) {
// Now we need to canonicalize the loop condition if it is 'ne'.
assert(stride_con == 1 || stride_con == -1, "simple increment only - checked before");
if (stride_con > 0) {
// 'ne' can be replaced with 'lt' only when init < limit. This is ensured by the inserted predicate above.
bt = BoolTest::lt;
} else {
assert(stride_con < 0, "must be");
// 'ne' can be replaced with 'gt' only when init > limit. This is ensured by the inserted predicate above.
bt = BoolTest::gt;
}
}
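For the 'ne' case handled above, a small illustrative comparison (not from the patch) of why the rewrite to 'lt' is only sound once init < limit has been guaranteed by the predicate or by cases (2.1)-(2.3):

#include <cstdint>

// With init >= limit, an "i != limit" loop and an "i < limit" loop disagree.
int64_t trip_count_ne(int32_t init, int32_t limit) {
  int64_t trips = 0;
  for (int64_t i = init; i != limit; i++) { // widened iv to avoid C++ signed overflow; a Java int iv would wrap
    if (++trips > 16) break;                // cap the demo; the original loop would run on the order of 2^32 times
  }
  return trips;
}

int64_t trip_count_lt(int32_t init, int32_t limit) {
  int64_t trips = 0;
  for (int64_t i = init; i < limit; i++) {
    ++trips;
  }
  return trips;
}

// trip_count_ne(5, 3) keeps iterating (capped here), while trip_count_lt(5, 3) returns 0.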
@@ -1940,6 +2109,7 @@ bool PhaseIdealLoop::is_counted_loop(Node* x, IdealLoopTree*&loop, BasicType iv_
}
#endif

Node* adjusted_limit = limit;
if (phi_incr != nullptr) {
// If the compare points directly to the phi, we need to adjust
// the compare so that it points to the incr. The limit has
@@ -1953,7 +2123,7 @@ bool PhaseIdealLoop::is_counted_loop(Node* x, IdealLoopTree*&loop, BasicType iv_
adjusted_limit = gvn->transform(AddNode::make(limit, stride, iv_bt));
}

if (incl_limit) {
if (includes_limit) {
// The limit check guarantees that 'limit <= (max_jint - stride)' so
// we can convert 'i <= limit' to 'i < limit+1' since stride != 0.
//
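A tiny illustration (assumed values, not patch code) of the 'i <= limit' to 'i < limit+1' conversion mentioned above; the earlier limit check guarantees that limit + 1 cannot overflow.

// Both loops visit i = 0, 2, 4, 6, 8, 10; the second is the canonicalized form.
int sum_inclusive() {
  int s = 0;
  for (int i = 0; i <= 10; i += 2) { s += i; }
  return s;
}

int sum_canonicalized() {
  int s = 0;
  for (int i = 0; i < 11; i += 2) { s += i; }
  return s;
}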
@@ -2134,6 +2304,37 @@ bool PhaseIdealLoop::is_counted_loop(Node* x, IdealLoopTree*&loop, BasicType iv_
return true;
}

// Check if there is a dominating loop limit check of the form 'init < limit' starting at the loop entry.
// If there is one, then we do not need to create an additional Loop Limit Check Predicate.
bool PhaseIdealLoop::has_dominating_loop_limit_check(Node* init_trip, Node* limit, const jlong stride_con,
const BasicType iv_bt, Node* loop_entry) {
// Eagerly call transform() on the Cmp and Bool nodes to common them up if possible. This is required in order to
// successfully find a dominated test with the If node below.
Node* cmp_limit;
Node* bol;
if (stride_con > 0) {
cmp_limit = _igvn.transform(CmpNode::make(init_trip, limit, iv_bt));
bol = _igvn.transform(new BoolNode(cmp_limit, BoolTest::lt));
} else {
cmp_limit = _igvn.transform(CmpNode::make(init_trip, limit, iv_bt));
bol = _igvn.transform(new BoolNode(cmp_limit, BoolTest::gt));
}

// Check if there is already a dominating init < limit check. If so, we do not need a Loop Limit Check Predicate.
IfNode* iff = new IfNode(loop_entry, bol, PROB_MIN, COUNT_UNKNOWN);
// Also add fake IfProj nodes in order to call transform() on the newly created IfNode.
IfFalseNode* if_false = new IfFalseNode(iff);
IfTrueNode* if_true = new IfTrueNode(iff);
Node* dominated_iff = _igvn.transform(iff);
// ConI node? Found dominating test (IfNode::dominated_by() returns a ConI node).
const bool found_dominating_test = dominated_iff != nullptr && dominated_iff->is_ConI();

// Kill the If with its projections again in the next IGVN round by cutting it off from the graph.
_igvn.replace_input_of(iff, 0, C->top());
_igvn.replace_input_of(iff, 1, C->top());
return found_dominating_test;
}
//----------------------exact_limit-------------------------------------------
Node* PhaseIdealLoop::exact_limit( IdealLoopTree *loop ) {
assert(loop->_head->is_CountedLoop(), "");
@@ -1346,6 +1346,8 @@ public:
void rewire_cloned_nodes_to_ctrl(const ProjNode* old_ctrl, Node* new_ctrl, const Node_List& nodes_with_same_ctrl,
const Dict& old_new_mapping);
void rewire_inputs_of_clones_to_clones(Node* new_ctrl, Node* clone, const Dict& old_new_mapping, const Node* next);
bool has_dominating_loop_limit_check(Node* init_trip, Node* limit, jlong stride_con, BasicType iv_bt,
Node* loop_entry);

public:
void register_control(Node* n, IdealLoopTree *loop, Node* pred, bool update_body = true);
@@ -281,45 +281,86 @@ Node *MulINode::Ideal(PhaseGVN *phase, bool can_reshape) {
return res; // Return final result
}

// Classes to perform mul_ring() for MulI/MulLNode.
// This template class performs type multiplication for MulI/MulLNode. NativeType is either jint or jlong.
// In this class, the inputs of the MulNodes are named left and right with types [left_lo,left_hi] and [right_lo,right_hi].
//
// This class checks if all cross products of the left and right input of a multiplication have the same "overflow value".
// Without overflow/underflow:
// Product is positive? High signed multiplication result: 0
// Product is negative? High signed multiplication result: -1
// In general, the multiplication of two x-bit values could produce a result that consumes up to 2x bits if there is
// enough space to hold them all. We can therefore distinguish the following two cases for the product:
// - no overflow (i.e. product fits into x bits)
// - overflow (i.e. product does not fit into x bits)
//
// We normalize these values (see normalize_overflow_value()) such that we get the same "overflow value" by adding 1 if
// the product is negative. This allows us to compare all the cross product "overflow values". If one is different,
// compared to the others, then we know that this multiplication has a different number of over- or underflows compared
// to the others. In this case, we need to use the bottom type and cannot guarantee a better type. Otherwise, we can take
// the min and max of all computed cross products as type of this Mul node.
template<typename IntegerType>
class IntegerMulRing {
using NativeType = std::conditional_t<std::is_same<TypeInt, IntegerType>::value, jint, jlong>;
// When multiplying the two x-bit inputs 'left' and 'right' with their x-bit types [left_lo,left_hi] and [right_lo,right_hi]
// we need to find the minimum and maximum of all possible products to define a new type. To do that, we compute the
// cross product of [left_lo,left_hi] and [right_lo,right_hi] in 2x-bit space where no over- or underflow can happen.
// The cross product consists of the following four multiplications with 2x-bit results:
// (1) left_lo * right_lo
// (2) left_lo * right_hi
// (3) left_hi * right_lo
// (4) left_hi * right_hi
//
// Let's define the following two functions:
// - Lx(i): Returns the lower x bits of the 2x-bit number i.
// - Ux(i): Returns the upper x bits of the 2x-bit number i.
//
// Let's first assume all products are positive where only overflows are possible but no underflows. If there is no
// overflow for a product p, then the upper x bits of the 2x-bit result p are all zero:
// Ux(p) = 0
// Lx(p) = p
//
// If none of the multiplications (1)-(4) overflow, we can truncate the upper x bits and use the following result type
// with x bits:
// [result_lo,result_hi] = [MIN(Lx(1),Lx(2),Lx(3),Lx(4)),MAX(Lx(1),Lx(2),Lx(3),Lx(4))]
//
// If any of these multiplications overflows, we could pessimistically take the bottom type for the x bit result
// (i.e. all values in the x-bit space could be possible):
// [result_lo,result_hi] = [NativeType_min,NativeType_max]
//
// However, in case of any overflow, we can do better by analyzing the upper x bits of all multiplications (1)-(4) with
// 2x-bit results. The upper x bits tell us something about how many times a multiplication has overflown the lower
// x bits. If the upper x bits of (1)-(4) are all equal, then we know that all of these multiplications overflowed
// the lower x bits the same number of times:
// Ux((1)) = Ux((2)) = Ux((3)) = Ux((4))
//
// If all upper x bits are equal, we can conclude:
// Lx(MIN((1),(2),(3),(4))) = MIN(Lx(1),Lx(2),Lx(3),Lx(4))
// Lx(MAX((1),(2),(3),(4))) = MAX(Lx(1),Lx(2),Lx(3),Lx(4))
//
// Therefore, we can use the same precise x-bit result type as for the no-overflow case:
// [result_lo,result_hi] = [MIN(Lx(1),Lx(2),Lx(3),Lx(4)),MAX(Lx(1),Lx(2),Lx(3),Lx(4))]
//
//
// Now let's assume that (1)-(4) are signed multiplications where over- and underflow could occur:
// Negative numbers are all sign extended with ones. Therefore, if a negative product does not underflow, then the
// upper x bits of the 2x-bit result are all set to ones, which is minus one in two's complement. If there is an underflow,
// the upper x bits are decremented by the number of times an underflow occurred. The smallest possible negative product
// is NativeType_min*NativeType_max, where the upper x bits are set to NativeType_min / 2 (b11...0). It is therefore
// impossible to underflow the upper x bits. Thus, when having all ones (i.e. minus one) in the upper x bits, we know
// that there is no underflow.
//
// To be able to compare the number of over-/underflows of positive and negative products, respectively, we normalize
// the upper x bits of negative 2x-bit products by adding one. This way a product has no over- or underflow if the
// normalized upper x bits are zero. Now we can use the same improved type as for strictly positive products because we
// can compare the upper x bits in a unified way with N() being the normalization function:
// N(Ux((1))) = N(Ux((2))) = N(Ux((3))) = N(Ux((4)))
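A self-contained sketch of the normalization just described, for the jint case (the names mirror the comment rather than the HotSpot class; this is illustrative, not the patch code):

#include <cstdint>

// Upper 32 bits of the exact 64-bit product, normalized so that "no over-/underflow" is 0.
int32_t normalized_overflow_value(int32_t x, int32_t y) {
  const int64_t product = (int64_t)x * (int64_t)y;
  const int32_t upper = (int32_t)((uint64_t)product >> 32);
  return product < 0 ? upper + 1 : upper;
}

// The precise [min,max] result type is only safe if all four boundary products overflowed
// the same number of times, i.e. they share one normalized overflow value.
bool same_overflow_value(int32_t lo_left, int32_t hi_left, int32_t lo_right, int32_t hi_right) {
  const int32_t v = normalized_overflow_value(lo_left, lo_right);
  return v == normalized_overflow_value(lo_left, hi_right) &&
         v == normalized_overflow_value(hi_left, lo_right) &&
         v == normalized_overflow_value(hi_left, hi_right);
}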
template<typename NativeType>
class IntegerTypeMultiplication {

NativeType _lo_left;
NativeType _lo_right;
NativeType _hi_left;
NativeType _hi_right;
NativeType _lo_lo_product;
NativeType _lo_hi_product;
NativeType _hi_lo_product;
NativeType _hi_hi_product;
short _widen_left;
short _widen_right;

static const Type* overflow_type();
static NativeType multiply_high_signed_overflow_value(NativeType x, NativeType y);
static NativeType multiply_high(NativeType x, NativeType y);
const Type* create_type(NativeType lo, NativeType hi) const;

// Pre-compute cross products which are used at several places
void compute_cross_products() {
_lo_lo_product = java_multiply(_lo_left, _lo_right);
_lo_hi_product = java_multiply(_lo_left, _hi_right);
_hi_lo_product = java_multiply(_hi_left, _lo_right);
_hi_hi_product = java_multiply(_hi_left, _hi_right);
static NativeType multiply_high_signed_overflow_value(NativeType x, NativeType y) {
return normalize_overflow_value(x, y, multiply_high(x, y));
}

bool cross_products_not_same_overflow() const {
bool cross_product_not_same_overflow_value() const {
const NativeType lo_lo_high_product = multiply_high_signed_overflow_value(_lo_left, _lo_right);
const NativeType lo_hi_high_product = multiply_high_signed_overflow_value(_lo_left, _hi_right);
const NativeType hi_lo_high_product = multiply_high_signed_overflow_value(_hi_left, _lo_right);
@@ -329,66 +370,95 @@ class IntegerMulRing {
hi_lo_high_product != hi_hi_high_product;
}

bool does_product_overflow(NativeType x, NativeType y) const {
return multiply_high_signed_overflow_value(x, y) != 0;
}

static NativeType normalize_overflow_value(const NativeType x, const NativeType y, NativeType result) {
return java_multiply(x, y) < 0 ? result + 1 : result;
}

public:
IntegerMulRing(const IntegerType* left, const IntegerType* right) : _lo_left(left->_lo), _lo_right(right->_lo),
_hi_left(left->_hi), _hi_right(right->_hi), _widen_left(left->_widen), _widen_right(right->_widen) {
compute_cross_products();
}
template<class IntegerType>
IntegerTypeMultiplication(const IntegerType* left, const IntegerType* right)
: _lo_left(left->_lo), _lo_right(right->_lo),
_hi_left(left->_hi), _hi_right(right->_hi),
_widen_left(left->_widen), _widen_right(right->_widen) {}

// Compute the product type by multiplying the two input type ranges. We take the minimum and maximum of all possible
// values (requires 4 multiplications of all possible combinations of the two range boundary values). If any of these
// multiplications overflows/underflows, we need to make sure that they all have the same number of overflows/underflows.
// If that is not the case, we return the bottom type to cover all values (due to the inconsistent overflows/underflows).
const Type* compute() const {
if (cross_products_not_same_overflow()) {
if (cross_product_not_same_overflow_value()) {
return overflow_type();
}
const NativeType min = MIN4(_lo_lo_product, _lo_hi_product, _hi_lo_product, _hi_hi_product);
const NativeType max = MAX4(_lo_lo_product, _lo_hi_product, _hi_lo_product, _hi_hi_product);
return IntegerType::make(min, max, MAX2(_widen_left, _widen_right));

NativeType lo_lo_product = java_multiply(_lo_left, _lo_right);
NativeType lo_hi_product = java_multiply(_lo_left, _hi_right);
NativeType hi_lo_product = java_multiply(_hi_left, _lo_right);
NativeType hi_hi_product = java_multiply(_hi_left, _hi_right);
const NativeType min = MIN4(lo_lo_product, lo_hi_product, hi_lo_product, hi_hi_product);
const NativeType max = MAX4(lo_lo_product, lo_hi_product, hi_lo_product, hi_hi_product);
return create_type(min, max);
}

bool does_overflow() const {
return does_product_overflow(_lo_left, _lo_right) ||
does_product_overflow(_lo_left, _hi_right) ||
does_product_overflow(_hi_left, _lo_right) ||
does_product_overflow(_hi_left, _hi_right);
}
};


template <>
const Type* IntegerMulRing<TypeInt>::overflow_type() {
const Type* IntegerTypeMultiplication<jint>::overflow_type() {
return TypeInt::INT;
}

template <>
jint IntegerMulRing<TypeInt>::multiply_high_signed_overflow_value(const jint x, const jint y) {
jint IntegerTypeMultiplication<jint>::multiply_high(const jint x, const jint y) {
const jlong x_64 = x;
const jlong y_64 = y;
const jlong product = x_64 * y_64;
const jint result = (jint)((uint64_t)product >> 32u);
return normalize_overflow_value(x, y, result);
return (jint)((uint64_t)product >> 32u);
}

template <>
const Type* IntegerMulRing<TypeLong>::overflow_type() {
const Type* IntegerTypeMultiplication<jint>::create_type(jint lo, jint hi) const {
return TypeInt::make(lo, hi, MAX2(_widen_left, _widen_right));
}

template <>
const Type* IntegerTypeMultiplication<jlong>::overflow_type() {
return TypeLong::LONG;
}

template <>
jlong IntegerMulRing<TypeLong>::multiply_high_signed_overflow_value(const jlong x, const jlong y) {
const jlong result = multiply_high_signed(x, y);
return normalize_overflow_value(x, y, result);
jlong IntegerTypeMultiplication<jlong>::multiply_high(const jlong x, const jlong y) {
return multiply_high_signed(x, y);
}

template <>
const Type* IntegerTypeMultiplication<jlong>::create_type(jlong lo, jlong hi) const {
return TypeLong::make(lo, hi, MAX2(_widen_left, _widen_right));
}

// Compute the product type of two integer ranges into this node.
const Type* MulINode::mul_ring(const Type* type_left, const Type* type_right) const {
const IntegerMulRing<TypeInt> integer_mul_ring(type_left->is_int(), type_right->is_int());
return integer_mul_ring.compute();
const IntegerTypeMultiplication<jint> integer_multiplication(type_left->is_int(), type_right->is_int());
return integer_multiplication.compute();
}

bool MulINode::does_overflow(const TypeInt* type_left, const TypeInt* type_right) {
const IntegerTypeMultiplication<jint> integer_multiplication(type_left, type_right);
return integer_multiplication.does_overflow();
}

// Compute the product type of two long ranges into this node.
const Type* MulLNode::mul_ring(const Type* type_left, const Type* type_right) const {
const IntegerMulRing<TypeLong> integer_mul_ring(type_left->is_long(), type_right->is_long());
return integer_mul_ring.compute();
const IntegerTypeMultiplication<jlong> integer_multiplication(type_left->is_long(), type_right->is_long());
return integer_multiplication.compute();
}

//=============================================================================
@@ -95,6 +95,7 @@ public:
virtual int Opcode() const;
virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
virtual const Type *mul_ring( const Type *, const Type * ) const;
static bool does_overflow(const TypeInt* type_left, const TypeInt* type_right);
const Type *mul_id() const { return TypeInt::ONE; }
const Type *add_id() const { return TypeInt::ZERO; }
int add_opcode() const { return Op_AddI; }
@@ -64,6 +64,7 @@ compiler/rtm/locking/TestUseRTMXendForLockBusy.java 8183263 generic-x64,generic-
compiler/rtm/print/TestPrintPreciseRTMLockingStatistics.java 8183263 generic-x64,generic-i586

compiler/c2/Test8004741.java 8235801 generic-all
compiler/c2/irTests/TestDuplicateBackedge.java 8318904 generic-all

compiler/codecache/jmx/PoolsIndependenceTest.java 8264632 macosx-all