8314307: Improve loop handling

Co-authored-by: Christian Hagedorn <chagedorn@openjdk.org>
Co-authored-by: Roland Westrelin <rwestrel@redhat.com>
Co-authored-by: Emanuel Peter <epeter@openjdk.org>
Reviewed-by: mschoene, rhalade, thartmann, epeter
This commit is contained in:
Christian Hagedorn 2023-11-07 11:08:30 +00:00 committed by Henry Jen
parent 1cc6fbd9b0
commit c1a568c9c4
7 changed files with 454 additions and 132 deletions

View File

@ -1898,6 +1898,46 @@ Node* RangeCheckNode::Ideal(PhaseGVN *phase, bool can_reshape) {
// then we are guaranteed to fail, so just start interpreting there.
// We 'expand' the top 3 range checks to include all post-dominating
// checks.
//
// Example:
// a[i+x] // (1) 1 < x < 6
// a[i+3] // (2)
// a[i+4] // (3)
// a[i+6] // max = max of all constants
// a[i+2]
// a[i+1] // min = min of all constants
//
// If x < 3:
// (1) a[i+x]: Leave unchanged
// (2) a[i+3]: Replace with a[i+max] = a[i+6]: i+x < i+3 <= i+6 -> (2) is covered
// (3) a[i+4]: Replace with a[i+min] = a[i+1]: i+1 < i+4 <= i+6 -> (3) and all following checks are covered
// Remove all other a[i+c] checks
//
// If x >= 3:
// (1) a[i+x]: Leave unchanged
// (2) a[i+3]: Replace with a[i+min] = a[i+1]: i+1 < i+3 <= i+x -> (2) is covered
// (3) a[i+4]: Replace with a[i+max] = a[i+6]: i+1 < i+4 <= i+6 -> (3) and all following checks are covered
// Remove all other a[i+c] checks
//
// We only need the top 2 range checks if x is the min or max of all constants.
//
// This, however, only works if the interval [i+min,i+max] is not larger than max_int (i.e. abs(max - min) < max_int):
// The theoretical max size of an array is max_int with:
// - Valid index space: [0,max_int-1]
// - Invalid index space: [max_int,-1] // max_int, min_int, min_int + 1, ..., -1
//
// The size of the consecutive valid index space is smaller than the size of the consecutive invalid index space.
// If we choose min and max in such a way that:
// - abs(max - min) < max_int
// - i+max and i+min are inside the valid index space
// then all indices [i+min,i+max] must be in the valid index space. Otherwise, the invalid index space must be
// smaller than the valid index space which is never the case for any array size.
//
// Choosing a smaller array size only makes the valid index space smaller and the invalid index space larger and
// the argument above still holds.
//
// Note that the same optimization with the same maximal accepted interval size can also be found in C1.
const jlong maximum_number_of_min_max_interval_indices = (jlong)max_jint;
// The top 3 range checks seen
const int NRC = 3;
@ -1932,6 +1972,10 @@ Node* RangeCheckNode::Ideal(PhaseGVN *phase, bool can_reshape) {
found_immediate_dominator = true;
break;
}
// "x - y" -> must add one to the difference for number of elements in [x,y]
const jlong diff = (jlong)MIN2(offset2, off_lo) - (jlong)MAX2(offset2, off_hi);
if (ABS(diff) < maximum_number_of_min_max_interval_indices) {
// Gather expanded bounds
off_lo = MIN2(off_lo, offset2);
off_hi = MAX2(off_hi, offset2);
@ -1941,6 +1985,7 @@ Node* RangeCheckNode::Ideal(PhaseGVN *phase, bool can_reshape) {
nb_checks++;
}
}
}
prev_dom = dom;
dom = up_one_dom(dom);
if (!dom) break;

View File

@ -850,9 +850,10 @@ BoolNode* PhaseIdealLoop::rc_predicate(IdealLoopTree* loop, Node* ctrl, int scal
// Check if (scale * max_idx_expr) may overflow
const TypeInt* scale_type = TypeInt::make(scale);
MulINode* mul = new MulINode(max_idx_expr, con_scale);
idx_type = (TypeInt*)mul->mul_ring(idx_type, scale_type);
if (overflow || TypeInt::INT->higher_equal(idx_type)) {
if (overflow || MulINode::does_overflow(idx_type, scale_type)) {
// May overflow
idx_type = TypeInt::INT;
mul->destruct(&_igvn);
if (!overflow) {
max_idx_expr = new ConvI2LNode(max_idx_expr);
@ -865,6 +866,7 @@ BoolNode* PhaseIdealLoop::rc_predicate(IdealLoopTree* loop, Node* ctrl, int scal
} else {
// No overflow possible
max_idx_expr = mul;
idx_type = (TypeInt*)mul->mul_ring(idx_type, scale_type);
}
register_new_node(max_idx_expr, ctrl);
}

View File

@ -491,19 +491,19 @@ PhiNode* PhaseIdealLoop::loop_iv_phi(Node* xphi, Node* phi_incr, Node* x, IdealL
return phi;
}
static int check_stride_overflow(jlong stride_con, const TypeInteger* limit_t, BasicType bt) {
if (stride_con > 0) {
if (limit_t->lo_as_long() > (max_signed_integer(bt) - stride_con)) {
static int check_stride_overflow(jlong final_correction, const TypeInteger* limit_t, BasicType bt) {
if (final_correction > 0) {
if (limit_t->lo_as_long() > (max_signed_integer(bt) - final_correction)) {
return -1;
}
if (limit_t->hi_as_long() > (max_signed_integer(bt) - stride_con)) {
if (limit_t->hi_as_long() > (max_signed_integer(bt) - final_correction)) {
return 1;
}
} else {
if (limit_t->hi_as_long() < (min_signed_integer(bt) - stride_con)) {
if (limit_t->hi_as_long() < (min_signed_integer(bt) - final_correction)) {
return -1;
}
if (limit_t->lo_as_long() < (min_signed_integer(bt) - stride_con)) {
if (limit_t->lo_as_long() < (min_signed_integer(bt) - final_correction)) {
return 1;
}
}
@ -1773,49 +1773,204 @@ bool PhaseIdealLoop::is_counted_loop(Node* x, IdealLoopTree*&loop, BasicType iv_
C->print_method(PHASE_BEFORE_CLOOPS, 3);
// ===================================================
// Generate loop limit check to avoid integer overflow
// in cases like next (cyclic loops):
// We can only convert this loop to a counted loop if we can guarantee that the iv phi will never overflow at runtime.
// This is an implicit assumption taken by some loop optimizations. We therefore must ensure this property at all cost.
// At this point, we've already excluded some trivial cases where an overflow could have been proven statically.
// But even though we cannot prove that an overflow will *not* happen, we still want to speculatively convert this loop
// to a counted loop. This can be achieved by adding additional iv phi overflow checks before the loop. If they fail,
// we trap and resume execution before the loop without having executed any iteration of the loop, yet.
//
// for (i=0; i <= max_jint; i++) {}
// for (i=0; i < max_jint; i+=2) {}
// These additional iv phi overflow checks can be inserted as Loop Limit Check Predicates above the Loop Limit Check
// Parse Predicate which captures a JVM state just before the entry of the loop. If there is no such Parse Predicate,
// we cannot generate a Loop Limit Check Predicate and thus cannot speculatively convert the loop to a counted loop.
//
// In the following, we only focus on int loops with stride > 0 to keep things simple. The argumentation and proof
// for stride < 0 is analogous. For long loops, we would replace max_int with max_long.
//
//
// Limit check predicate depends on the loop test:
// The loop to be converted does not always need to have the often used shape:
//
// for(;i != limit; i++) --> limit <= (max_jint)
// for(;i < limit; i+=stride) --> limit <= (max_jint - stride + 1)
// for(;i <= limit; i+=stride) --> limit <= (max_jint - stride )
// i = init
// i = init loop:
// do { ...
// // ... equivalent i+=stride
// i+=stride <==> if (i < limit)
// } while (i < limit); goto loop
// exit:
// ...
//
// where the loop exit check uses the post-incremented iv phi and a '<'-operator.
//
// We could also have '<='-operator (or '>='-operator for negative strides) or use the pre-incremented iv phi value
// in the loop exit check:
//
// i = init
// loop:
// ...
// if (i <= limit)
// i+=stride
// goto loop
// exit:
// ...
//
// Let's define the following terms:
// - iv_pre_i: The pre-incremented iv phi before the i-th iteration.
// - iv_post_i: The post-incremented iv phi after the i-th iteration.
//
// The iv_pre_i and iv_post_i have the following relation:
// iv_pre_i + stride = iv_post_i
//
// When converting a loop to a counted loop, we want to have a canonicalized loop exit check of the form:
// iv_post_i < adjusted_limit
//
// If that is not the case, we need to canonicalize the loop exit check by using different values for adjusted_limit:
// (LE1) iv_post_i < limit: Already canonicalized. We can directly use limit as adjusted_limit.
// -> adjusted_limit = limit.
// (LE2) iv_post_i <= limit:
// iv_post_i < limit + 1
// -> adjusted_limit = limit + 1
// (LE3) iv_pre_i < limit:
// iv_pre_i + stride < limit + stride
// iv_post_i < limit + stride
// -> adjusted_limit = limit + stride
// (LE4) iv_pre_i <= limit:
// iv_pre_i < limit + 1
// iv_pre_i + stride < limit + stride + 1
// iv_post_i < limit + stride + 1
// -> adjusted_limit = limit + stride + 1
//
// Note that:
// (AL) limit <= adjusted_limit.
//
// The following loop invariant has to hold for counted loops with n iterations (i.e. loop exit check true after n-th
// loop iteration) and a canonicalized loop exit check to guarantee that no iv_post_i over- or underflows:
// (INV) For i = 1..n, min_int <= iv_post_i <= max_int
//
// To prove (INV), we require the following two conditions/assumptions:
// (i): adjusted_limit - 1 + stride <= max_int
// (ii): init < limit
//
// If we can prove (INV), we know that there can be no over- or underflow of any iv phi value. We prove (INV) by
// induction by assuming (i) and (ii).
//
// Proof by Induction
// ------------------
// > Base case (i = 1): We show that (INV) holds after the first iteration:
// min_int <= iv_post_1 = init + stride <= max_int
// Proof:
// First, we note that (ii) implies
// (iii) init <= limit - 1
// max_int >= adjusted_limit - 1 + stride [using (i)]
// >= limit - 1 + stride [using (AL)]
// >= init + stride [using (iii)]
// >= min_int [using stride > 0, no underflow]
// Thus, no overflow happens after the first iteration and (INV) holds for i = 1.
//
// Note that to prove the base case we need (i) and (ii).
//
// > Induction Hypothesis (i = j, j >= 1): Assume that (INV) holds after the j-th iteration:
// min_int <= iv_post_j <= max_int
// > Step case (i = j + 1): We show that (INV) also holds after the j+1-th iteration:
// min_int <= iv_post_{j+1} = iv_post_j + stride <= max_int
// Proof:
// If iv_post_j >= adjusted_limit:
// We exit the loop after the j-th iteration, and we don't execute the j+1-th iteration anymore. Thus, there is
// also no iv_post_{j+1}. Since (INV) holds for iv_post_j, there is nothing left to prove.
// If iv_post_j < adjusted_limit:
// First, we note that:
// (iv) iv_post_j <= adjusted_limit - 1
// max_int >= adjusted_limit - 1 + stride [using (i)]
// >= iv_post_j + stride [using (iv)]
// >= min_int [using stride > 0, no underflow]
//
// Note that to prove the step case we only need (i).
//
// Thus, by assuming (i) and (ii), we proved (INV).
//
//
// It is therefore enough to add the following two Loop Limit Check Predicates to check assumptions (i) and (ii):
//
// (1) Loop Limit Check Predicate for (i):
// Using (i): adjusted_limit - 1 + stride <= max_int
//
// This condition is now restated to use limit instead of adjusted_limit:
//
// To prevent an overflow of adjusted_limit - 1 + stride itself, we rewrite this check to
// max_int - stride + 1 >= adjusted_limit
// We can merge the two constants into
// canonicalized_correction = stride - 1
// which gives us
// max_int - canonicalized_correction >= adjusted_limit
//
// To directly use limit instead of adjusted_limit in the predicate condition, we split adjusted_limit into:
// adjusted_limit = limit + limit_correction
// Since stride > 0 and limit_correction <= stride + 1, we can restate this with no over- or underflow into:
// max_int - canonicalized_correction - limit_correction >= limit
// Since canonicalized_correction and limit_correction are both constants, we can replace them with a new constant:
// final_correction = canonicalized_correction + limit_correction
// which gives us:
//
// Final predicate condition:
// max_int - final_correction >= limit
//
// (2) Loop Limit Check Predicate for (ii):
// Using (ii): init < limit
//
// This Loop Limit Check Predicate is not required if we can prove at compile time that either:
// (2.1) type(init) < type(limit)
// In this case, we know:
// all possible values of init < all possible values of limit
// and we can skip the predicate.
//
// (2.2) init < limit is already checked before (i.e. found as a dominating check)
// In this case, we do not need to re-check the condition and can skip the predicate.
// This is often found for while- and for-loops which have the following shape:
//
// if (init < limit) { // Dominating test. Do not need the Loop Limit Check Predicate below.
// i = init;
// if (init >= limit) { trap(); } // Here we would insert the Loop Limit Check Predicate
// do {
// i += stride;
// } while (i < limit);
// }
//
// (2.3) init + stride <= max_int
// In this case, there is no overflow of the iv phi after the first loop iteration.
// In the proof of the base case above we showed that init + stride <= max_int by using assumption (ii):
// init < limit
// In the proof of the step case above, we did not need (ii) anymore. Therefore, if we already know at
// compile time that init + stride <= max_int then we have trivially proven the base case and that
// there is no overflow of the iv phi after the first iteration. In this case, we don't need to check (ii)
// again and can skip the predicate.
// Check if limit is excluded to do more precise int overflow check.
bool incl_limit = (bt == BoolTest::le || bt == BoolTest::ge);
jlong stride_m = stride_con - (incl_limit ? 0 : (stride_con > 0 ? 1 : -1));
// If compare points directly to the phi we need to adjust
// the compare so that it points to the incr. Limit has
// to be adjusted to keep trip count the same and the
// adjusted limit should be checked for int overflow.
Node* adjusted_limit = limit;
if (phi_incr != nullptr) {
stride_m += stride_con;
}
// Accounting for (LE3) and (LE4) where we use pre-incremented phis in the loop exit check.
const jlong limit_correction_for_pre_iv_exit_check = (phi_incr != nullptr) ? stride_con : 0;
// Accounting for (LE2) and (LE4) where we use <= or >= in the loop exit check.
const bool includes_limit = (bt == BoolTest::le || bt == BoolTest::ge);
const jlong limit_correction_for_le_ge_exit_check = (includes_limit ? (stride_con > 0 ? 1 : -1) : 0);
const jlong limit_correction = limit_correction_for_pre_iv_exit_check + limit_correction_for_le_ge_exit_check;
const jlong canonicalized_correction = stride_con + (stride_con > 0 ? -1 : 1);
const jlong final_correction = canonicalized_correction + limit_correction;
int sov = check_stride_overflow(final_correction, limit_t, iv_bt);
Node* init_control = x->in(LoopNode::EntryControl);
int sov = check_stride_overflow(stride_m, limit_t, iv_bt);
// If sov==0, limit's type always satisfies the condition, for
// example, when it is an array length.
if (sov != 0) {
if (sov < 0) {
return false; // Bailout: integer overflow is certain.
}
// (1) Loop Limit Check Predicate is required because we could not statically prove that
// limit + final_correction = adjusted_limit - 1 + stride <= max_int
assert(!x->as_Loop()->is_loop_nest_inner_loop(), "loop was transformed");
// Generate loop's limit check.
// Loop limit check predicate should be near the loop.
const Predicates predicates(init_control);
const PredicateBlock* loop_limit_check_predicate_block = predicates.loop_limit_check_predicate_block();
if (!loop_limit_check_predicate_block->has_parse_predicate()) {
// The limit check predicate is not generated if this method trapped here before.
// The Loop Limit Check Parse Predicate is not generated if this method trapped here before.
#ifdef ASSERT
if (TraceLoopLimitCheck) {
tty->print("Missing Loop Limit Check Parse Predicate:");
@ -1835,30 +1990,40 @@ bool PhaseIdealLoop::is_counted_loop(Node* x, IdealLoopTree*&loop, BasicType iv_
Node* bol;
if (stride_con > 0) {
cmp_limit = CmpNode::make(limit, _igvn.integercon(max_signed_integer(iv_bt) - stride_m, iv_bt), iv_bt);
cmp_limit = CmpNode::make(limit, _igvn.integercon(max_signed_integer(iv_bt) - final_correction, iv_bt), iv_bt);
bol = new BoolNode(cmp_limit, BoolTest::le);
} else {
cmp_limit = CmpNode::make(limit, _igvn.integercon(min_signed_integer(iv_bt) - stride_m, iv_bt), iv_bt);
cmp_limit = CmpNode::make(limit, _igvn.integercon(min_signed_integer(iv_bt) - final_correction, iv_bt), iv_bt);
bol = new BoolNode(cmp_limit, BoolTest::ge);
}
insert_loop_limit_check_predicate(init_control->as_IfTrue(), cmp_limit, bol);
}
// Now we need to canonicalize loop condition.
if (bt == BoolTest::ne) {
assert(stride_con == 1 || stride_con == -1, "simple increment only");
if (stride_con > 0 && init_t->hi_as_long() < limit_t->lo_as_long()) {
// 'ne' can be replaced with 'lt' only when init < limit.
bt = BoolTest::lt;
} else if (stride_con < 0 && init_t->lo_as_long() > limit_t->hi_as_long()) {
// 'ne' can be replaced with 'gt' only when init > limit.
bt = BoolTest::gt;
} else {
// (2.3)
const bool init_plus_stride_could_overflow =
(stride_con > 0 && init_t->hi_as_long() > max_signed_integer(iv_bt) - stride_con) ||
(stride_con < 0 && init_t->lo_as_long() < min_signed_integer(iv_bt) - stride_con);
// (2.1)
const bool init_gte_limit = (stride_con > 0 && init_t->hi_as_long() >= limit_t->lo_as_long()) ||
(stride_con < 0 && init_t->lo_as_long() <= limit_t->hi_as_long());
if (init_gte_limit && // (2.1)
((bt == BoolTest::ne || init_plus_stride_could_overflow) && // (2.3)
!has_dominating_loop_limit_check(init_trip, limit, stride_con, iv_bt, init_control))) { // (2.2)
// (2) Iteration Loop Limit Check Predicate is required because neither (2.1), (2.2), nor (2.3) holds.
// We use the following condition:
// - stride > 0: init < limit
// - stride < 0: init > limit
//
// This predicate is always required if we have a non-equal-operator in the loop exit check (where stride = 1 is
// a requirement). We transform the loop exit check by using a less-than-operator. By doing so, we must always
// check that init < limit. Otherwise, we could have a different number of iterations at runtime.
const Predicates predicates(init_control);
const PredicateBlock* loop_limit_check_predicate_block = predicates.loop_limit_check_predicate_block();
if (!loop_limit_check_predicate_block->has_parse_predicate()) {
// The limit check predicate is not generated if this method trapped here before.
// The Loop Limit Check Parse Predicate is not generated if this method trapped here before.
#ifdef ASSERT
if (TraceLoopLimitCheck) {
tty->print("Missing Loop Limit Check Parse Predicate:");
@ -1888,14 +2053,18 @@ bool PhaseIdealLoop::is_counted_loop(Node* x, IdealLoopTree*&loop, BasicType iv_
}
insert_loop_limit_check_predicate(init_control->as_IfTrue(), cmp_limit, bol);
if (stride_con > 0) {
// 'ne' can be replaced with 'lt' only when init < limit.
bt = BoolTest::lt;
} else if (stride_con < 0) {
// 'ne' can be replaced with 'gt' only when init > limit.
bt = BoolTest::gt;
}
if (bt == BoolTest::ne) {
// Now we need to canonicalize the loop condition if it is 'ne'.
assert(stride_con == 1 || stride_con == -1, "simple increment only - checked before");
if (stride_con > 0) {
// 'ne' can be replaced with 'lt' only when init < limit. This is ensured by the inserted predicate above.
bt = BoolTest::lt;
} else {
assert(stride_con < 0, "must be");
// 'ne' can be replaced with 'gt' only when init > limit. This is ensured by the inserted predicate above.
bt = BoolTest::gt;
}
}
@ -1940,6 +2109,7 @@ bool PhaseIdealLoop::is_counted_loop(Node* x, IdealLoopTree*&loop, BasicType iv_
}
#endif
Node* adjusted_limit = limit;
if (phi_incr != nullptr) {
// If compare points directly to the phi we need to adjust
// the compare so that it points to the incr. Limit has
@ -1953,7 +2123,7 @@ bool PhaseIdealLoop::is_counted_loop(Node* x, IdealLoopTree*&loop, BasicType iv_
adjusted_limit = gvn->transform(AddNode::make(limit, stride, iv_bt));
}
if (incl_limit) {
if (includes_limit) {
// The limit check guarantees that 'limit <= (max_jint - stride)' so
// we can convert 'i <= limit' to 'i < limit+1' since stride != 0.
//
@ -2134,6 +2304,37 @@ bool PhaseIdealLoop::is_counted_loop(Node* x, IdealLoopTree*&loop, BasicType iv_
return true;
}
// Determines whether a test of the form 'init < limit' (stride > 0) or 'init > limit' (stride <= 0 path) is already
// found as a dominating test when starting the search at loop_entry. If so, the caller does not need to create an
// additional Loop Limit Check Predicate for that condition.
bool PhaseIdealLoop::has_dominating_loop_limit_check(Node* init_trip, Node* limit, const jlong stride_con,
                                                     const BasicType iv_bt, Node* loop_entry) {
  // transform() is called eagerly on the Cmp and Bool node so that they are commoned up with existing nodes if
  // possible. Without this, the dominated-test lookup done for the If node below could not succeed.
  Node* init_limit_cmp = _igvn.transform(CmpNode::make(init_trip, limit, iv_bt));
  Node* init_limit_bol = (stride_con > 0) ? _igvn.transform(new BoolNode(init_limit_cmp, BoolTest::lt))
                                          : _igvn.transform(new BoolNode(init_limit_cmp, BoolTest::gt));

  // Build a temporary If at the loop entry. The fake IfProj nodes are only created so that transform() can be
  // called on the new IfNode.
  IfNode* probe_iff = new IfNode(loop_entry, init_limit_bol, PROB_MIN, COUNT_UNKNOWN);
  IfFalseNode* probe_false_proj = new IfFalseNode(probe_iff);
  IfTrueNode* probe_true_proj = new IfTrueNode(probe_iff);

  // A ConI result means that a dominating test was found (IfNode::dominated_by() returns a ConI node).
  Node* transformed_iff = _igvn.transform(probe_iff);
  bool dominating_test_exists = false;
  if (transformed_iff != nullptr) {
    dominating_test_exists = transformed_iff->is_ConI();
  }

  // Cut the temporary If off from the graph; it is killed together with its projections in the next IGVN round.
  _igvn.replace_input_of(probe_iff, 0, C->top());
  _igvn.replace_input_of(probe_iff, 1, C->top());
  return dominating_test_exists;
}
//----------------------exact_limit-------------------------------------------
Node* PhaseIdealLoop::exact_limit( IdealLoopTree *loop ) {
assert(loop->_head->is_CountedLoop(), "");

View File

@ -1346,6 +1346,8 @@ public:
void rewire_cloned_nodes_to_ctrl(const ProjNode* old_ctrl, Node* new_ctrl, const Node_List& nodes_with_same_ctrl,
const Dict& old_new_mapping);
void rewire_inputs_of_clones_to_clones(Node* new_ctrl, Node* clone, const Dict& old_new_mapping, const Node* next);
bool has_dominating_loop_limit_check(Node* init_trip, Node* limit, jlong stride_con, BasicType iv_bt,
Node* loop_entry);
public:
void register_control(Node* n, IdealLoopTree *loop, Node* pred, bool update_body = true);

View File

@ -281,45 +281,86 @@ Node *MulINode::Ideal(PhaseGVN *phase, bool can_reshape) {
return res; // Return final result
}
// Classes to perform mul_ring() for MulI/MulLNode.
// This template class performs type multiplication for MulI/MulLNode. NativeType is either jint or jlong.
// In this class, the inputs of the MulNodes are named left and right with types [left_lo,left_hi] and [right_lo,right_hi].
//
// This class checks if all cross products of the left and right input of a multiplication have the same "overflow value".
// Without overflow/underflow:
// Product is positive? High signed multiplication result: 0
// Product is negative? High signed multiplication result: -1
// In general, the multiplication of two x-bit values could produce a result that consumes up to 2x bits if there is
// enough space to hold them all. We can therefore distinguish the following two cases for the product:
// - no overflow (i.e. product fits into x bits)
// - overflow (i.e. product does not fit into x bits)
//
// We normalize these values (see normalize_overflow_value()) such that we get the same "overflow value" by adding 1 if
// the product is negative. This allows us to compare all the cross product "overflow values". If one is different,
// compared to the others, then we know that this multiplication has a different number of over- or underflows compared
// to the others. In this case, we need to use bottom type and cannot guarantee a better type. Otherwise, we can take
// the min and max of all computed cross products as type of this Mul node.
template<typename IntegerType>
class IntegerMulRing {
using NativeType = std::conditional_t<std::is_same<TypeInt, IntegerType>::value, jint, jlong>;
// When multiplying the two x-bit inputs 'left' and 'right' with their x-bit types [left_lo,left_hi] and [right_lo,right_hi]
// we need to find the minimum and maximum of all possible products to define a new type. To do that, we compute the
// cross product of [left_lo,left_hi] and [right_lo,right_hi] in 2x-bit space where no over- or underflow can happen.
// The cross product consists of the following four multiplications with 2x-bit results:
// (1) left_lo * right_lo
// (2) left_lo * right_hi
// (3) left_hi * right_lo
// (4) left_hi * right_hi
//
// Let's define the following two functions:
// - Lx(i): Returns the lower x bits of the 2x-bit number i.
// - Ux(i): Returns the upper x bits of the 2x-bit number i.
//
// Let's first assume all products are positive where only overflows are possible but no underflows. If there is no
// overflow for a product p, then the upper x bits of the 2x-bit result p are all zero:
// Ux(p) = 0
// Lx(p) = p
//
// If none of the multiplications (1)-(4) overflow, we can truncate the upper x bits and use the following result type
// with x bits:
// [result_lo,result_hi] = [MIN(Lx(1),Lx(2),Lx(3),Lx(4)),MAX(Lx(1),Lx(2),Lx(3),Lx(4))]
//
// If any of these multiplications overflows, we could pessimistically take the bottom type for the x bit result
// (i.e. all values in the x-bit space could be possible):
// [result_lo,result_hi] = [NativeType_min,NativeType_max]
//
// However, in case of any overflow, we can do better by analyzing the upper x bits of all multiplications (1)-(4) with
// 2x-bit results. The upper x bits tell us something about how many times a multiplication has overflown the lower
// x bits. If the upper x bits of (1)-(4) are all equal, then we know that all of these multiplications overflowed
// the lower x bits the same number of times:
// Ux((1)) = Ux((2)) = Ux((3)) = Ux((4))
//
// If all upper x bits are equal, we can conclude:
// Lx(MIN((1),(2),(3),(4))) = MIN(Lx(1),Lx(2),Lx(3),Lx(4))
// Lx(MAX((1),(2),(3),(4))) = MAX(Lx(1),Lx(2),Lx(3),Lx(4))
//
// Therefore, we can use the same precise x-bit result type as for the no-overflow case:
// [result_lo,result_hi] = [MIN(Lx(1),Lx(2),Lx(3),Lx(4)),MAX(Lx(1),Lx(2),Lx(3),Lx(4))]
//
//
// Now let's assume that (1)-(4) are signed multiplications where over- and underflow could occur:
// Negative numbers are all sign extended with ones. Therefore, if a negative product does not underflow, then the
// upper x bits of the 2x-bit result are all set to ones which is minus one in two's complement. If there is an underflow,
// the upper x bits are decremented by the number of times an underflow occurred. The smallest possible negative product
// is NativeType_min*NativeType_max, where the upper x bits are set to NativeType_min / 2 (b11...0). It is therefore
// impossible to underflow the upper x bits. Thus, when having all ones (i.e. minus one) in the upper x bits, we know
// that there is no underflow.
//
// To be able to compare the number of over-/underflows of positive and negative products, respectively, we normalize
// the upper x bits of negative 2x-bit products by adding one. This way a product has no over- or underflow if the
// normalized upper x bits are zero. Now we can use the same improved type as for strictly positive products because we
// can compare the upper x bits in a unified way with N() being the normalization function:
// N(Ux((1))) = N(Ux((2))) = N(Ux((3))) = N(Ux((4)))
template<typename NativeType>
class IntegerTypeMultiplication {
NativeType _lo_left;
NativeType _lo_right;
NativeType _hi_left;
NativeType _hi_right;
NativeType _lo_lo_product;
NativeType _lo_hi_product;
NativeType _hi_lo_product;
NativeType _hi_hi_product;
short _widen_left;
short _widen_right;
static const Type* overflow_type();
static NativeType multiply_high_signed_overflow_value(NativeType x, NativeType y);
static NativeType multiply_high(NativeType x, NativeType y);
const Type* create_type(NativeType lo, NativeType hi) const;
// Pre-compute cross products which are used at several places
void compute_cross_products() {
_lo_lo_product = java_multiply(_lo_left, _lo_right);
_lo_hi_product = java_multiply(_lo_left, _hi_right);
_hi_lo_product = java_multiply(_hi_left, _lo_right);
_hi_hi_product = java_multiply(_hi_left, _hi_right);
static NativeType multiply_high_signed_overflow_value(NativeType x, NativeType y) {
return normalize_overflow_value(x, y, multiply_high(x, y));
}
bool cross_products_not_same_overflow() const {
bool cross_product_not_same_overflow_value() const {
const NativeType lo_lo_high_product = multiply_high_signed_overflow_value(_lo_left, _lo_right);
const NativeType lo_hi_high_product = multiply_high_signed_overflow_value(_lo_left, _hi_right);
const NativeType hi_lo_high_product = multiply_high_signed_overflow_value(_hi_left, _lo_right);
@ -329,66 +370,95 @@ class IntegerMulRing {
hi_lo_high_product != hi_hi_high_product;
}
// Returns true if the normalized overflow value of the product x * y is non-zero.
bool does_product_overflow(NativeType x, NativeType y) const {
  const NativeType overflow_value = multiply_high_signed_overflow_value(x, y);
  return overflow_value != 0;
}
// Normalizes the given high-part 'result' of the product x * y: one is added when java_multiply(x, y) is negative,
// otherwise 'result' is returned unchanged.
static NativeType normalize_overflow_value(const NativeType x, const NativeType y, NativeType result) {
  if (java_multiply(x, y) < 0) {
    return result + 1;
  }
  return result;
}
public:
IntegerMulRing(const IntegerType* left, const IntegerType* right) : _lo_left(left->_lo), _lo_right(right->_lo),
_hi_left(left->_hi), _hi_right(right->_hi), _widen_left(left->_widen), _widen_right(right->_widen) {
compute_cross_products();
}
template<class IntegerType>
IntegerTypeMultiplication(const IntegerType* left, const IntegerType* right)
: _lo_left(left->_lo), _lo_right(right->_lo),
_hi_left(left->_hi), _hi_right(right->_hi),
_widen_left(left->_widen), _widen_right(right->_widen) {}
// Compute the product type by multiplying the two input type ranges. We take the minimum and maximum of all possible
// values (requires 4 multiplications of all possible combinations of the two range boundary values). If any of these
// multiplications overflows/underflows, we need to make sure that they all have the same number of overflows/underflows.
// If that is not the case, we return the bottom type to cover all values (due to the inconsistent overflows/underflows).
const Type* compute() const {
if (cross_products_not_same_overflow()) {
if (cross_product_not_same_overflow_value()) {
return overflow_type();
}
const NativeType min = MIN4(_lo_lo_product, _lo_hi_product, _hi_lo_product, _hi_hi_product);
const NativeType max = MAX4(_lo_lo_product, _lo_hi_product, _hi_lo_product, _hi_hi_product);
return IntegerType::make(min, max, MAX2(_widen_left, _widen_right));
NativeType lo_lo_product = java_multiply(_lo_left, _lo_right);
NativeType lo_hi_product = java_multiply(_lo_left, _hi_right);
NativeType hi_lo_product = java_multiply(_hi_left, _lo_right);
NativeType hi_hi_product = java_multiply(_hi_left, _hi_right);
const NativeType min = MIN4(lo_lo_product, lo_hi_product, hi_lo_product, hi_hi_product);
const NativeType max = MAX4(lo_lo_product, lo_hi_product, hi_lo_product, hi_hi_product);
return create_type(min, max);
}
// Returns true if at least one of the four cross products of the left and right range bounds is reported as
// overflowing by does_product_overflow(). The products are checked in the order lo*lo, lo*hi, hi*lo, hi*hi.
bool does_overflow() const {
  if (does_product_overflow(_lo_left, _lo_right)) {
    return true;
  }
  if (does_product_overflow(_lo_left, _hi_right)) {
    return true;
  }
  if (does_product_overflow(_hi_left, _lo_right)) {
    return true;
  }
  return does_product_overflow(_hi_left, _hi_right);
}
};
template <>
const Type* IntegerMulRing<TypeInt>::overflow_type() {
const Type* IntegerTypeMultiplication<jint>::overflow_type() {
return TypeInt::INT;
}
template <>
jint IntegerMulRing<TypeInt>::multiply_high_signed_overflow_value(const jint x, const jint y) {
jint IntegerTypeMultiplication<jint>::multiply_high(const jint x, const jint y) {
const jlong x_64 = x;
const jlong y_64 = y;
const jlong product = x_64 * y_64;
const jint result = (jint)((uint64_t)product >> 32u);
return normalize_overflow_value(x, y, result);
return (jint)((uint64_t)product >> 32u);
}
template <>
const Type* IntegerMulRing<TypeLong>::overflow_type() {
const Type* IntegerTypeMultiplication<jint>::create_type(jint lo, jint hi) const {
return TypeInt::make(lo, hi, MAX2(_widen_left, _widen_right));
}
template <>
const Type* IntegerTypeMultiplication<jlong>::overflow_type() {
return TypeLong::LONG;
}
template <>
jlong IntegerMulRing<TypeLong>::multiply_high_signed_overflow_value(const jlong x, const jlong y) {
const jlong result = multiply_high_signed(x, y);
return normalize_overflow_value(x, y, result);
jlong IntegerTypeMultiplication<jlong>::multiply_high(const jlong x, const jlong y) {
return multiply_high_signed(x, y);
}
template <>
const Type* IntegerTypeMultiplication<jlong>::create_type(jlong lo, jlong hi) const {
return TypeLong::make(lo, hi, MAX2(_widen_left, _widen_right));
}
// Compute the product type of two integer ranges into this node.
const Type* MulINode::mul_ring(const Type* type_left, const Type* type_right) const {
const IntegerMulRing<TypeInt> integer_mul_ring(type_left->is_int(), type_right->is_int());
return integer_mul_ring.compute();
const IntegerTypeMultiplication<jint> integer_multiplication(type_left->is_int(), type_right->is_int());
return integer_multiplication.compute();
}
bool MulINode::does_overflow(const TypeInt* type_left, const TypeInt* type_right) {
const IntegerTypeMultiplication<jint> integer_multiplication(type_left, type_right);
return integer_multiplication.does_overflow();
}
// Compute the product type of two long ranges into this node.
const Type* MulLNode::mul_ring(const Type* type_left, const Type* type_right) const {
const IntegerMulRing<TypeLong> integer_mul_ring(type_left->is_long(), type_right->is_long());
return integer_mul_ring.compute();
const IntegerTypeMultiplication<jlong> integer_multiplication(type_left->is_long(), type_right->is_long());
return integer_multiplication.compute();
}
//=============================================================================

View File

@ -95,6 +95,7 @@ public:
virtual int Opcode() const;
virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
virtual const Type *mul_ring( const Type *, const Type * ) const;
static bool does_overflow(const TypeInt* type_left, const TypeInt* type_right);
const Type *mul_id() const { return TypeInt::ONE; }
const Type *add_id() const { return TypeInt::ZERO; }
int add_opcode() const { return Op_AddI; }

View File

@ -64,6 +64,7 @@ compiler/rtm/locking/TestUseRTMXendForLockBusy.java 8183263 generic-x64,generic-
compiler/rtm/print/TestPrintPreciseRTMLockingStatistics.java 8183263 generic-x64,generic-i586
compiler/c2/Test8004741.java 8235801 generic-all
compiler/c2/irTests/TestDuplicateBackedge.java 8318904 generic-all
compiler/codecache/jmx/PoolsIndependenceTest.java 8264632 macosx-all