8159016: Over-unrolled loop is partially removed

Prevent over-unrolling of loops by computing upper bound for trip count.

Reviewed-by: kvn
This commit is contained in:
Tobias Hartmann 2016-06-27 10:10:11 +02:00
parent cd75cb6a26
commit 4ab4c66c41
4 changed files with 88 additions and 24 deletions
hotspot

@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -70,9 +70,9 @@ void IdealLoopTree::record_for_igvn() {
}
//------------------------------compute_exact_trip_count-----------------------
// Compute loop exact trip count if possible. Do not recalculate trip count for
// Compute loop trip count if possible. Do not recalculate trip count for
// split loops (pre-main-post) which have their limits and inits behind Opaque node.
void IdealLoopTree::compute_exact_trip_count( PhaseIdealLoop *phase ) {
void IdealLoopTree::compute_trip_count(PhaseIdealLoop* phase) {
if (!_head->as_Loop()->is_valid_counted_loop()) {
return;
}
@ -94,17 +94,21 @@ void IdealLoopTree::compute_exact_trip_count( PhaseIdealLoop *phase ) {
Node* init_n = cl->init_trip();
Node* limit_n = cl->limit();
if (init_n != NULL && init_n->is_Con() &&
limit_n != NULL && limit_n->is_Con()) {
if (init_n != NULL && limit_n != NULL) {
// Use longs to avoid integer overflow.
int stride_con = cl->stride_con();
jlong init_con = cl->init_trip()->get_int();
jlong limit_con = cl->limit()->get_int();
int stride_m = stride_con - (stride_con > 0 ? 1 : -1);
int stride_con = cl->stride_con();
jlong init_con = phase->_igvn.type(init_n)->is_int()->_lo;
jlong limit_con = phase->_igvn.type(limit_n)->is_int()->_hi;
int stride_m = stride_con - (stride_con > 0 ? 1 : -1);
jlong trip_count = (limit_con - init_con + stride_m)/stride_con;
if (trip_count > 0 && (julong)trip_count < (julong)max_juint) {
// Set exact trip count.
cl->set_exact_trip_count((uint)trip_count);
if (init_n->is_Con() && limit_n->is_Con()) {
// Set exact trip count.
cl->set_exact_trip_count((uint)trip_count);
} else if (cl->unrolled_count() == 1) {
// Set maximum trip count before unrolling.
cl->set_trip_count((uint)trip_count);
}
}
}
}
@ -1305,7 +1309,7 @@ Node *PhaseIdealLoop::insert_post_loop(IdealLoopTree *loop, Node_List &old_new,
assert(main_exit->Opcode() == Op_IfFalse, "");
int dd_main_exit = dom_depth(main_exit);
// Step A1: Clone the loop body of main. The clone becomes the vector post-loop.
// Step A1: Clone the loop body of main. The clone becomes the post-loop.
// The main loop pre-header illegally has 2 control users (old & new loops).
clone_loop(loop, old_new, dd_main_exit);
assert(old_new[main_end->_idx]->Opcode() == Op_CountedLoopEnd, "");
@ -2095,8 +2099,7 @@ int PhaseIdealLoop::do_range_check( IdealLoopTree *loop, Node_List &old_new ) {
// the loop is in canonical form to multiversion.
closed_range_checks = 0;
// Check loop body for tests of trip-counter plus loop-invariant vs
// loop-invariant.
// Check loop body for tests of trip-counter plus loop-invariant vs loop-variant.
for( uint i = 0; i < loop->_body.size(); i++ ) {
Node *iff = loop->_body[i];
if (iff->Opcode() == Op_If ||
@ -2298,7 +2301,7 @@ void PhaseIdealLoop::has_range_checks(IdealLoopTree *loop) {
// skip this loop if it is already checked
if (cl->has_been_range_checked()) return;
// Now check for existance of range checks
// Now check for existence of range checks
for (uint i = 0; i < loop->_body.size(); i++) {
Node *iff = loop->_body[i];
int iff_opc = iff->Opcode();
@ -2319,7 +2322,7 @@ bool PhaseIdealLoop::multi_version_post_loops(IdealLoopTree *rce_loop, IdealLoop
CountedLoopNode *legacy_cl = legacy_loop->_head->as_CountedLoop();
assert(legacy_cl->is_post_loop(), "");
// Check for existance of range checks using the unique instance to make a guard with
// Check for existence of range checks using the unique instance to make a guard with
Unique_Node_List worklist;
for (uint i = 0; i < legacy_loop->_body.size(); i++) {
Node *iff = legacy_loop->_body[i];
@ -2422,7 +2425,7 @@ bool PhaseIdealLoop::multi_version_post_loops(IdealLoopTree *rce_loop, IdealLoop
}
//-------------------------poison_rce_post_loop--------------------------------
// Causes the rce'd post loop to be optimized away if multiverioning fails
// Causes the rce'd post loop to be optimized away if multiversioning fails
void PhaseIdealLoop::poison_rce_post_loop(IdealLoopTree *rce_loop) {
CountedLoopNode *rce_cl = rce_loop->_head->as_CountedLoop();
Node* ctrl = rce_cl->in(LoopNode::EntryControl);
@ -2710,8 +2713,8 @@ bool IdealLoopTree::policy_do_one_iteration_loop( PhaseIdealLoop *phase ) {
//=============================================================================
//------------------------------iteration_split_impl---------------------------
bool IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new ) {
// Compute exact loop trip count if possible.
compute_exact_trip_count(phase);
// Compute loop trip count if possible.
compute_trip_count(phase);
// Convert one iteration loop into normal code.
if (policy_do_one_iteration_loop(phase))

@ -1,5 +1,5 @@
/*
* Copyright (c) 1998, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1998, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -520,8 +520,8 @@ public:
// Return TRUE if "iff" is a range check.
bool is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, Invariance& invar) const;
// Compute loop exact trip count if possible
void compute_exact_trip_count( PhaseIdealLoop *phase );
// Compute loop trip count if possible
void compute_trip_count(PhaseIdealLoop* phase);
// Compute loop trip count from profile data
void compute_profile_trip_cnt( PhaseIdealLoop *phase );

@ -1596,8 +1596,12 @@ void PhaseMacroExpand::expand_allocate_common(
// All nodes that depended on the InitializeNode for control
// and memory must now depend on the MemBarNode that itself
// depends on the InitializeNode
_igvn.replace_node(init_ctrl, ctrl);
_igvn.replace_node(init_mem, mem);
if (init_ctrl != NULL) {
_igvn.replace_node(init_ctrl, ctrl);
}
if (init_mem != NULL) {
_igvn.replace_node(init_mem, mem);
}
}
}

@ -0,0 +1,57 @@
/*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* @test
* @bug 8159016
* @requires vm.gc == "Parallel" | vm.gc == "null"
* @summary Tests correct dominator information after over-unrolling a loop.
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -Xcomp -XX:-TieredCompilation
* -XX:-UseG1GC -XX:+UseParallelGC TestOverunrolling
*/
public class TestOverunrolling {
public static Object test(int arg) {
Object arr[] = new Object[3];
int lim = (arg & 3);
// The pre loop is executed for one iteration, initializing p[0].
// The main loop is unrolled twice, initializing p[1], p[2], p[3] and p[4].
// The p[3] and p[4] stores are always out of bounds and removed. However,
// C2 is unable to remove the "over-unrolled", dead main loop. As a result,
// there is a control path from the main loop to the post loop without a
// memory path (because the last store was replaced by TOP). We crash
// because we use a memory edge from a non-dominating region.
for (int i = 0; i < lim; ++i) {
arr[i] = new Object();
}
// Avoid EA
return arr;
}
public static void main(String args[]) {
for (int i = 0; i < 42; ++i) {
test(i);
}
}
}