8229499: Node budget assert in fuzzed test

Reviewed-by: thartmann, neliasso
This commit is contained in:
Patric Hedlin 2019-09-12 11:44:51 +02:00
parent c1865c4ad3
commit 8275b17b07
4 changed files with 171 additions and 85 deletions
src/hotspot/share/opto
test/hotspot/jtreg/compiler/loopopts

@ -671,76 +671,50 @@ void PhaseIdealLoop::do_peeling(IdealLoopTree *loop, Node_List &old_new) {
loop->record_for_igvn();
}
// The Estimated Loop Unroll Size: UnrollFactor * (106% * BodySize + BC) + CC,
// where BC and CC are (totally) ad-hoc/magic "body" and "clone" constants,
// respectively, used to ensure that node usage estimates made are on the safe
// side, for the most part. This is a simplified version of the loop clone
// size calculation in est_loop_clone_sz(), defined for unroll factors larger
// than one (>1), performing an overflow check and returning 'UINT_MAX' in
// case of an overflow.
static uint est_loop_unroll_sz(uint factor, uint size) {
precond(0 < factor);
uint const bc = 5;
uint const cc = 7;
uint const sz = size + (size + 15) / 16;
uint estimate = factor * (sz + bc) + cc;
return (estimate - cc) / factor == sz + bc ? estimate : UINT_MAX;
}
#define EMPTY_LOOP_SIZE 7 // Number of nodes in an empty loop.
//------------------------------policy_maximally_unroll------------------------
// Calculate the exact loop trip-count and return TRUE if loop can be fully,
// i.e. maximally, unrolled, otherwise return FALSE. When TRUE, the estimated
// node budget is also requested.
bool IdealLoopTree::policy_maximally_unroll(PhaseIdealLoop *phase) const {
CountedLoopNode *cl = _head->as_CountedLoop();
bool IdealLoopTree::policy_maximally_unroll(PhaseIdealLoop* phase) const {
CountedLoopNode* cl = _head->as_CountedLoop();
assert(cl->is_normal_loop(), "");
if (!cl->is_valid_counted_loop()) {
return false; // Malformed counted loop
return false; // Malformed counted loop.
}
if (!cl->has_exact_trip_count()) {
// Trip count is not exact.
return false;
return false; // Trip count is not exact.
}
uint trip_count = cl->trip_count();
// Note, max_juint is used to indicate unknown trip count.
assert(trip_count > 1, "one iteration loop should be optimized out already");
assert(trip_count < max_juint, "exact trip_count should be less than max_uint.");
assert(trip_count < max_juint, "exact trip_count should be less than max_juint.");
// If nodes are depleted, some transform has miscalculated its needs.
assert(!phase->exceeding_node_budget(), "sanity");
// Real policy: if we maximally unroll, does it get too big?
// Allow the unrolled mess to get larger than standard loop
// size. After all, it will no longer be a loop.
uint body_size = _body.size();
// Allow the unrolled body to get larger than the standard loop size limit.
uint unroll_limit = (uint)LoopUnrollLimit * 4;
assert((intx)unroll_limit == LoopUnrollLimit * 4, "LoopUnrollLimit must fit in 32bits");
if (trip_count > unroll_limit || body_size > unroll_limit) {
if (trip_count > unroll_limit || _body.size() > unroll_limit) {
return false;
}
// Take into account that after unroll conjoined heads and tails will fold,
// otherwise policy_unroll() may allow more unrolling than max unrolling.
uint new_body_size = est_loop_unroll_sz(trip_count, body_size - EMPTY_LOOP_SIZE);
uint new_body_size = est_loop_unroll_sz(trip_count);
if (new_body_size == UINT_MAX) { // Check for bad estimate (overflow).
return false;
}
// Fully unroll a loop with few iterations regardless next conditions since
// following loop optimizations will split such loop anyway (pre-main-post).
// Fully unroll a loop with few iterations, regardless of other conditions,
// since the following (general) loop optimizations will split such loop in
// any case (into pre-main-post).
if (trip_count <= 3) {
return phase->may_require_nodes(new_body_size);
}
if (new_body_size > unroll_limit ||
// Unrolling can result in a large amount of node construction
phase->exceeding_node_budget(new_body_size)) {
// Reject if unrolling will result in too much node construction.
if (new_body_size > unroll_limit || phase->exceeding_node_budget(new_body_size)) {
return false;
}

@ -2471,6 +2471,39 @@ uint IdealLoopTree::est_loop_clone_sz(uint factor) const {
assert((estimate - cc) / factor == sz + bc, "overflow");
return estimate + est_loop_flow_merge_sz();
}
// The Estimated Loop (full-) Unroll Size:
// UnrollFactor * (~106% * BodySize) + CC + FanOutTerm,
// where CC is a (totally) ad-hoc/magic "clone" constant, used to ensure that
// node usage estimates made are on the safe side, for the most part. This is
// a "light" version of the loop clone size calculation (above), based on the
// assumption that most of the loop-construct overhead will be unraveled when
// (fully) unrolled. Defined for unroll factors larger or equal to one (>=1),
// including an overflow check and returning UINT_MAX in case of an overflow.
uint IdealLoopTree::est_loop_unroll_sz(uint factor) const {
precond(factor > 0);
// Take into account that after unroll conjoined heads and tails will fold.
uint const b0 = _body.size() - EMPTY_LOOP_SIZE;
uint const cc = 7;
uint const sz = b0 + (b0 + 15) / 16;
uint estimate = factor * sz + cc;
if ((estimate - cc) / factor != sz) {
return UINT_MAX;
}
return estimate + est_loop_flow_merge_sz();
}
// Estimate the growth effect (in nodes) of merging control and data flow when
// cloning a loop body, based on the amount of control and data flow reaching
// outside of the (current) loop body.
uint IdealLoopTree::est_loop_flow_merge_sz() const {
uint ctrl_edge_out_cnt = 0;
uint data_edge_out_cnt = 0;
@ -2494,24 +2527,21 @@ uint IdealLoopTree::est_loop_clone_sz(uint factor) const {
}
}
}
// Add data and control count (x2.0) to estimate iff both are > 0. This is
// Use data and control count (x2.0) in estimate iff both are > 0. This is
// a rather pessimistic estimate for the most part, in particular for some
// complex loops, but still not enough to capture all loops.
if (ctrl_edge_out_cnt > 0 && data_edge_out_cnt > 0) {
estimate += 2 * (ctrl_edge_out_cnt + data_edge_out_cnt);
return 2 * (ctrl_edge_out_cnt + data_edge_out_cnt);
}
return estimate;
return 0;
}
#ifndef PRODUCT
//------------------------------dump_head--------------------------------------
// Dump 1 liner for loop header info
void IdealLoopTree::dump_head() const {
for (uint i = 0; i < _nest; i++) {
tty->print(" ");
}
tty->print("Loop: N%d/N%d ",_head->_idx,_tail->_idx);
tty->sp(2 * _nest);
tty->print("Loop: N%d/N%d ", _head->_idx, _tail->_idx);
if (_irreducible) tty->print(" IRREDUCIBLE");
Node* entry = _head->is_Loop() ? _head->as_Loop()->skip_strip_mined(-1)->in(LoopNode::EntryControl) : _head->in(LoopNode::EntryControl);
Node* predicate = PhaseIdealLoop::find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
@ -4501,69 +4531,67 @@ void PhaseIdealLoop::dump_bad_graph(const char* msg, Node* n, Node* early, Node*
#ifndef PRODUCT
//------------------------------dump-------------------------------------------
void PhaseIdealLoop::dump( ) const {
void PhaseIdealLoop::dump() const {
ResourceMark rm;
Arena* arena = Thread::current()->resource_area();
Node_Stack stack(arena, C->live_nodes() >> 2);
Node_List rpo_list;
VectorSet visited(arena);
visited.set(C->top()->_idx);
rpo( C->root(), stack, visited, rpo_list );
rpo(C->root(), stack, visited, rpo_list);
// Dump root loop indexed by last element in PO order
dump( _ltree_root, rpo_list.size(), rpo_list );
dump(_ltree_root, rpo_list.size(), rpo_list);
}
void PhaseIdealLoop::dump( IdealLoopTree *loop, uint idx, Node_List &rpo_list ) const {
void PhaseIdealLoop::dump(IdealLoopTree* loop, uint idx, Node_List &rpo_list) const {
loop->dump_head();
// Now scan for CFG nodes in the same loop
for( uint j=idx; j > 0; j-- ) {
Node *n = rpo_list[j-1];
if( !_nodes[n->_idx] ) // Skip dead nodes
for (uint j = idx; j > 0; j--) {
Node* n = rpo_list[j-1];
if (!_nodes[n->_idx]) // Skip dead nodes
continue;
if( get_loop(n) != loop ) { // Wrong loop nest
if( get_loop(n)->_head == n && // Found nested loop?
get_loop(n)->_parent == loop )
dump(get_loop(n),rpo_list.size(),rpo_list); // Print it nested-ly
if (get_loop(n) != loop) { // Wrong loop nest
if (get_loop(n)->_head == n && // Found nested loop?
get_loop(n)->_parent == loop)
dump(get_loop(n), rpo_list.size(), rpo_list); // Print it nested-ly
continue;
}
// Dump controlling node
for( uint x = 0; x < loop->_nest; x++ )
tty->print(" ");
tty->sp(2 * loop->_nest);
tty->print("C");
if( n == C->root() ) {
if (n == C->root()) {
n->dump();
} else {
Node* cached_idom = idom_no_update(n);
Node *computed_idom = n->in(0);
if( n->is_Region() ) {
Node* computed_idom = n->in(0);
if (n->is_Region()) {
computed_idom = compute_idom(n);
// computed_idom() will return n->in(0) when idom(n) is an IfNode (or
// any MultiBranch ctrl node), so apply a similar transform to
// the cached idom returned from idom_no_update.
cached_idom = find_non_split_ctrl(cached_idom);
}
tty->print(" ID:%d",computed_idom->_idx);
tty->print(" ID:%d", computed_idom->_idx);
n->dump();
if( cached_idom != computed_idom ) {
if (cached_idom != computed_idom) {
tty->print_cr("*** BROKEN IDOM! Computed as: %d, cached as: %d",
computed_idom->_idx, cached_idom->_idx);
}
}
// Dump nodes it controls
for( uint k = 0; k < _nodes.Size(); k++ ) {
for (uint k = 0; k < _nodes.Size(); k++) {
// (k < C->unique() && get_ctrl(find(k)) == n)
if (k < C->unique() && _nodes[k] == (Node*)((intptr_t)n + 1)) {
Node *m = C->root()->find(k);
if( m && m->outcnt() > 0 ) {
Node* m = C->root()->find(k);
if (m && m->outcnt() > 0) {
if (!(has_ctrl(m) && get_ctrl_no_update(m) == n)) {
tty->print_cr("*** BROKEN CTRL ACCESSOR! _nodes[k] is %p, ctrl is %p",
_nodes[k], has_ctrl(m) ? get_ctrl_no_update(m) : NULL);
}
for( uint j = 0; j < loop->_nest; j++ )
tty->print(" ");
tty->print(" ");
tty->sp(2 * loop->_nest + 1);
m->dump();
}
}
@ -4574,7 +4602,7 @@ void PhaseIdealLoop::dump( IdealLoopTree *loop, uint idx, Node_List &rpo_list )
// Collect a R-P-O for the whole CFG.
// Result list is in post-order (scan backwards for RPO)
void PhaseIdealLoop::rpo( Node *start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list ) const {
void PhaseIdealLoop::rpo(Node* start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list) const {
stk.push(start, 0);
visited.set(start->_idx);
@ -4596,7 +4624,7 @@ void PhaseIdealLoop::rpo( Node *start, Node_Stack &stk, VectorSet &visited, Node
//=============================================================================
//------------------------------LoopTreeIterator-----------------------------------
//------------------------------LoopTreeIterator-------------------------------
// Advance to next loop tree using a preorder, left-to-right traversal.
void LoopTreeIterator::next() {

@ -623,6 +623,8 @@ public:
// Estimate the number of nodes required when cloning a loop (body).
uint est_loop_clone_sz(uint factor) const;
// Estimate the number of nodes required when unrolling a loop (body).
uint est_loop_unroll_sz(uint factor) const;
// Compute loop trip count if possible
void compute_trip_count(PhaseIdealLoop* phase);
@ -654,11 +656,16 @@ public:
void remove_main_post_loops(CountedLoopNode *cl, PhaseIdealLoop *phase);
#ifndef PRODUCT
void dump_head( ) const; // Dump loop head only
void dump_head() const; // Dump loop head only
void dump() const; // Dump this loop recursively
void verify_tree(IdealLoopTree *loop, const IdealLoopTree *parent) const;
#endif
private:
enum { EMPTY_LOOP_SIZE = 7 }; // Number of nodes in an empty loop.
// Estimate the number of nodes resulting from control and data flow merge.
uint est_loop_flow_merge_sz() const;
};
// -----------------------------PhaseIdealLoop---------------------------------
@ -675,7 +682,7 @@ class PhaseIdealLoop : public PhaseTransform {
PhaseIterGVN &_igvn;
// Head of loop tree
IdealLoopTree *_ltree_root;
IdealLoopTree* _ltree_root;
// Array of pre-order numbers, plus post-visited bit.
// ZERO for not pre-visited. EVEN for pre-visited but not post-visited.
@ -1017,9 +1024,9 @@ public:
bool _has_irreducible_loops;
// Per-Node transform
virtual Node *transform( Node *a_node ) { return 0; }
virtual Node* transform(Node* n) { return 0; }
bool is_counted_loop(Node* x, IdealLoopTree*& loop);
bool is_counted_loop(Node* n, IdealLoopTree* &loop);
IdealLoopTree* create_outer_strip_mined_loop(BoolNode *test, Node *cmp, Node *init_control,
IdealLoopTree* loop, float cl_prob, float le_fcnt,
Node*& entry_control, Node*& iffalse);
@ -1034,7 +1041,7 @@ public:
return (IdealLoopTree*)_nodes[n->_idx];
}
IdealLoopTree *ltree_root() const { return _ltree_root; }
IdealLoopTree* ltree_root() const { return _ltree_root; }
// Is 'n' a (nested) member of 'loop'?
int is_member( const IdealLoopTree *loop, Node *n ) const {
@ -1319,7 +1326,7 @@ public:
// same block. Split thru the Region.
void do_split_if( Node *iff );
// Conversion of fill/copy patterns into intrisic versions
// Conversion of fill/copy patterns into intrinsic versions
bool do_intrinsify_fill();
bool intrinsify_fill(IdealLoopTree* lpt);
bool match_fill_loop(IdealLoopTree* lpt, Node*& store, Node*& store_value,
@ -1419,18 +1426,18 @@ private:
public:
void set_created_loop_node() { _created_loop_node = true; }
bool created_loop_node() { return _created_loop_node; }
void register_new_node( Node *n, Node *blk );
void register_new_node(Node* n, Node* blk);
#ifdef ASSERT
void dump_bad_graph(const char* msg, Node* n, Node* early, Node* LCA);
#endif
#ifndef PRODUCT
void dump( ) const;
void dump( IdealLoopTree *loop, uint rpo_idx, Node_List &rpo_list ) const;
void dump() const;
void dump(IdealLoopTree* loop, uint rpo_idx, Node_List &rpo_list) const;
void verify() const; // Major slow :-)
void verify_compare( Node *n, const PhaseIdealLoop *loop_verify, VectorSet &visited ) const;
IdealLoopTree *get_loop_idx(Node* n) const {
void verify_compare(Node* n, const PhaseIdealLoop* loop_verify, VectorSet &visited) const;
IdealLoopTree* get_loop_idx(Node* n) const {
// Dead nodes have no loop, so return the top level loop instead
return _nodes[n->_idx] ? (IdealLoopTree*)_nodes[n->_idx] : _ltree_root;
}
@ -1439,7 +1446,8 @@ public:
static int _loop_invokes; // Count of PhaseIdealLoop invokes
static int _loop_work; // Sum of PhaseIdealLoop x _unique
#endif
void rpo( Node *start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list ) const;
void rpo(Node* start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list) const;
};

@ -0,0 +1,76 @@
/*
* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* @test
* @bug 8229499
* @summary Node estimate for loop unrolling is not correct/sufficient:
* assert(delta <= 2 * required) failed: Bad node estimate ...
*
* @requires !vm.graal.enabled
*
* @run main/othervm -XX:-TieredCompilation -XX:-BackgroundCompilation
* LoopUnrollBadNodeBudget
*
*/
public class LoopUnrollBadNodeBudget {
int a;
long b;
int c;
int d(long e, short f, int g) {
int h, j = 2, k, l[][] = new int[a][];
for (h = 8; h < 58; ++h)
for (k = 1; 7 > k; ++k)
switch (h % 9 * 5 + 43) {
case 70:
case 65:
case 86:
case 81:
case 62:
case 69:
case 74:
g = j;
}
long m = u(l);
return (int)m;
}
void n(int p, int o) { d(b, (short)0, p); }
void r(String[] q) {
int i = 4;
n(i, c);
}
long u(int[][] a) {
long sum = 0;
return sum;
}
public static void main(String[] t) {
try {
LoopUnrollBadNodeBudget s = new LoopUnrollBadNodeBudget();
for (int i = 5000; i > 0; i--)
s.r(t);
} catch (Exception ex) {
}
}
}