8327978: C2 SuperWord: Fix compilation time regression in dependency graph traversal after JDK-8325651
Reviewed-by: chagedorn, kvn
This commit is contained in:
parent
c1cfb43d8d
commit
9da5170a0e
@ -3106,14 +3106,37 @@ VStatus VLoopBody::construct() {
|
|||||||
} else if (!post_visited.test(bb_idx(n))) {
|
} else if (!post_visited.test(bb_idx(n))) {
|
||||||
// cross or back arc
|
// cross or back arc
|
||||||
const int old_length = stack.length();
|
const int old_length = stack.length();
|
||||||
|
|
||||||
|
// If a Load depends on the same memory state as a Store, we must make sure that
|
||||||
|
// the Load is ordered before the Store.
|
||||||
|
//
|
||||||
|
// mem
|
||||||
|
// |
|
||||||
|
// +--+--+
|
||||||
|
// | |
|
||||||
|
// | Load (n)
|
||||||
|
// |
|
||||||
|
// Store (mem_use)
|
||||||
|
//
|
||||||
|
if (n->is_Load()) {
|
||||||
|
Node* mem = n->in(MemNode::Memory);
|
||||||
|
for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
|
||||||
|
Node* mem_use = mem->fast_out(i);
|
||||||
|
if (mem_use->is_Store() && _vloop.in_bb(mem_use) && !visited.test(bb_idx(mem_use))) {
|
||||||
|
stack.push(mem_use); // Ordering edge: Load (n) -> Store (mem_use)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
|
for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
|
||||||
Node* use = n->fast_out(i);
|
Node* use = n->fast_out(i);
|
||||||
if (_vloop.in_bb(use) && !visited.test(bb_idx(use)) &&
|
if (_vloop.in_bb(use) && !visited.test(bb_idx(use)) &&
|
||||||
// Don't go around backedge
|
// Don't go around backedge
|
||||||
(!use->is_Phi() || n == _vloop.cl())) {
|
(!use->is_Phi() || n == _vloop.cl())) {
|
||||||
stack.push(use);
|
stack.push(use); // Ordering edge: n -> use
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (stack.length() == old_length) {
|
if (stack.length() == old_length) {
|
||||||
// There were no additional uses, post visit node now
|
// There were no additional uses, post visit node now
|
||||||
stack.pop(); // Remove node from stack
|
stack.pop(); // Remove node from stack
|
||||||
|
@ -292,23 +292,40 @@ void VLoopDependencyGraph::add_node(MemNode* n, GrowableArray<int>& memory_pred_
|
|||||||
_dependency_nodes.at_put_grow(_body.bb_idx(n), dn, nullptr);
|
_dependency_nodes.at_put_grow(_body.bb_idx(n), dn, nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int VLoopDependencyGraph::find_max_pred_depth(const Node* n) const {
|
||||||
|
int max_pred_depth = 0;
|
||||||
|
if (!n->is_Phi()) { // ignore backedge
|
||||||
|
for (PredsIterator it(*this, n); !it.done(); it.next()) {
|
||||||
|
Node* pred = it.current();
|
||||||
|
if (_vloop.in_bb(pred)) {
|
||||||
|
max_pred_depth = MAX2(max_pred_depth, depth(pred));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return max_pred_depth;
|
||||||
|
}
|
||||||
|
|
||||||
// We iterate over the body, which is already ordered by the dependencies, i.e. pred comes
|
// We iterate over the body, which is already ordered by the dependencies, i.e. pred comes
|
||||||
// before use. With a single pass, we can compute the depth of every node, since we can
|
// before use. With a single pass, we can compute the depth of every node, since we can
|
||||||
// assume that the depth of all preds is already computed when we compute the depth of use.
|
// assume that the depth of all preds is already computed when we compute the depth of use.
|
||||||
void VLoopDependencyGraph::compute_depth() {
|
void VLoopDependencyGraph::compute_depth() {
|
||||||
for (int i = 0; i < _body.body().length(); i++) {
|
for (int i = 0; i < _body.body().length(); i++) {
|
||||||
Node* n = _body.body().at(i);
|
Node* n = _body.body().at(i);
|
||||||
int max_pred_depth = 0;
|
set_depth(n, find_max_pred_depth(n) + 1);
|
||||||
if (n->is_Phi()) {
|
|
||||||
for (PredsIterator it(*this, n); !it.done(); it.next()) {
|
|
||||||
Node* pred = it.current();
|
|
||||||
if (_vloop.in_bb(pred)) {
|
|
||||||
max_pred_depth = MAX2(max_pred_depth, depth(pred));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
set_depth(n, max_pred_depth + 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef ASSERT
|
||||||
|
for (int i = 0; i < _body.body().length(); i++) {
|
||||||
|
Node* n = _body.body().at(i);
|
||||||
|
int max_pred_depth = find_max_pred_depth(n);
|
||||||
|
if (depth(n) != max_pred_depth + 1) {
|
||||||
|
print();
|
||||||
|
tty->print_cr("Incorrect depth: %d vs %d", depth(n), max_pred_depth + 1);
|
||||||
|
n->dump();
|
||||||
|
}
|
||||||
|
assert(depth(n) == max_pred_depth + 1, "must have correct depth");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef PRODUCT
|
#ifndef PRODUCT
|
||||||
|
@ -553,6 +553,7 @@ private:
|
|||||||
void add_node(MemNode* n, GrowableArray<int>& memory_pred_edges);
|
void add_node(MemNode* n, GrowableArray<int>& memory_pred_edges);
|
||||||
int depth(const Node* n) const { return _depths.at(_body.bb_idx(n)); }
|
int depth(const Node* n) const { return _depths.at(_body.bb_idx(n)); }
|
||||||
void set_depth(const Node* n, int d) { _depths.at_put(_body.bb_idx(n), d); }
|
void set_depth(const Node* n, int d) { _depths.at_put(_body.bb_idx(n), d); }
|
||||||
|
int find_max_pred_depth(const Node* n) const;
|
||||||
void compute_depth();
|
void compute_depth();
|
||||||
NOT_PRODUCT( void print() const; )
|
NOT_PRODUCT( void print() const; )
|
||||||
|
|
||||||
|
@ -0,0 +1,130 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package compiler.loopopts.superword;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* @test
|
||||||
|
* @bug 8327978
|
||||||
|
* @summary Test compile time for large compilation, where SuperWord takes especially much time.
|
||||||
|
* @requires vm.compiler2.enabled
|
||||||
|
* @run main/othervm/timeout=30 -XX:LoopUnrollLimit=1000 -Xbatch
|
||||||
|
* -XX:CompileCommand=compileonly,compiler.loopopts.superword.TestLargeCompilation::test*
|
||||||
|
* compiler.loopopts.superword.TestLargeCompilation
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.util.Random;
|
||||||
|
|
||||||
|
public class TestLargeCompilation {
|
||||||
|
private static final Random random = new Random();
|
||||||
|
static final int RANGE_CON = 1024 * 8;
|
||||||
|
|
||||||
|
static int init = 593436;
|
||||||
|
static int limit = 599554;
|
||||||
|
static int offset1 = -592394;
|
||||||
|
static int offset2 = -592386;
|
||||||
|
static final int offset3 = -592394;
|
||||||
|
static final int stride = 4;
|
||||||
|
static final int scale = 1;
|
||||||
|
static final int hand_unrolling1 = 2;
|
||||||
|
static final int hand_unrolling2 = 8;
|
||||||
|
static final int hand_unrolling3 = 15;
|
||||||
|
|
||||||
|
public static void main(String[] args) {
|
||||||
|
byte[] aB = generateB();
|
||||||
|
byte[] bB = generateB();
|
||||||
|
byte[] cB = generateB();
|
||||||
|
|
||||||
|
for (int i = 1; i < 100; i++) {
|
||||||
|
testUUBBBH(aB, bB, cB);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static byte[] generateB() {
|
||||||
|
byte[] a = new byte[RANGE_CON];
|
||||||
|
for (int i = 0; i < a.length; i++) {
|
||||||
|
a[i] = (byte)random.nextInt();
|
||||||
|
}
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
|
static Object[] testUUBBBH(byte[] a, byte[] b, byte[] c) {
|
||||||
|
int h1 = hand_unrolling1;
|
||||||
|
int h2 = hand_unrolling2;
|
||||||
|
int h3 = hand_unrolling3;
|
||||||
|
|
||||||
|
for (int i = init; i < limit; i += stride) {
|
||||||
|
if (h1 >= 1) { a[offset1 + i * scale + 0]++; }
|
||||||
|
if (h1 >= 2) { a[offset1 + i * scale + 1]++; }
|
||||||
|
if (h1 >= 3) { a[offset1 + i * scale + 2]++; }
|
||||||
|
if (h1 >= 4) { a[offset1 + i * scale + 3]++; }
|
||||||
|
if (h1 >= 5) { a[offset1 + i * scale + 4]++; }
|
||||||
|
if (h1 >= 6) { a[offset1 + i * scale + 5]++; }
|
||||||
|
if (h1 >= 7) { a[offset1 + i * scale + 6]++; }
|
||||||
|
if (h1 >= 8) { a[offset1 + i * scale + 7]++; }
|
||||||
|
if (h1 >= 9) { a[offset1 + i * scale + 8]++; }
|
||||||
|
if (h1 >= 10) { a[offset1 + i * scale + 9]++; }
|
||||||
|
if (h1 >= 11) { a[offset1 + i * scale + 10]++; }
|
||||||
|
if (h1 >= 12) { a[offset1 + i * scale + 11]++; }
|
||||||
|
if (h1 >= 13) { a[offset1 + i * scale + 12]++; }
|
||||||
|
if (h1 >= 14) { a[offset1 + i * scale + 13]++; }
|
||||||
|
if (h1 >= 15) { a[offset1 + i * scale + 14]++; }
|
||||||
|
if (h1 >= 16) { a[offset1 + i * scale + 15]++; }
|
||||||
|
|
||||||
|
if (h2 >= 1) { b[offset2 + i * scale + 0]++; }
|
||||||
|
if (h2 >= 2) { b[offset2 + i * scale + 1]++; }
|
||||||
|
if (h2 >= 3) { b[offset2 + i * scale + 2]++; }
|
||||||
|
if (h2 >= 4) { b[offset2 + i * scale + 3]++; }
|
||||||
|
if (h2 >= 5) { b[offset2 + i * scale + 4]++; }
|
||||||
|
if (h2 >= 6) { b[offset2 + i * scale + 5]++; }
|
||||||
|
if (h2 >= 7) { b[offset2 + i * scale + 6]++; }
|
||||||
|
if (h2 >= 8) { b[offset2 + i * scale + 7]++; }
|
||||||
|
if (h2 >= 9) { b[offset2 + i * scale + 8]++; }
|
||||||
|
if (h2 >= 10) { b[offset2 + i * scale + 9]++; }
|
||||||
|
if (h2 >= 11) { b[offset2 + i * scale + 10]++; }
|
||||||
|
if (h2 >= 12) { b[offset2 + i * scale + 11]++; }
|
||||||
|
if (h2 >= 13) { b[offset2 + i * scale + 12]++; }
|
||||||
|
if (h2 >= 14) { b[offset2 + i * scale + 13]++; }
|
||||||
|
if (h2 >= 15) { b[offset2 + i * scale + 14]++; }
|
||||||
|
if (h2 >= 16) { b[offset2 + i * scale + 15]++; }
|
||||||
|
|
||||||
|
if (h3 >= 1) { c[offset3 + i * scale + 0]++; }
|
||||||
|
if (h3 >= 2) { c[offset3 + i * scale + 1]++; }
|
||||||
|
if (h3 >= 3) { c[offset3 + i * scale + 2]++; }
|
||||||
|
if (h3 >= 4) { c[offset3 + i * scale + 3]++; }
|
||||||
|
if (h3 >= 5) { c[offset3 + i * scale + 4]++; }
|
||||||
|
if (h3 >= 6) { c[offset3 + i * scale + 5]++; }
|
||||||
|
if (h3 >= 7) { c[offset3 + i * scale + 6]++; }
|
||||||
|
if (h3 >= 8) { c[offset3 + i * scale + 7]++; }
|
||||||
|
if (h3 >= 9) { c[offset3 + i * scale + 8]++; }
|
||||||
|
if (h3 >= 10) { c[offset3 + i * scale + 9]++; }
|
||||||
|
if (h3 >= 11) { c[offset3 + i * scale + 10]++; }
|
||||||
|
if (h3 >= 12) { c[offset3 + i * scale + 11]++; }
|
||||||
|
if (h3 >= 13) { c[offset3 + i * scale + 12]++; }
|
||||||
|
if (h3 >= 14) { c[offset3 + i * scale + 13]++; }
|
||||||
|
if (h3 >= 15) { c[offset3 + i * scale + 14]++; }
|
||||||
|
if (h3 >= 16) { c[offset3 + i * scale + 15]++; }
|
||||||
|
}
|
||||||
|
return new Object[]{ a, b, c };
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user