jdk-24/test/hotspot/jtreg/compiler/loopopts/superword/TestEliminateAllocationWithCastP2XUse.java
2024-11-05 11:47:42 +00:00

184 lines
8.7 KiB
Java

/*
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package compiler.loopopts.superword;
/*
* @test
* @bug 8342498
* @summary Test SuperWord, when it aligns to field-store, and the corresponding allocation is eliminated.
* @run driver compiler.loopopts.superword.TestEliminateAllocationWithCastP2XUse
* @run main/othervm -Xbatch
* -XX:-SplitIfBlocks -XX:LoopMaxUnroll=8
* -XX:+UnlockDiagnosticVMOptions -XX:DominatorSearchLimit=45
* compiler.loopopts.superword.TestEliminateAllocationWithCastP2XUse
*/
/**
 * Regression test for bug 8342498: SuperWord aligns the main loop to a field store whose
 * allocation is later eliminated during macro expansion.
 *
 * <p>The shape of {@link #test(byte[])} (number of branches, number of allocations and field
 * stores, the padding in {@link B}) is tuned to the VM flags in the jtreg header of this file.
 * Do not "clean up" or restructure the code: the reproducer depends on its exact form.
 */
public class TestEliminateAllocationWithCastP2XUse {
/**
 * Allocates one 10_000-element byte array and calls {@link #test(byte[])} on it 10000 times.
 * The return value of {@code test} is ignored; the test passes if the VM does not crash.
 *
 * @param args unused
 */
public static void main(String args[]) {
byte[] a = new byte[10_000];
// Repeated calls on the same array; presumably enough invocations to get test()
// compiled when running with -Xbatch (see the jtreg header).
for (int i = 0; i < 10000; i++) {
test(a);
}
}
// Summary:
// - Some B allocations are detected as NoEscape, but cannot be removed because of a field load.
// - The field loads cannot be split by LoadNode::split_through_phi because DominatorSearchLimit
//   is too low for the dominates query to look through some IfNode / IfProj path.
// - We go into loop-opts.
// - In theory, the Stores of B::offset would be moved out of the loop. But we disable
//   PhaseIdealLoop::try_move_store_after_loop by setting -XX:-SplitIfBlocks.
// - The field loads are folded away because of some MaxUnroll trick, where the val constant folds to 1.
// - SuperWord eventually kicks in, and vectorizes the array stores.
// - Since some vectorization has happened, SuperWord wants to align the main loop with a memory reference
//   in the loop. The code here is not very smart, and just picks the memory reference that occurs the
//   most often. But the B::offset stores occur more often than the array stores, and so we align to
//   one of the B::offset stores. This inserts a CastP2X under the CheckCastPP of the B allocation.
// - Once loop opts is over, we eventually go into macro expansion.
// - During macro expansion, we now discover that the Allocations were marked NoEscape, and that by now
//   there are no field loads any more: yay, we can remove the allocation!
// - ... except that there is the CastP2X from SuperWord alignment ...
// - The Allocation removal code wants to pattern match the CastP2X as part of a GC barrier, but then
//   the pattern does not conform to the expectation - it is after all from SuperWord. This leads to
//   an assert, and SIGSEGV in product, at least with G1GC.
/**
 * The method under test. Increments every element of {@code a} by one and stores a running
 * product into the {@code offset} field of nine local {@link B} instances on every iteration.
 *
 * <p>At runtime {@code val} always ends up as 1, so none of the {@code RuntimeException}
 * branches is taken and the final {@code val == 42} block is skipped.
 *
 * @param a array whose elements are each incremented once per call; must not be {@code null}
 * @return always 0 at runtime, because the only assignment of a non-zero value to {@code ret}
 *         is guarded by {@code val == 42}
 */
public static long test(byte[] a) {
// Delay val == 1 until loop-opts, with MaxUnroll trick.
// (i = 0 and i = 2 both assign 1, so val is 1 after the loop.)
int val = 0;
for (int i = 0; i < 4; i++) {
if ((i % 2) == 0) {
val = 1;
}
}
// during loop opts, we learn val == 1
// But we don't know that during EscapeAnalysis (EA) yet.
// 9 Allocations, discovered as NoEscape during EA.
B b1 = new B();
B b2 = new B();
B b3 = new B();
B b4 = new B();
B b5 = new B();
B b6 = new B();
B b7 = new B();
B b8 = new B();
B b9 = new B();
// Some path of IfNode / IfProj.
// Only folds away once we know val == 1
// This delays the LoadNode::split_through_phi, because it needs a dominates call
// to succeed, but it cannot look through this path because we set -XX:DominatorSearchLimit=45
// i.e. just a little too low to be able to look through.
// Without the LoadNode::split_through_phi before the end of EA, the Allocation cannot yet be
// removed, due to a "Field load", i.e. that Load for B::offset.
// But later, this path can actually fold away, when we know that val == 1. At that point,
// also the Load from B::offset folds away because LoadNode::split_through_phi succeeds.
// At that point the B allocations have no Loads any more, and can be removed... but this only
// happens at macro expansion, after all loop opts.
//
// 32 checks in total; none can be true at runtime since val == 1.
// NOTE(review): the compared constants (x010..x090, skipping x050) look arbitrary; presumably
// only the number of branches matters, not their values - do not add or remove checks.
if (val == 1010) { throw new RuntimeException("never"); }
if (val == 1020) { throw new RuntimeException("never"); }
if (val == 1030) { throw new RuntimeException("never"); }
if (val == 1040) { throw new RuntimeException("never"); }
if (val == 1060) { throw new RuntimeException("never"); }
if (val == 1070) { throw new RuntimeException("never"); }
if (val == 1080) { throw new RuntimeException("never"); }
if (val == 1090) { throw new RuntimeException("never"); }
if (val == 2010) { throw new RuntimeException("never"); }
if (val == 2020) { throw new RuntimeException("never"); }
if (val == 2030) { throw new RuntimeException("never"); }
if (val == 2040) { throw new RuntimeException("never"); }
if (val == 2060) { throw new RuntimeException("never"); }
if (val == 2070) { throw new RuntimeException("never"); }
if (val == 2080) { throw new RuntimeException("never"); }
if (val == 2090) { throw new RuntimeException("never"); }
if (val == 3010) { throw new RuntimeException("never"); }
if (val == 3020) { throw new RuntimeException("never"); }
if (val == 3030) { throw new RuntimeException("never"); }
if (val == 3040) { throw new RuntimeException("never"); }
if (val == 3060) { throw new RuntimeException("never"); }
if (val == 3070) { throw new RuntimeException("never"); }
if (val == 3080) { throw new RuntimeException("never"); }
if (val == 3090) { throw new RuntimeException("never"); }
if (val == 4010) { throw new RuntimeException("never"); }
if (val == 4020) { throw new RuntimeException("never"); }
if (val == 4030) { throw new RuntimeException("never"); }
if (val == 4040) { throw new RuntimeException("never"); }
if (val == 4060) { throw new RuntimeException("never"); }
if (val == 4070) { throw new RuntimeException("never"); }
if (val == 4080) { throw new RuntimeException("never"); }
if (val == 4090) { throw new RuntimeException("never"); }
// Running product stored into the B::offset fields. It silently wraps around on long
// overflow; the value is never read back at runtime because the val == 42 block below
// is not taken.
long mulVal = 1;
for (int i = 0; i < a.length; i++) {
mulVal *= 3;
// We do some vector store, so that SuperWord succeeds, and creates the
// alignment code, which emits the CastP2X.
a[i]++;
// But we also have 9 Stores for the B::offset.
// SuperWord now sees more of these stores than of the array stores, and picks
// one of the B::offset stores as the alignment reference... creating a CastP2X
// for the CheckCastPP of the B allocation.
b1.offset = mulVal;
b2.offset = mulVal;
b3.offset = mulVal;
b4.offset = mulVal;
b5.offset = mulVal;
b6.offset = mulVal;
b7.offset = mulVal;
b8.offset = mulVal;
b9.offset = mulVal;
}
// These are the only loads of B::offset. They fold away once we know val == 1,
// because the guarding condition val == 42 is then known to be false.
// That happens during loop-opts, so after EA, but before macro expansion.
long ret = 0;
if (val == 42) {
ret = b1.offset +
b2.offset +
b3.offset +
b4.offset +
b5.offset +
b6.offset +
b7.offset +
b8.offset +
b9.offset;
}
return ret;
}
/**
 * Helper object allocated (nine times) in {@link #test(byte[])}. Only {@code offset} is ever
 * written or read by the test; the {@code pad} fields exist purely to push {@code offset} to
 * a later position inside the object.
 */
static class B {
// Add padding so that the old SuperWord::can_create_pairs accepts the field store to B.offset
// The byte offsets in the trailing comments are the layout the test author expects;
// NOTE(review): they depend on the VM's object header size and field layout - confirm.
long pad1 = 0; // at 16
long pad2 = 0; // at 24
long pad3 = 0; // at 32
long pad4 = 0; // at 40
long pad5 = 0; // at 48
long pad6 = 0; // at 56
long offset = 0; // offset at 64 bytes
}
}