8298848: C2: clone all of (CmpP (LoadKlass (AddP down at split if

Reviewed-by: kvn, thartmann, chagedorn
This commit is contained in:
Roland Westrelin 2023-01-05 09:33:14 +00:00
parent 1ca31d34fc
commit 872384707e
3 changed files with 455 additions and 119 deletions

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 1998, 2022, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 1998, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -1743,6 +1743,14 @@ public:
Node_List*& split_bool_set, Node_List*& split_cex_set); Node_List*& split_bool_set, Node_List*& split_cex_set);
void finish_clone_loop(Node_List* split_if_set, Node_List* split_bool_set, Node_List* split_cex_set); void finish_clone_loop(Node_List* split_if_set, Node_List* split_bool_set, Node_List* split_cex_set);
bool clone_cmp_down(Node* n, const Node* blk1, const Node* blk2);
void clone_loadklass_nodes_at_cmp_index(const Node* n, Node* cmp, int i);
bool clone_cmp_loadklass_down(Node* n, const Node* blk1, const Node* blk2);
bool at_relevant_ctrl(Node* n, const Node* blk1, const Node* blk2);
}; };

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 1999, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -22,6 +22,8 @@
* *
*/ */
#include "opto/addnode.hpp"
#include "opto/node.hpp"
#include "precompiled.hpp" #include "precompiled.hpp"
#include "memory/allocation.inline.hpp" #include "memory/allocation.inline.hpp"
#include "opto/callnode.hpp" #include "opto/callnode.hpp"
@ -69,7 +71,7 @@ bool PhaseIdealLoop::split_up( Node *n, Node *blk1, Node *blk2 ) {
assert( n->in(0) != blk1, "Lousy candidate for split-if" ); assert( n->in(0) != blk1, "Lousy candidate for split-if" );
return false; return false;
} }
if( get_ctrl(n) != blk1 && get_ctrl(n) != blk2 ) if (!at_relevant_ctrl(n, blk1, blk2))
return false; // Not block local return false; // Not block local
if( n->is_Phi() ) return false; // Local PHIs are expected if( n->is_Phi() ) return false; // Local PHIs are expected
@ -83,125 +85,16 @@ bool PhaseIdealLoop::split_up( Node *n, Node *blk1, Node *blk2 ) {
} }
} }
// Check for needing to clone-up a compare. Can't do that, it forces if (clone_cmp_loadklass_down(n, blk1, blk2)) {
// another (nested) split-if transform. Instead, clone it "down".
if( n->is_Cmp() ) {
assert(get_ctrl(n) == blk2 || get_ctrl(n) == blk1, "must be in block with IF");
// Check for simple Cmp/Bool/CMove which we can clone-up. Cmp/Bool/CMove
// sequence can have no other users and it must all reside in the split-if
// block. Non-simple Cmp/Bool/CMove sequences are 'cloned-down' below -
// private, per-use versions of the Cmp and Bool are made. These sink to
// the CMove block. If the CMove is in the split-if block, then in the
// next iteration this will become a simple Cmp/Bool/CMove set to clone-up.
Node *bol, *cmov;
if( !(n->outcnt() == 1 && n->unique_out()->is_Bool() &&
(bol = n->unique_out()->as_Bool()) &&
(get_ctrl(bol) == blk1 ||
get_ctrl(bol) == blk2) &&
bol->outcnt() == 1 &&
bol->unique_out()->is_CMove() &&
(cmov = bol->unique_out()->as_CMove()) &&
(get_ctrl(cmov) == blk1 ||
get_ctrl(cmov) == blk2) ) ) {
// Must clone down
#ifndef PRODUCT
if( PrintOpto && VerifyLoopOptimizations ) {
tty->print("Cloning down: ");
n->dump();
}
#endif
if (!n->is_FastLock()) {
// Clone down any block-local BoolNode uses of this CmpNode
for (DUIterator i = n->outs(); n->has_out(i); i++) {
Node* bol = n->out(i);
assert( bol->is_Bool(), "" );
if (bol->outcnt() == 1) {
Node* use = bol->unique_out();
if (use->Opcode() == Op_Opaque4) {
if (use->outcnt() == 1) {
Node* iff = use->unique_out();
assert(iff->is_If(), "unexpected node type");
Node *use_c = iff->in(0);
if (use_c == blk1 || use_c == blk2) {
continue;
}
}
} else {
// We might see an Opaque1 from a loop limit check here
assert(use->is_If() || use->is_CMove() || use->Opcode() == Op_Opaque1 || use->is_AllocateArray(), "unexpected node type");
Node *use_c = (use->is_If() || use->is_AllocateArray()) ? use->in(0) : get_ctrl(use);
if (use_c == blk1 || use_c == blk2) {
assert(use->is_CMove(), "unexpected node type");
continue;
}
}
}
if (get_ctrl(bol) == blk1 || get_ctrl(bol) == blk2) {
// Recursively sink any BoolNode
#ifndef PRODUCT
if( PrintOpto && VerifyLoopOptimizations ) {
tty->print("Cloning down: ");
bol->dump();
}
#endif
for (DUIterator j = bol->outs(); bol->has_out(j); j++) {
Node* u = bol->out(j);
// Uses are either IfNodes, CMoves or Opaque4
if (u->Opcode() == Op_Opaque4) {
assert(u->in(1) == bol, "bad input");
for (DUIterator_Last kmin, k = u->last_outs(kmin); k >= kmin; --k) {
Node* iff = u->last_out(k);
assert(iff->is_If() || iff->is_CMove(), "unexpected node type");
assert( iff->in(1) == u, "" );
// Get control block of either the CMove or the If input
Node *iff_ctrl = iff->is_If() ? iff->in(0) : get_ctrl(iff);
Node *x1 = bol->clone();
Node *x2 = u->clone();
register_new_node(x1, iff_ctrl);
register_new_node(x2, iff_ctrl);
_igvn.replace_input_of(x2, 1, x1);
_igvn.replace_input_of(iff, 1, x2);
}
_igvn.remove_dead_node(u);
--j;
} else {
// We might see an Opaque1 from a loop limit check here
assert(u->is_If() || u->is_CMove() || u->Opcode() == Op_Opaque1 || u->is_AllocateArray(), "unexpected node type");
assert(u->is_AllocateArray() || u->in(1) == bol, "");
assert(!u->is_AllocateArray() || u->in(AllocateNode::ValidLengthTest) == bol, "wrong input to AllocateArray");
// Get control block of either the CMove or the If input
Node *u_ctrl = (u->is_If() || u->is_AllocateArray()) ? u->in(0) : get_ctrl(u);
assert((u_ctrl != blk1 && u_ctrl != blk2) || u->is_CMove(), "won't converge");
Node *x = bol->clone();
register_new_node(x, u_ctrl);
_igvn.replace_input_of(u, u->is_AllocateArray() ? AllocateNode::ValidLengthTest : 1, x);
--j;
}
}
_igvn.remove_dead_node(bol);
--i;
}
}
}
// Clone down this CmpNode
for (DUIterator_Last jmin, j = n->last_outs(jmin); j >= jmin; --j) {
Node* use = n->last_out(j);
uint pos = 1;
if (n->is_FastLock()) {
pos = TypeFunc::Parms + 2;
assert(use->is_Lock(), "FastLock only used by LockNode");
}
assert(use->in(pos) == n, "" );
Node *x = n->clone();
register_new_node(x, ctrl_or_self(use));
_igvn.replace_input_of(use, pos, x);
}
_igvn.remove_dead_node( n );
return true; return true;
} }
// Check for needing to clone-up a compare. Can't do that, it forces
// another (nested) split-if transform. Instead, clone it "down".
if (clone_cmp_down(n, blk1, blk2)) {
return true;
} }
if (subgraph_has_opaque(n)) { if (subgraph_has_opaque(n)) {
Unique_Node_List wq; Unique_Node_List wq;
wq.push(n); wq.push(n);
@ -307,6 +200,232 @@ bool PhaseIdealLoop::split_up( Node *n, Node *blk1, Node *blk2 ) {
return true; return true;
} }
// Look for a (If .. (Bool(CmpP (LoadKlass .. (AddP obj ..)) ..))) and clone all of it down.
// There's likely a CheckCastPP on one of the branches of the If, with obj as input.
// If the (LoadKlass .. (AddP obj ..)) is not cloned down, then split if transforms this to: (If .. (Bool(CmpP phi1 ..)))
// and the CheckCastPP to (CheckCastPP phi2). It's possible then that phi2 is transformed to a CheckCastPP
// (through PhiNode::Ideal) and that this CheckCastPP is replaced by another narrower CheckCastPP at the same control
// (through ConstraintCastNode::Identity). That could cause the CheckCastPP at the If to become top while (CmpP phi1)
// wouldn't constant fold because it's using a different data path. Cloning the whole subgraph down guarantees both the
// AddP and CheckCastPP have the same obj input after split if.
//
// n is the candidate node; nothing is done unless it is an AddP located at blk1 or blk2.
// Both the uncompressed shape (LoadKlass (AddP ..)) and the compressed shape
// (DecodeNKlass (LoadNKlass (AddP ..))) are handled.
// Returns true only when, after cloning, n has no uses left; false in every other case
// (including when some, but not all, uses of n were cloned down).
bool PhaseIdealLoop::clone_cmp_loadklass_down(Node* n, const Node* blk1, const Node* blk2) {
if (n->Opcode() == Op_AddP && at_relevant_ctrl(n, blk1, blk2)) {
// Collects the nodes that now use the klass load and were created by clone_cmp_down() below.
Node_List cmp_nodes;
// Snapshot taken before any cloning; compared against Node::_idx further down.
// NOTE(review): presumably C->unique() is the next node index to be handed out, so
// "_idx >= old" selects nodes created after this point -- confirm against Compile::unique().
uint old = C->unique();
for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
Node* u1 = n->fast_out(i);
if (u1->Opcode() == Op_LoadNKlass && at_relevant_ctrl(u1, blk1, blk2)) {
// Compressed shape: AddP -> LoadNKlass -> DecodeNKlass -> Cmp
for (DUIterator_Fast jmax, j = u1->fast_outs(jmax); j < jmax; j++) {
Node* u2 = u1->fast_out(j);
if (u2->Opcode() == Op_DecodeNKlass && at_relevant_ctrl(u2, blk1, blk2)) {
// First pass: clone down every block-local Cmp user of the DecodeNKlass.
for (DUIterator k = u2->outs(); u2->has_out(k); k++) {
Node* u3 = u2->out(k);
if (at_relevant_ctrl(u3, blk1, blk2) && clone_cmp_down(u3, blk1, blk2)) {
// clone_cmp_down() removed u3 when it returned true, so the use list of u2 changed:
// step back to avoid skipping the entry that took its place.
--k;
}
}
// Second pass: remember the users of the DecodeNKlass that were created by the first pass.
for (DUIterator_Fast kmax, k = u2->fast_outs(kmax); k < kmax; k++) {
Node* u3 = u2->fast_out(k);
if (u3->_idx >= old) {
cmp_nodes.push(u3);
}
}
}
}
} else if (u1->Opcode() == Op_LoadKlass && at_relevant_ctrl(u1, blk1, blk2)) {
// Uncompressed shape: AddP -> LoadKlass -> Cmp. Same two passes as above, one level shallower.
for (DUIterator j = u1->outs(); u1->has_out(j); j++) {
Node* u2 = u1->out(j);
if (at_relevant_ctrl(u2, blk1, blk2) && clone_cmp_down(u2, blk1, blk2)) {
// See above: the use list of u1 changed, revisit the current slot.
--j;
}
}
for (DUIterator_Fast kmax, k = u1->fast_outs(kmax); k < kmax; k++) {
Node* u2 = u1->fast_out(k);
if (u2->_idx >= old) {
cmp_nodes.push(u2);
}
}
}
}
// Give every collected Cmp its own copy of the klass load chain (and of the AddP n).
// Either Cmp input may be the klass load, so both inputs are tried.
for (uint i = 0; i < cmp_nodes.size(); ++i) {
Node* cmp = cmp_nodes.at(i);
clone_loadklass_nodes_at_cmp_index(n, cmp, 1);
clone_loadklass_nodes_at_cmp_index(n, cmp, 2);
}
// Only report success when every use of n was moved to a clone.
if (n->outcnt() == 0) {
assert(n->is_dead(), "");
return true;
}
}
return false;
}
// Tells whether n sits at one of the two blocks of interest: true when
// ctrl_or_self(n) is blk1 or blk2, false otherwise.
bool PhaseIdealLoop::at_relevant_ctrl(Node* n, const Node* blk1, const Node* blk2) {
  const Node* const location = ctrl_or_self(n);
  if (location == blk1) {
    return true;
  }
  return location == blk2;
}
void PhaseIdealLoop::clone_loadklass_nodes_at_cmp_index(const Node* n, Node* cmp, int i) {
Node* decode = cmp->in(i);
if (decode->Opcode() == Op_DecodeNKlass) {
Node* loadklass = decode->in(1);
if (loadklass->Opcode() == Op_LoadNKlass) {
Node* addp = loadklass->in(MemNode::Address);
if (addp == n) {
Node* ctrl = get_ctrl(cmp);
Node* decode_clone = decode->clone();
Node* loadklass_clone = loadklass->clone();
Node* addp_clone = addp->clone();
register_new_node(decode_clone, ctrl);
register_new_node(loadklass_clone, ctrl);
register_new_node(addp_clone, ctrl);
_igvn.replace_input_of(cmp, i, decode_clone);
_igvn.replace_input_of(decode_clone, 1, loadklass_clone);
_igvn.replace_input_of(loadklass_clone, MemNode::Address, addp_clone);
if (decode->outcnt() == 0) {
_igvn.remove_dead_node(decode);
}
}
}
} else {
Node* loadklass = cmp->in(i);
if (loadklass->Opcode() == Op_LoadKlass) {
Node* addp = loadklass->in(MemNode::Address);
if (addp == n) {
Node* ctrl = get_ctrl(cmp);
Node* loadklass_clone = loadklass->clone();
Node* addp_clone = addp->clone();
register_new_node(loadklass_clone, ctrl);
register_new_node(addp_clone, ctrl);
_igvn.replace_input_of(cmp, i, loadklass_clone);
_igvn.replace_input_of(loadklass_clone, MemNode::Address, addp_clone);
if (loadklass->outcnt() == 0) {
_igvn.remove_dead_node(loadklass);
}
}
}
}
}
// Clone a compare "down" to its uses instead of cloning it up through the split-if region.
//
// n is the candidate node; it is only processed when it is a Cmp (the FastLock case is
// handled specially below). blk1 and blk2 are the two blocks the caller treats as local.
//
// Returns false, leaving the graph untouched, when n is not a Cmp or when n heads a
// "simple" Cmp/Bool/CMove chain (single Bool use, single CMove use, both at blk1/blk2).
// Otherwise every use of n gets a private clone of n (and, for block-local Bools, a private
// clone of the Bool), the originals are removed, and true is returned.
bool PhaseIdealLoop::clone_cmp_down(Node* n, const Node* blk1, const Node* blk2) {
if( n->is_Cmp() ) {
assert(get_ctrl(n) == blk2 || get_ctrl(n) == blk1, "must be in block with IF");
// Check for simple Cmp/Bool/CMove which we can clone-up. Cmp/Bool/CMove
// sequence can have no other users and it must all reside in the split-if
// block. Non-simple Cmp/Bool/CMove sequences are 'cloned-down' below -
// private, per-use versions of the Cmp and Bool are made. These sink to
// the CMove block. If the CMove is in the split-if block, then in the
// next iteration this will become a simple Cmp/Bool/CMove set to clone-up.
Node *bol, *cmov;
if (!(n->outcnt() == 1 && n->unique_out()->is_Bool() &&
(bol = n->unique_out()->as_Bool()) &&
(at_relevant_ctrl(bol, blk1, blk2) &&
bol->outcnt() == 1 &&
bol->unique_out()->is_CMove() &&
(cmov = bol->unique_out()->as_CMove()) &&
at_relevant_ctrl(cmov, blk1, blk2)))) {
// Must clone down
#ifndef PRODUCT
if( PrintOpto && VerifyLoopOptimizations ) {
tty->print("Cloning down: ");
n->dump();
}
#endif
// A FastLock has no Bool uses to sink (its only users are Lock nodes, see the assert below),
// so the Bool handling is skipped for it.
if (!n->is_FastLock()) {
// Clone down any block-local BoolNode uses of this CmpNode
for (DUIterator i = n->outs(); n->has_out(i); i++) {
Node* bol = n->out(i);
assert( bol->is_Bool(), "" );
// A Bool with a single use that itself sits at blk1/blk2 is left in place
// (the 'continue' statements below): cloning it to the same block would gain nothing.
if (bol->outcnt() == 1) {
Node* use = bol->unique_out();
if (use->Opcode() == Op_Opaque4) {
if (use->outcnt() == 1) {
Node* iff = use->unique_out();
assert(iff->is_If(), "unexpected node type");
Node *use_c = iff->in(0);
if (use_c == blk1 || use_c == blk2) {
continue;
}
}
} else {
// We might see an Opaque1 from a loop limit check here
assert(use->is_If() || use->is_CMove() || use->Opcode() == Op_Opaque1 || use->is_AllocateArray(), "unexpected node type");
Node *use_c = (use->is_If() || use->is_AllocateArray()) ? use->in(0) : get_ctrl(use);
if (use_c == blk1 || use_c == blk2) {
assert(use->is_CMove(), "unexpected node type");
continue;
}
}
}
if (at_relevant_ctrl(bol, blk1, blk2)) {
// Recursively sink any BoolNode
#ifndef PRODUCT
if( PrintOpto && VerifyLoopOptimizations ) {
tty->print("Cloning down: ");
bol->dump();
}
#endif
for (DUIterator j = bol->outs(); bol->has_out(j); j++) {
Node* u = bol->out(j);
// Uses are either IfNodes, CMoves or Opaque4
if (u->Opcode() == Op_Opaque4) {
assert(u->in(1) == bol, "bad input");
// Each user of the Opaque4 gets its own Bool + Opaque4 pair at the user's control.
for (DUIterator_Last kmin, k = u->last_outs(kmin); k >= kmin; --k) {
Node* iff = u->last_out(k);
assert(iff->is_If() || iff->is_CMove(), "unexpected node type");
assert( iff->in(1) == u, "" );
// Get control block of either the CMove or the If input
Node *iff_ctrl = iff->is_If() ? iff->in(0) : get_ctrl(iff);
Node *x1 = bol->clone();
Node *x2 = u->clone();
register_new_node(x1, iff_ctrl);
register_new_node(x2, iff_ctrl);
_igvn.replace_input_of(x2, 1, x1);
_igvn.replace_input_of(iff, 1, x2);
}
_igvn.remove_dead_node(u);
// u is gone from bol's use list: revisit the current slot.
--j;
} else {
// We might see an Opaque1 from a loop limit check here
assert(u->is_If() || u->is_CMove() || u->Opcode() == Op_Opaque1 || u->is_AllocateArray(), "unexpected node type");
assert(u->is_AllocateArray() || u->in(1) == bol, "");
assert(!u->is_AllocateArray() || u->in(AllocateNode::ValidLengthTest) == bol, "wrong input to AllocateArray");
// Get control block of either the CMove or the If input
Node *u_ctrl = (u->is_If() || u->is_AllocateArray()) ? u->in(0) : get_ctrl(u);
assert((u_ctrl != blk1 && u_ctrl != blk2) || u->is_CMove(), "won't converge");
Node *x = bol->clone();
register_new_node(x, u_ctrl);
// AllocateArray consumes the Bool at its ValidLengthTest input; every other user at input 1.
_igvn.replace_input_of(u, u->is_AllocateArray() ? AllocateNode::ValidLengthTest : 1, x);
// u now uses the clone instead of bol, so bol's use list shrank: revisit the current slot.
--j;
}
}
_igvn.remove_dead_node(bol);
// bol is gone from n's use list: revisit the current slot.
--i;
}
}
}
// Clone down this CmpNode
for (DUIterator_Last jmin, j = n->last_outs(jmin); j >= jmin; --j) {
Node* use = n->last_out(j);
// Input slot of 'use' that holds n: 1 for ordinary users, TypeFunc::Parms + 2 for the Lock user of a FastLock.
uint pos = 1;
if (n->is_FastLock()) {
pos = TypeFunc::Parms + 2;
assert(use->is_Lock(), "FastLock only used by LockNode");
}
assert(use->in(pos) == n, "" );
Node *x = n->clone();
register_new_node(x, ctrl_or_self(use));
_igvn.replace_input_of(use, pos, x);
}
_igvn.remove_dead_node(n);
return true;
}
}
return false;
}
//------------------------------register_new_node------------------------------ //------------------------------register_new_node------------------------------
void PhaseIdealLoop::register_new_node( Node *n, Node *blk ) { void PhaseIdealLoop::register_new_node( Node *n, Node *blk ) {
assert(!n->is_CFG(), "must be data node"); assert(!n->is_CFG(), "must be data node");

View File

@ -0,0 +1,209 @@
/*
* Copyright (c) 2023, Red Hat, Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/**
* @test
* @bug 8297345
* @summary C2: SIGSEGV in PhaseIdealLoop::push_pinned_nodes_thru_region
* @requires vm.gc.Parallel
*
* @run main/othervm -XX:-TieredCompilation -XX:-UseOnStackReplacement -XX:-BackgroundCompilation
* -XX:CompileOnly=TestCheckCastPPBecomesTOP::test1 -XX:LoopMaxUnroll=0
* -XX:CompileCommand=dontinline,TestCheckCastPPBecomesTOP::notInlined -XX:+UseParallelGC TestCheckCastPPBecomesTOP
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-UseOnStackReplacement -XX:-BackgroundCompilation
* -XX:CompileOnly=TestCheckCastPPBecomesTOP::test1 -XX:LoopMaxUnroll=0
* -XX:CompileCommand=dontinline,TestCheckCastPPBecomesTOP::notInlined -XX:+UseParallelGC -XX:-UseCompressedClassPointers TestCheckCastPPBecomesTOP
*
*/
// Compiler regression test: the jtreg @run lines above restrict compilation to test1().
// NOTE(review): the empty "if (x == null) {}" statements and the empty counted loops look
// deliberate (they presumably shape the compiled graph that triggers the bug) -- do not
// "clean them up" without confirming the test still reproduces the original failure.
public class TestCheckCastPPBecomesTOP {
// Shared state read and written by the helpers; each field holds either an A or a B instance.
private static I field;
private static I field2;
private static I field3;
// Volatile field written by the helpers.
private static volatile int barrier;
// Repeatedly runs the helpers with both receiver types (A and B) stored in field/field2 and
// with the (i, j) combinations (100, 100), (10, 100) and (10, 10), then calls test1().
public static void main(String[] args) {
A a = new A();
B b = new B();
for (int i = 0; i < 100_000; i++) {
test1Helper3(5);
field2 = field = a;
test1Helper1(b, 100, 100);
test1Helper1(b, 100, 100);
test1Helper1(b, 100, 100);
field2 = field = b;
test1Helper1(b, 100, 100);
test1Helper1(b, 100, 100);
field2 = field = a;
test1Helper1(b, 10, 100);
test1Helper1(b, 10, 100);
test1Helper1(b, 10, 100);
field2 = field = b;
test1Helper1(b, 10, 100);
test1Helper1(b, 10, 100);
field2 = field = a;
test1Helper1(b, 10, 10);
test1Helper1(b, 10, 10);
test1Helper1(b, 10, 10);
field2 = field = b;
test1Helper1(b, 10, 10);
test1Helper1(b, 10, 10);
field2 = field = a;
test1Helper2(b, true);
field2 = field = b;
test1Helper2(b, true);
test1(false);
}
}
// Method under test (the only one named in -XX:CompileOnly). main() always passes false.
private static void test1(boolean flag1) {
I f = field;
// Empty null check on the loaded field.
if (f == null) {
}
test1Helper3(10);
test1Helper2(f, flag1);
// Empty triple-nested loop nest.
for (int j = 0; j < 10; j++) {
for (int k = 0; k < 10; k++) {
for (int l = 0; l < 10; l++) {
}
}
}
}
// Counts up to 'stop' in an empty loop, then writes to the volatile 'barrier' unless the final count is 10.
private static void test1Helper3(int stop) {
int i;
for (i = 0; i < stop; i++) {
}
if (i != 10) {
barrier = 0x42;
}
}
// When flag1 is set, runs empty loops that leave i == 10 and j == 10 and forwards
// those values to test1Helper1(). Does nothing when flag1 is false.
private static void test1Helper2(I f2, boolean flag1) {
if (flag1) {
if (f2 == null) {
}
int i;
for (i = 0; i < 10; i++) {
}
int j;
for (j = 0; j < 10; j++) {
for (int k = 0; k < 10; k++) {
}
}
test1Helper1(f2, i, j);
}
}
// Loads field and field2, conditionally replaces f1 with f3 depending on whether i and j
// equal 10, and makes interface calls (m1..m4) on the resulting references.
private static void test1Helper1(I f2, int i, int j) {
I f1 = field;
if (f1 == null) {
}
I f3 = field2;
if (f3 == null) {
}
field2 = f3;
field = f1;
if (i == 10) {
if (j == 10) {
f1.m1();
} else {
f1 = f3;
}
f3.m2(f1);
} else {
f1 = f3;
}
I f4 = field2;
field = f1;
f4.m3(f1, f2);
I f5 = field;
barrier = 0x42;
f5.m4(f2);
}
// Empty call target; the @run lines pass CompileCommand=dontinline for this method.
private static void notInlined(Object o1, Object o2) {
}
// Interface with two implementations (A and B).
interface I {
void m1();
void m2(I f);
void m3(I f1, I f2);
void m4(I f2);
}
// First implementation of I. Its method bodies are identical to those of B.
static class A implements I {
public void m1() {
}
public void m2(I f) {
f.m1();
}
public void m3(I f1, I f2) {
f1.m1();
f2.m1();
}
public void m4(I f2) {
notInlined(this, f2);
field3 = this;
}
}
// Second implementation of I. Its method bodies are identical to those of A.
static class B implements I {
public void m1() {
}
public void m2(I f) {
f.m1();
}
public void m3(I f1, I f2) {
f1.m1();
f2.m1();
}
public void m4(I f2) {
notInlined(this, f2);
field3 = this;
}
}
}