8255763: C2: OSR miscompilation caused by invalid memory instruction placement

Disable GCM hoisting of memory-writing nodes for irreducible CFGs. This prevents
GCM from wrongly "hoisting" stores into descendants of their original loop. Such
an "inverted hoisting" can happen due to CFGLoop::compute_freq()'s inaccurate
estimation of frequencies for irreducible CFGs.

Extend CFG verification code by checking that memory-writing nodes are placed in
either their original loop or an ancestor.

Add tests for the reducible and irreducible cases. The former was already
handled correctly before the change (the frequency estimation model prevents
"inverted hoisting" for reducible CFGs), and is just added for coverage.

This change addresses the specific miscompilation issue in a conservative way,
for simplicity and safety. Future work includes investigating if only the
illegal blocks can be discarded as candidates for GCM hoisting, and refining
frequency estimation for irreducible CFGs.

Reviewed-by: kvn, chagedorn
This commit is contained in:
Roberto Castañeda Lozano 2020-12-21 13:04:24 +00:00 committed by Christian Hagedorn
parent 2525f39d35
commit 4e8338eb13
4 changed files with 161 additions and 4 deletions
src/hotspot/share/opto
test/hotspot/jtreg/compiler/codegen

@ -1221,6 +1221,26 @@ void PhaseCFG::verify() const {
if (j >= 1 && n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_CreateEx) {
assert(j == 1 || block->get_node(j-1)->is_Phi(), "CreateEx must be first instruction in block");
}
// Verify that memory-writing nodes (such as stores and calls) are placed
// in their original loop L (given by the control input) or in an ancestor
// of L. This is guaranteed by the freq. estimation model for reducible
// CFGs, and by special handling in PhaseCFG::schedule_late() otherwise.
if (n->is_Mach() && n->bottom_type()->has_memory() && n->in(0) != NULL) {
Block* original_block = find_block_for_node(n->in(0));
assert(original_block != NULL, "missing block for memory-writing node");
CFGLoop* original_or_ancestor = original_block->_loop;
assert(block->_loop != NULL && original_or_ancestor != NULL, "no loop");
bool found = false;
do {
if (block->_loop == original_or_ancestor) {
found = true;
break;
}
original_or_ancestor = original_or_ancestor->parent();
} while (original_or_ancestor != NULL);
assert(found, "memory-writing node is not placed in its original loop "
"or an ancestor of it");
}
if (n->needs_anti_dependence_check()) {
verify_anti_dependences(block, n);
}

@ -501,8 +501,8 @@ class PhaseCFG : public Phase {
CFGLoop* create_loop_tree();
bool is_dominator(Node* dom_node, Node* node);
bool is_CFG(Node* n);
bool is_control_proj_or_safepoint(Node* n);
Block* find_block_for_node(Node* n);
bool is_control_proj_or_safepoint(Node* n) const;
Block* find_block_for_node(Node* n) const;
bool is_dominating_control(Node* dom_ctrl, Node* n);
#ifndef PRODUCT
bool _trace_opto_pipelining; // tracing flag

@ -152,14 +152,14 @@ bool PhaseCFG::is_CFG(Node* n) {
return n->is_block_proj() || n->is_block_start() || is_control_proj_or_safepoint(n);
}
bool PhaseCFG::is_control_proj_or_safepoint(Node* n) {
bool PhaseCFG::is_control_proj_or_safepoint(Node* n) const {
bool result = (n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_SafePoint) || (n->is_Proj() && n->as_Proj()->bottom_type() == Type::CONTROL);
assert(!result || (n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_SafePoint)
|| (n->is_Proj() && n->as_Proj()->_con == 0), "If control projection, it must be projection 0");
return result;
}
Block* PhaseCFG::find_block_for_node(Node* n) {
Block* PhaseCFG::find_block_for_node(Node* n) const {
if (n->is_block_start() || n->is_block_proj()) {
return get_block_for_node(n);
} else {
@ -1274,6 +1274,46 @@ void PhaseCFG::schedule_late(VectorSet &visited, Node_Stack &stack) {
default:
break;
}
if (C->has_irreducible_loop() && self->bottom_type()->has_memory()) {
// If the CFG is irreducible, keep memory-writing nodes as close as
// possible to their original block (given by the control input). This
// prevents PhaseCFG::hoist_to_cheaper_block() from placing such nodes
// into descendants of their original loop, as in the following example:
//
// Original placement of store in B1 (loop L1):
//
// B1 (L1):
// m1 <- ..
// m2 <- store m1, ..
// B2 (L2):
// jump B2
// B3 (L1):
// .. <- .. m2, ..
//
// Wrong "hoisting" of store to B2 (in loop L2, child of L1):
//
// B1 (L1):
// m1 <- ..
// B2 (L2):
// m2 <- store m1, ..
// # Wrong: m1 and m2 interfere at this point.
// jump B2
// B3 (L1):
// .. <- .. m2, ..
//
// This "hoist inversion" can happen due to CFGLoop::compute_freq()'s
// inaccurate estimation of frequencies for irreducible CFGs, which can
// lead to for example assigning B1 and B3 a higher frequency than B2.
#ifndef PRODUCT
if (trace_opto_pipelining()) {
tty->print_cr("# Irreducible loops: schedule in earliest block B%d:",
early->_pre_order);
self->dump();
}
#endif
schedule_node_into_block(self, early);
continue;
}
}
// Gather LCA of all uses

@ -0,0 +1,97 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package compiler.codegen;
import jdk.test.lib.Asserts;
/**
* @test
* @bug 8255763
* @summary Tests GCM's store placement for reducible and irreducible CFGs.
* @library /test/lib /
* @run main/othervm -Xbatch compiler.codegen.TestGCMStorePlacement reducible
* @run main/othervm -Xbatch compiler.codegen.TestGCMStorePlacement irreducible
*/
public class TestGCMStorePlacement {
static int counter;
// Reducible case: counter++ should not be placed into the loop.
static void testReducible() {
counter++;
int acc = 0;
for (int i = 0; i < 50; i++) {
if (i % 2 == 0) {
acc += 1;
}
}
return;
}
// Irreducible case (due to OSR compilation): counter++ should not be placed
// outside its switch case block.
static void testIrreducible() {
for (int i = 0; i < 30; i++) {
switch (i % 3) {
case 0:
for (int j = 0; j < 50; j++) {
// OSR enters here.
for (int k = 0; k < 7000; k++) {}
if (i % 2 == 0) {
break;
}
}
counter++;
break;
case 1:
break;
case 2:
break;
}
}
return;
}
public static void main(String[] args) {
switch (args[0]) {
case "reducible":
// Cause a regular C2 compilation of testReducible.
for (int i = 0; i < 100_000; i++) {
counter = 0;
testReducible();
Asserts.assertEQ(counter, 1);
}
break;
case "irreducible":
// Cause an OSR C2 compilation of testIrreducible.
counter = 0;
testIrreducible();
Asserts.assertEQ(counter, 10);
break;
default:
System.out.println("invalid mode");
}
}
}