8333258: C2: high memory usage in PhaseCFG::insert_anti_dependences()
Reviewed-by: kvn, epeter
commit 5381f553ad
parent d23c59e408
@@ -562,6 +562,103 @@ bool PhaseCFG::unrelated_load_in_store_null_block(Node* store, Node* load) {
   return false;
 }

+class DefUseMemStatesQueue : public StackObj {
+private:
+  class DefUsePair : public StackObj {
+  private:
+    Node* _def; // memory state
+    Node* _use; // use of the memory state that also modifies the memory state
+
+  public:
+    DefUsePair(Node* def, Node* use) :
+      _def(def), _use(use) {
+    }
+
+    DefUsePair() :
+      _def(nullptr), _use(nullptr) {
+    }
+
+    Node* def() const {
+      return _def;
+    }
+
+    Node* use() const {
+      return _use;
+    }
+  };
+
+  GrowableArray<DefUsePair> _queue;
+  GrowableArray<MergeMemNode*> _worklist_visited; // visited MergeMem nodes
+
+  bool already_enqueued(Node* def_mem, PhiNode* use_phi) const {
+    // def_mem is one of the inputs of use_phi, and at least one input of
+    // use_phi is not def_mem. It is, however, possible that use_phi has
+    // def_mem as input multiple times. If that happens, use_phi is recorded
+    // as a use of def_mem multiple times as well. When
+    // PhaseCFG::insert_anti_dependences() goes over the uses of def_mem and
+    // enqueues them for processing, use_phi would then be enqueued for
+    // processing multiple times when it only needs to be processed once.
+    // The code below checks whether use_phi, as a use of def_mem, was
+    // already enqueued, to avoid redundant processing of use_phi.
+    int j = _queue.length() - 1;
+    // If any uses of def_mem were already enqueued, they were enqueued
+    // last (all uses of def_mem are processed in one go).
+    for (; j >= 0; j--) {
+      const DefUsePair& def_use_pair = _queue.at(j);
+      if (def_use_pair.def() != def_mem) {
+        // We're done with the uses of def_mem
+        break;
+      }
+      if (def_use_pair.use() == use_phi) {
+        return true;
+      }
+    }
+#ifdef ASSERT
+    for (; j >= 0; j--) {
+      const DefUsePair& def_use_pair = _queue.at(j);
+      assert(def_use_pair.def() != def_mem, "Should be done with the uses of def_mem");
+    }
+#endif
+    return false;
+  }
+
+public:
+  DefUseMemStatesQueue(ResourceArea* area) {
+  }
+
+  void push(Node* def_mem_state, Node* use_mem_state) {
+    if (use_mem_state->is_MergeMem()) {
+      // Be sure we don't get into combinatorial problems.
+      if (!_worklist_visited.append_if_missing(use_mem_state->as_MergeMem())) {
+        return; // already on work list; do not repeat
+      }
+    } else if (use_mem_state->is_Phi()) {
+      // A Phi can have the same memory state as input multiple times. If that
+      // is the case, we don't need to enqueue it more than once. We otherwise
+      // allow Phis to be repeated; they can merge two relevant states.
+      if (already_enqueued(def_mem_state, use_mem_state->as_Phi())) {
+        return;
+      }
+    }
+
+    _queue.push(DefUsePair(def_mem_state, use_mem_state));
+  }
+
+  bool is_nonempty() const {
+    return _queue.is_nonempty();
+  }
+
+  Node* top_def() const {
+    return _queue.top().def();
+  }
+
+  Node* top_use() const {
+    return _queue.top().use();
+  }
+
+  void pop() {
+    _queue.pop();
+  }
+};
+
 //--------------------------insert_anti_dependences---------------------------
 // A load may need to witness memory that nearby stores can overwrite.
 // For each nearby store, either insert an "anti-dependence" edge
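An aside on the tail scan in already_enqueued(): insert_anti_dependences() enqueues all uses of one def_mem in a single go, so a previously recorded (def_mem, use_phi) pair can only sit in the tail run of _queue entries sharing that def, which is what the #ifdef ASSERT loop double-checks. Below is a minimal standalone sketch of that invariant, with std::vector standing in for GrowableArray; all names are illustrative, not HotSpot code.

#include <cstdio>
#include <vector>

struct Node { int id; };

struct DefUsePair {
  const Node* def;
  const Node* use;
};

// Uses of a given def are always pushed consecutively, so a duplicate
// (def, use) pair can only appear in the tail run of entries that share
// that def. The scan stops at the first entry with a different def.
static bool already_enqueued(const std::vector<DefUsePair>& queue,
                             const Node* def, const Node* use) {
  for (int j = (int)queue.size() - 1; j >= 0; j--) {
    if (queue[j].def != def) {
      break; // past the tail run for this def; no duplicate possible
    }
    if (queue[j].use == use) {
      return true; // this (def, use) pair was already enqueued
    }
  }
  return false;
}

int main() {
  Node d{1}, phi{2}, other{3};
  std::vector<DefUsePair> queue = {{&d, &other}, {&d, &phi}};
  // A Phi that has d as input twice would be offered twice; the tail
  // scan rejects the second offer without walking the whole queue.
  std::printf("%d\n", already_enqueued(queue, &d, &phi));   // prints 1
  std::printf("%d\n", already_enqueued(queue, &phi, &d));   // prints 0
  return 0;
}

The cost per push is bounded by the fan-out of a single def rather than the total queue length, which keeps the deduplication cheap even for long worklists.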
@@ -579,6 +676,7 @@ bool PhaseCFG::unrelated_load_in_store_null_block(Node* store, Node* load) {
 // preserve anti-dependences. The caller may also hoist the load
 // above the LCA, if it is not the early block.
 Block* PhaseCFG::insert_anti_dependences(Block* LCA, Node* load, bool verify) {
+  ResourceMark rm;
   assert(load->needs_anti_dependence_check(), "must be a load of some sort");
   assert(LCA != nullptr, "");
   DEBUG_ONLY(Block* LCA_orig = LCA);
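The added ResourceMark is the heart of the memory fix: everything the pass allocates from the thread's resource area is released when insert_anti_dependences() returns, instead of accumulating across the many calls GCM makes. As a rough standalone analogy (not HotSpot's actual ResourceArea/ResourceMark implementation), the mark/release discipline over a bump-pointer arena looks like this:

#include <cassert>
#include <cstddef>

// Toy bump-pointer arena; HotSpot's ResourceArea is chunked and
// per-thread, but the mark/release discipline is the same idea.
class Arena {
  char   _buf[1 << 20];
  size_t _top = 0;
 public:
  void* alloc(size_t n) {
    assert(_top + n <= sizeof(_buf));
    void* p = _buf + _top;
    _top += n;
    return p;
  }
  size_t top() const { return _top; }
  void reset_to(size_t t) { _top = t; } // frees everything allocated after t
};

// RAII mark: records the high-water level on construction and rolls
// the arena back on destruction, like ResourceMark does.
class Mark {
  Arena& _a;
  size_t _saved;
 public:
  explicit Mark(Arena& a) : _a(a), _saved(a.top()) {}
  ~Mark() { _a.reset_to(_saved); }
};

Arena arena;

void one_pass() {
  Mark rm(arena);           // like "ResourceMark rm;" above
  (void)arena.alloc(4096);  // scratch worklists, queues, ...
}                           // all scratch memory released here

int main() {
  for (int i = 0; i < 1000; i++) {
    one_pass();             // memory usage stays bounded across calls
  }
  assert(arena.top() == 0);
  return 0;
}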
@@ -625,10 +723,8 @@ Block* PhaseCFG::insert_anti_dependences(Block* LCA, Node* load, bool verify) {
     early = memory_early_block(load, early, this);
   }

-  ResourceArea *area = Thread::current()->resource_area();
-  Node_List worklist_mem(area);     // prior memory state to store
-  Node_List worklist_store(area);   // possible-def to explore
-  Node_List worklist_visited(area); // visited mergemem nodes
+  ResourceArea* area = Thread::current()->resource_area();
+  DefUseMemStatesQueue worklist_def_use_mem_states(area); // prior memory state to store and possible-def to explore
   Node_List non_early_stores(area); // all relevant stores outside of early
   bool must_raise_LCA = false;
@@ -644,60 +740,56 @@ Block* PhaseCFG::insert_anti_dependences(Block* LCA, Node* load, bool verify) {
   // The relevant stores "nearby" the load consist of a tree rooted
   // at initial_mem, with internal nodes of type MergeMem.
   // Therefore, the branches visited by the worklist are of this form:
-  //    initial_mem -> (MergeMem ->)* store
+  //    initial_mem -> (MergeMem ->)* memory state modifying node
+  // Memory state modifying nodes include Store and Phi nodes and any node
+  // for which needs_anti_dependence_check() returns false.
   // The anti-dependence constraints apply only to the fringe of this tree.

   Node* initial_mem = load->in(MemNode::Memory);
-  worklist_store.push(initial_mem);
-  worklist_visited.push(initial_mem);
-  worklist_mem.push(nullptr);
-  while (worklist_store.size() > 0) {
+  worklist_def_use_mem_states.push(nullptr, initial_mem);
+  while (worklist_def_use_mem_states.is_nonempty()) {
     // Examine a nearby store to see if it might interfere with our load.
-    Node* mem   = worklist_mem.pop();
-    Node* store = worklist_store.pop();
-    uint op = store->Opcode();
+    Node* def_mem_state = worklist_def_use_mem_states.top_def();
+    Node* use_mem_state = worklist_def_use_mem_states.top_use();
+    worklist_def_use_mem_states.pop();
+
+    uint op = use_mem_state->Opcode();
+    assert(!use_mem_state->needs_anti_dependence_check(), "no loads");

     // MergeMems do not directly have anti-deps.
     // Treat them as internal nodes in a forward tree of memory states,
     // the leaves of which are each a 'possible-def'.
-    if (store == initial_mem         // root (exclusive) of tree we are searching
+    if (use_mem_state == initial_mem // root (exclusive) of tree we are searching
         || op == Op_MergeMem         // internal node of tree we are searching
         ) {
-      mem = store;                   // It's not a possibly interfering store.
-      if (store == initial_mem)
+      def_mem_state = use_mem_state; // It's not a possibly interfering store.
+      if (use_mem_state == initial_mem)
         initial_mem = nullptr;       // only process initial memory once

-      for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
-        store = mem->fast_out(i);
-        if (store->is_MergeMem()) {
-          // Be sure we don't get into combinatorial problems.
-          // (Allow phis to be repeated; they can merge two relevant states.)
-          uint j = worklist_visited.size();
-          for (; j > 0; j--) {
-            if (worklist_visited.at(j-1) == store)  break;
-          }
-          if (j > 0)  continue; // already on work list; do not repeat
-          worklist_visited.push(store);
-        }
-        worklist_mem.push(mem);
-        worklist_store.push(store);
+      for (DUIterator_Fast imax, i = def_mem_state->fast_outs(imax); i < imax; i++) {
+        use_mem_state = def_mem_state->fast_out(i);
+        if (use_mem_state->needs_anti_dependence_check()) {
+          // use_mem_state is also a kind of load (i.e. needs_anti_dependence_check()
+          // returns true), and it is not a memory state modifying node (Store, Phi or
+          // MergeMem). Hence, load can't be anti-dependent on this node.
+          continue;
+        }
+        worklist_def_use_mem_states.push(def_mem_state, use_mem_state);
       }
       continue;
     }

     if (op == Op_MachProj || op == Op_Catch)   continue;
-    if (store->needs_anti_dependence_check())  continue;  // not really a store

     // Compute the alias index.  Loads and stores with different alias
     // indices do not need anti-dependence edges.  Wide MemBar's are
     // anti-dependent on everything (except immutable memories).
-    const TypePtr* adr_type = store->adr_type();
+    const TypePtr* adr_type = use_mem_state->adr_type();
     if (!C->can_alias(adr_type, load_alias_idx))  continue;

     // Most slow-path runtime calls do NOT modify Java memory, but
     // they can block and so write Raw memory.
-    if (store->is_Mach()) {
-      MachNode* mstore = store->as_Mach();
+    if (use_mem_state->is_Mach()) {
+      MachNode* mstore = use_mem_state->as_Mach();
       if (load_alias_idx != Compile::AliasIdxRaw) {
         // Check for call into the runtime using the Java calling
         // convention (and from there into a wrapper); it has no
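A condensed sketch of the walk above: start at initial_mem, treat MergeMems as interior nodes, filter out loads at push time (the filtering that backs the assert on use_mem_state), and report only the memory-state-modifying leaves. The toy node kinds below are illustrative, and the MergeMem visited-set deduplication is omitted for brevity; this is not the real IR.

#include <cstdio>
#include <vector>

enum Kind { MERGE_MEM, STORE_LIKE, LOAD_LIKE };

struct Node {
  Kind kind;
  const char* name;
  std::vector<Node*> outs; // def->use edges
  bool needs_anti_dependence_check() const { return kind == LOAD_LIKE; }
};

struct DefUsePair { Node* def; Node* use; };

// Walk: initial_mem -> (MergeMem ->)* memory-state-modifying node.
// Only the leaves of this tree are anti-dependence candidates.
void walk(Node* initial_mem) {
  std::vector<DefUsePair> queue;
  queue.push_back({nullptr, initial_mem});
  while (!queue.empty()) {
    DefUsePair p = queue.back();
    queue.pop_back();
    Node* use = p.use;
    if (use == initial_mem || use->kind == MERGE_MEM) {
      // Interior node: fan out to its uses, skipping loads at push time.
      for (Node* out : use->outs) {
        if (out->needs_anti_dependence_check()) continue; // a load: irrelevant
        queue.push_back({use, out});
      }
      continue;
    }
    // Leaf: a possibly interfering memory-state modification.
    std::printf("possible def: %s (prior state: %s)\n", use->name, p.def->name);
  }
}

int main() {
  Node store{STORE_LIKE, "store", {}};
  Node load {LOAD_LIKE,  "load",  {}};
  Node merge{MERGE_MEM,  "merge", {&store, &load}};
  Node init {MERGE_MEM,  "initial_mem", {&merge}};
  walk(&init); // prints: possible def: store (prior state: merge)
  return 0;
}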
@@ -736,10 +828,10 @@ Block* PhaseCFG::insert_anti_dependences(Block* LCA, Node* load, bool verify) {
     // or else observe that 'store' is all the way up in the
     // earliest legal block for 'load'.  In the latter case,
     // immediately insert an anti-dependence edge.
-    Block* store_block = get_block_for_node(store);
+    Block* store_block = get_block_for_node(use_mem_state);
     assert(store_block != nullptr, "unused killing projections skipped above");

-    if (store->is_Phi()) {
+    if (use_mem_state->is_Phi()) {
       // Loop-phis need to raise load before input. (Other phis are treated
       // as store below.)
       //
@@ -754,8 +846,8 @@ Block* PhaseCFG::insert_anti_dependences(Block* LCA, Node* load, bool verify) {
       // Do not assert(store_block != early, "Phi merging memory after access")
       // PhiNode may be at start of block 'early' with backedge to 'early'
       DEBUG_ONLY(bool found_match = false);
-      for (uint j = PhiNode::Input, jmax = store->req(); j < jmax; j++) {
-        if (store->in(j) == mem) {   // Found matching input?
+      for (uint j = PhiNode::Input, jmax = use_mem_state->req(); j < jmax; j++) {
+        if (use_mem_state->in(j) == def_mem_state) { // Found matching input?
           DEBUG_ONLY(found_match = true);
           Block* pred_block = get_block_for_node(store_block->pred(j));
           if (pred_block != early) {
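For a Phi, only the inputs that carry def_mem_state matter: each match names a predecessor edge below which the load must stay, rather than the Phi's own block. A small standalone sketch of that matching scan follows (block bookkeeping elided; in C2, a Phi's data inputs start at index 1, after the region input at 0, which is what PhiNode::Input denotes):

#include <cstdio>
#include <vector>

struct Node {
  std::vector<Node*> in; // in[0] is control, like a PhiNode's region input
};

// Mimics the loop above: find every Phi input that is exactly the prior
// memory state; each match identifies a predecessor edge that constrains
// where the load may be placed.
std::vector<unsigned> matching_inputs(const Node* phi, const Node* def_mem_state) {
  const unsigned phi_input = 1; // stands in for PhiNode::Input
  std::vector<unsigned> matches;
  for (unsigned j = phi_input; j < phi->in.size(); j++) {
    if (phi->in[j] == def_mem_state) {
      matches.push_back(j); // predecessor j feeds def_mem_state into the Phi
    }
  }
  return matches;
}

int main() {
  Node region, def, other;
  Node phi{{&region, &def, &other, &def}}; // def flows in on edges 1 and 3
  for (unsigned j : matching_inputs(&phi, &def)) {
    std::printf("matching input: %u\n", j); // prints 1 and 3
  }
  return 0;
}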
@@ -781,21 +873,21 @@ Block* PhaseCFG::insert_anti_dependences(Block* LCA, Node* load, bool verify) {
       // will find him on the non_early_stores list and stick him
       // with a precedence edge.
       // (But, don't bother if LCA is already raised all the way.)
-      if (LCA != early && !unrelated_load_in_store_null_block(store, load)) {
+      if (LCA != early && !unrelated_load_in_store_null_block(use_mem_state, load)) {
         store_block->set_raise_LCA_mark(load_index);
         must_raise_LCA = true;
-        non_early_stores.push(store);
+        non_early_stores.push(use_mem_state);
       }
     } else {
       // Found a possibly-interfering store in the load's 'early' block.
       // This means 'load' cannot sink at all in the dominator tree.
       // Add an anti-dep edge, and squeeze 'load' into the highest block.
-      assert(store != load->find_exact_control(load->in(0)), "dependence cycle found");
+      assert(use_mem_state != load->find_exact_control(load->in(0)), "dependence cycle found");
       if (verify) {
-        assert(store->find_edge(load) != -1 || unrelated_load_in_store_null_block(store, load),
+        assert(use_mem_state->find_edge(load) != -1 || unrelated_load_in_store_null_block(use_mem_state, load),
                "missing precedence edge");
       } else {
-        store->add_prec(load);
+        use_mem_state->add_prec(load);
       }
       LCA = early;
       // This turns off the process of gathering non_early_stores.
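The two branches above are the two possible outcomes for an interfering store: outside the early block it is only recorded (a raise-LCA mark plus the non_early_stores list) for the later LCA-raising pass, while inside the early block it pins the load at once with a precedence edge. A compressed standalone sketch of that decision, with all scheduling machinery elided and illustrative types:

#include <cstdio>
#include <vector>

struct Block { int id; };

struct Store {
  Block* block;
  std::vector<const void*> prec; // precedence edges, like Node::add_prec
};

// For each interfering store, either record its block for the later
// raise-LCA pass, or, if it sits in the load's earliest legal block,
// force load-before-store right away with a precedence edge.
Block* handle_store(Store& store, const void* load, Block* LCA, Block* early,
                    std::vector<Store*>& non_early_stores, bool& must_raise_LCA) {
  if (store.block != early) {
    if (LCA != early) {            // not already raised all the way
      must_raise_LCA = true;
      non_early_stores.push_back(&store);
    }
    return LCA;
  }
  store.prec.push_back(load);      // anti-dependence: schedule load first
  return early;                    // load can no longer sink below 'early'
}

int main() {
  Block early{0}, mid{1}, lca{2};
  Store s1{&mid, {}}, s2{&early, {}};
  int load = 0;
  std::vector<Store*> non_early;
  bool must_raise = false;
  Block* LCA = handle_store(s1, &load, &lca, &early, non_early, must_raise);
  LCA = handle_store(s2, &load, LCA, &early, non_early, must_raise);
  std::printf("LCA=%d marks=%zu prec=%zu\n",
              LCA->id, non_early.size(), s2.prec.size()); // LCA=0 marks=1 prec=1
  return 0;
}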