8325064: C2 SuperWord: refactor construct_bb

Reviewed-by: kvn, chagedorn
This commit is contained in:
Emanuel Peter 2024-02-05 16:28:51 +00:00
parent d395ac2879
commit 89e6a02e3b
2 changed files with 98 additions and 112 deletions
src/hotspot/share/opto

@ -50,7 +50,6 @@ SuperWord::SuperWord(PhaseIdealLoop* phase) :
_packset(arena(), 8, 0, nullptr), // packs for the current block _packset(arena(), 8, 0, nullptr), // packs for the current block
_bb_idx(arena(), (int)(1.10 * phase->C->unique()), 0, 0), // node idx to index in bb _bb_idx(arena(), (int)(1.10 * phase->C->unique()), 0, 0), // node idx to index in bb
_block(arena(), 8, 0, nullptr), // nodes in current block _block(arena(), 8, 0, nullptr), // nodes in current block
_data_entry(arena(), 8, 0, nullptr), // nodes with all inputs from outside
_mem_slice_head(arena(), 8, 0, nullptr), // memory slice heads _mem_slice_head(arena(), 8, 0, nullptr), // memory slice heads
_mem_slice_tail(arena(), 8, 0, nullptr), // memory slice tails _mem_slice_tail(arena(), 8, 0, nullptr), // memory slice tails
_node_info(arena(), 8, 0, SWNodeInfo::initial), // info needed per node _node_info(arena(), 8, 0, SWNodeInfo::initial), // info needed per node
@ -521,10 +520,33 @@ bool SuperWord::SLP_extract() {
CountedLoopNode* cl = lpt()->_head->as_CountedLoop(); CountedLoopNode* cl = lpt()->_head->as_CountedLoop();
assert(cl->is_main_loop(), "SLP should only work on main loops"); assert(cl->is_main_loop(), "SLP should only work on main loops");
// Find memory slices
find_memory_slices();
if (!is_marked_reduction_loop() &&
_mem_slice_head.is_empty()) {
#ifndef PRODUCT
if (is_trace_superword_any()) {
tty->print_cr("\nNo reductions or memory slices found, abort SuperWord.");
tty->cr();
}
#endif
return false;
}
// Ready the block // Ready the block
if (!construct_bb()) { if (!construct_bb()) {
return false; // Exit if no interesting nodes or complex graph. #ifndef PRODUCT
if (is_trace_superword_any()) {
tty->print_cr("\nSuperWord::construct_bb failed: abort SuperWord");
tty->cr();
} }
#endif
return false;
}
// Ensure extra info is allocated.
initialize_node_info();
// build _dg // build _dg
dependence_graph(); dependence_graph();
@ -896,6 +918,36 @@ void SuperWord::dependence_graph() {
} }
} }
void SuperWord::find_memory_slices() {
assert(_mem_slice_head.length() == 0, "mem_slice_head is empty");
assert(_mem_slice_tail.length() == 0, "mem_slice_tail is empty");
// Iterate over all memory phis
for (DUIterator_Fast imax, i = lp()->fast_outs(imax); i < imax; i++) {
PhiNode* phi = lp()->fast_out(i)->isa_Phi();
if (phi != nullptr && in_bb(phi) && phi->is_memory_phi()) {
Node* phi_tail = phi->in(LoopNode::LoopBackControl);
if (phi_tail != phi->in(LoopNode::EntryControl)) {
_mem_slice_head.push(phi);
_mem_slice_tail.push(phi_tail->as_Mem());
}
}
}
NOT_PRODUCT( if (is_trace_superword_memory_slices()) { print_memory_slices(); } )
}
#ifndef PRODUCT
// Print the recorded memory slices to tty: a header line (with "NONE" if
// there are no slices), then for every slice its index followed by the dump
// of the head node and the dump of the tail node.
void SuperWord::print_memory_slices() {
  const int slice_count = _mem_slice_head.length();
  tty->print_cr("\nSuperWord::print_memory_slices: %s",
                slice_count > 0 ? "" : "NONE");

  for (int slice = 0; slice < slice_count; slice++) {
    // Head of the slice, prefixed with the slice index.
    tty->print("%6d ", slice);
    _mem_slice_head.at(slice)->dump();
    // Matching tail of the slice.
    tty->print(" ");
    _mem_slice_tail.at(slice)->dump();
  }
}
#endif
//---------------------------mem_slice_preds--------------------------- //---------------------------mem_slice_preds---------------------------
// Return a memory slice (node list) in predecessor order starting at "start" // Return a memory slice (node list) in predecessor order starting at "start"
void SuperWord::mem_slice_preds(Node* start, Node* stop, GrowableArray<Node*> &preds) { void SuperWord::mem_slice_preds(Node* start, Node* stop, GrowableArray<Node*> &preds) {
@ -2950,177 +3002,110 @@ bool SuperWord::is_vector_use(Node* use, int u_idx) {
//------------------------------construct_bb--------------------------- //------------------------------construct_bb---------------------------
// Construct reverse postorder list of block members // Construct reverse postorder list of block members
bool SuperWord::construct_bb() { bool SuperWord::construct_bb() {
Node* entry = bb();
assert(_block.length() == 0, "block is empty"); assert(_block.length() == 0, "block is empty");
assert(_data_entry.length() == 0, "data_entry is empty");
assert(_mem_slice_head.length() == 0, "mem_slice_head is empty");
assert(_mem_slice_tail.length() == 0, "mem_slice_tail is empty");
// Find non-control nodes with no inputs from within block, // First pass over loop body:
// create a temporary map from node _idx to bb_idx for use // (1) Check that there are no unwanted nodes (LoadStore, MergeMem, data Proj).
// by the visited and post_visited sets, // (2) Count number of nodes, and create a temporary map (_idx -> bb_idx).
// and count number of nodes in block. // (3) Verify that all non-ctrl nodes have an input inside the loop.
int bb_ct = 0; int block_count = 0;
for (uint i = 0; i < lpt()->_body.size(); i++) { for (uint i = 0; i < lpt()->_body.size(); i++) {
Node *n = lpt()->_body.at(i); Node* n = lpt()->_body.at(i);
set_bb_idx(n, i); // Create a temporary map set_bb_idx(n, i); // Create a temporary map
if (in_bb(n)) { if (in_bb(n)) {
block_count++;
if (n->is_LoadStore() || n->is_MergeMem() || if (n->is_LoadStore() || n->is_MergeMem() ||
(n->is_Proj() && !n->as_Proj()->is_CFG())) { (n->is_Proj() && !n->as_Proj()->is_CFG())) {
// Bailout if the loop has LoadStore, MergeMem or data Proj // Bailout if the loop has LoadStore, MergeMem or data Proj
// nodes. Superword optimization does not work with them. // nodes. Superword optimization does not work with them.
#ifndef PRODUCT
if (is_trace_superword_any()) {
tty->print_cr("SuperWord::construct_bb: fails because of unhandled node:");
n->dump();
}
#endif
return false; return false;
} }
bb_ct++;
#ifdef ASSERT
if (!n->is_CFG()) { if (!n->is_CFG()) {
bool found = false; bool found = false;
for (uint j = 0; j < n->req(); j++) { for (uint j = 0; j < n->req(); j++) {
Node* def = n->in(j); Node* def = n->in(j);
if (def && in_bb(def)) { if (def != nullptr && in_bb(def)) {
found = true; found = true;
break; break;
} }
} }
if (!found) { assert(found, "every non-cfg node must have an input that is also inside the loop");
assert(n != entry, "can't be entry");
_data_entry.push(n);
}
} }
#endif
} }
} }
// Find memory slices (head and tail) // Create a reverse-post-order list of nodes in block
for (DUIterator_Fast imax, i = lp()->fast_outs(imax); i < imax; i++) {
Node *n = lp()->fast_out(i);
if (in_bb(n) && n->is_memory_phi()) {
Node* n_tail = n->in(LoopNode::LoopBackControl);
if (n_tail != n->in(LoopNode::EntryControl)) {
if (!n_tail->is_Mem()) {
assert(n_tail->is_Mem(), "unexpected node for memory slice: %s", n_tail->Name());
return false; // Bailout
}
_mem_slice_head.push(n);
_mem_slice_tail.push(n_tail);
}
}
}
// Create an RPO list of nodes in block
ResourceMark rm; ResourceMark rm;
GrowableArray<Node*> stack; GrowableArray<Node*> stack;
VectorSet visited; VectorSet visited;
VectorSet post_visited; VectorSet post_visited;
// Push all non-control nodes with no inputs from within block, then control entry visited.set(bb_idx(bb()));
for (int j = 0; j < _data_entry.length(); j++) { stack.push(bb());
Node* n = _data_entry.at(j);
visited.set(bb_idx(n));
stack.push(n);
}
visited.set(bb_idx(entry));
stack.push(entry);
// Do a depth first walk over out edges // Do a depth first walk over out edges
int rpo_idx = bb_ct - 1; int rpo_idx = block_count - 1;
int size; while (!stack.is_empty()) {
int reduction_uses = 0;
while ((size = stack.length()) > 0) {
Node* n = stack.top(); // Leave node on stack Node* n = stack.top(); // Leave node on stack
if (!visited.test_set(bb_idx(n))) { if (!visited.test_set(bb_idx(n))) {
// forward arc in graph // forward arc in graph
} else if (!post_visited.test(bb_idx(n))) { } else if (!post_visited.test(bb_idx(n))) {
// cross or back arc // cross or back arc
const int old_length = stack.length();
for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
Node *use = n->fast_out(i); Node* use = n->fast_out(i);
if (in_bb(use) && !visited.test(bb_idx(use)) && if (in_bb(use) && !visited.test(bb_idx(use)) &&
// Don't go around backedge // Don't go around backedge
(!use->is_Phi() || n == entry)) { (!use->is_Phi() || n == bb())) {
if (is_marked_reduction(use)) {
// First see if we can map the reduction on the given system we are on, then
// make a data entry operation for each reduction we see.
BasicType bt = use->bottom_type()->basic_type();
if (ReductionNode::implemented(use->Opcode(), Matcher::max_vector_size_auto_vectorization(bt), bt)) {
reduction_uses++;
}
}
stack.push(use); stack.push(use);
} }
} }
if (stack.length() == size) { if (stack.length() == old_length) {
// There were no additional uses, post visit node now // There were no additional uses, post visit node now
stack.pop(); // Remove node from stack stack.pop(); // Remove node from stack
assert(rpo_idx >= 0, ""); assert(rpo_idx >= 0, "must still have idx to pass out");
_block.at_put_grow(rpo_idx, n); _block.at_put_grow(rpo_idx, n);
rpo_idx--; rpo_idx--;
post_visited.set(bb_idx(n)); post_visited.set(bb_idx(n));
assert(rpo_idx >= 0 || stack.is_empty(), ""); assert(rpo_idx >= 0 || stack.is_empty(), "still have idx left or are finished");
} }
} else { } else {
stack.pop(); // Remove post-visited node from stack stack.pop(); // Remove post-visited node from stack
} }
}//while }
int ii_current = -1;
unsigned int load_idx = (unsigned int)-1;
// Create real map of block indices for nodes // Create real map of block indices for nodes
for (int j = 0; j < _block.length(); j++) { for (int j = 0; j < _block.length(); j++) {
Node* n = _block.at(j); Node* n = _block.at(j);
set_bb_idx(n, j); set_bb_idx(n, j);
}//for }
// Ensure extra info is allocated.
initialize_bb();
#ifndef PRODUCT #ifndef PRODUCT
if (is_trace_superword_info()) { if (is_trace_superword_info()) {
print_bb(); print_bb();
tty->print_cr("\ndata entry nodes: %s", _data_entry.length() > 0 ? "" : "NONE");
for (int m = 0; m < _data_entry.length(); m++) {
tty->print("%3d ", m);
_data_entry.at(m)->dump();
}
}
if (is_trace_superword_memory_slices()) {
tty->print_cr("\nmemory slices: %s", _mem_slice_head.length() > 0 ? "" : "NONE");
for (int m = 0; m < _mem_slice_head.length(); m++) {
tty->print("%3d ", m); _mem_slice_head.at(m)->dump();
tty->print(" "); _mem_slice_tail.at(m)->dump();
}
} }
#endif #endif
assert(rpo_idx == -1 && bb_ct == _block.length(), "all block members found");
return (_mem_slice_head.length() > 0) || (reduction_uses > 0) || (_data_entry.length() > 0); assert(rpo_idx == -1 && block_count == _block.length(), "all block members found");
return true;
} }
//------------------------------initialize_bb---------------------------
// Initialize per node info // Initialize per node info
void SuperWord::initialize_bb() { void SuperWord::initialize_node_info() {
Node* last = _block.at(_block.length() - 1); Node* last = _block.at(_block.length() - 1);
grow_node_info(bb_idx(last)); grow_node_info(bb_idx(last));
} }
//------------------------------bb_insert_after---------------------------
// Insert node n into _block at the position directly after pos. The entries
// of _block and _node_info from that position on are moved up by one, the new
// slot receives n with SWNodeInfo::initial, and the bb_idx mapping of every
// node from the new slot to the end of _block is refreshed.
void SuperWord::bb_insert_after(Node* n, int pos) {
  const int slot = pos + 1;

  // Open a gap at "slot": move elements up by one, starting from the back
  // so that nothing is overwritten before it has been copied.
  int from = _block.length() - 1;
  while (from >= slot) {
    _block.at_put_grow(from + 1, _block.at(from));
    from--;
  }
  int info_from = _node_info.length() - 1;
  while (info_from >= slot) {
    _node_info.at_put_grow(info_from + 1, _node_info.at(info_from));
    info_from--;
  }

  // Fill the gap with the new node and fresh per-node info.
  _block.at_put_grow(slot, n);
  _node_info.at_put_grow(slot, SWNodeInfo::initial);

  // Re-establish the map from node->_idx to _block index for the new node
  // and everything that was shifted.
  int idx = slot;
  while (idx < _block.length()) {
    set_bb_idx(_block.at(idx), idx);
    idx++;
  }
}
//------------------------------compute_max_depth--------------------------- //------------------------------compute_max_depth---------------------------
// Compute max depth for expressions from beginning of block // Compute max depth for expressions from beginning of block
// Use to prune search paths during test for independence. // Use to prune search paths during test for independence.
@ -3776,7 +3761,6 @@ void SuperWord::init() {
_dg.init(); _dg.init();
_packset.clear(); _packset.clear();
_block.clear(); _block.clear();
_data_entry.clear();
_mem_slice_head.clear(); _mem_slice_head.clear();
_mem_slice_tail.clear(); _mem_slice_tail.clear();
_node_info.clear(); _node_info.clear();

@ -214,9 +214,8 @@ class SuperWord : public ResourceObj {
GrowableArray<int> _bb_idx; // Map from Node _idx to index within block GrowableArray<int> _bb_idx; // Map from Node _idx to index within block
GrowableArray<Node*> _block; // Nodes in current block GrowableArray<Node*> _block; // Nodes in current block
GrowableArray<Node*> _data_entry; // Nodes with all inputs from outside GrowableArray<PhiNode*> _mem_slice_head; // Memory slice head nodes
GrowableArray<Node*> _mem_slice_head; // Memory slice head nodes GrowableArray<MemNode*> _mem_slice_tail; // Memory slice tail nodes
GrowableArray<Node*> _mem_slice_tail; // Memory slice tail nodes
GrowableArray<SWNodeInfo> _node_info; // Info needed per node GrowableArray<SWNodeInfo> _node_info; // Info needed per node
CloneMap& _clone_map; // map of nodes created in cloning CloneMap& _clone_map; // map of nodes created in cloning
MemNode const* _align_to_ref; // Memory reference that pre-loop will align to MemNode const* _align_to_ref; // Memory reference that pre-loop will align to
@ -467,8 +466,13 @@ private:
int get_iv_adjustment(MemNode* mem); int get_iv_adjustment(MemNode* mem);
// Construct dependency graph. // Construct dependency graph.
void dependence_graph(); void dependence_graph();
// Analyze the memory slices
void find_memory_slices();
NOT_PRODUCT( void print_memory_slices(); )
// Return a memory slice (node list) in predecessor order starting at "start" // Return a memory slice (node list) in predecessor order starting at "start"
void mem_slice_preds(Node* start, Node* stop, GrowableArray<Node*> &preds); void mem_slice_preds(Node* start, Node* stop, GrowableArray<Node*> &preds);
// Can s1 and s2 be in a pack with s1 immediately preceding s2 and s1 aligned at "align" // Can s1 and s2 be in a pack with s1 immediately preceding s2 and s1 aligned at "align"
bool stmts_can_pack(Node* s1, Node* s2, int align); bool stmts_can_pack(Node* s1, Node* s2, int align);
// Does s exist in a pack at position pos? // Does s exist in a pack at position pos?
@ -537,9 +541,7 @@ private:
// Construct reverse postorder list of block members // Construct reverse postorder list of block members
bool construct_bb(); bool construct_bb();
// Initialize per node info // Initialize per node info
void initialize_bb(); void initialize_node_info();
// Insert n into block after pos
void bb_insert_after(Node* n, int pos);
// Compute max depth for expressions from beginning of block // Compute max depth for expressions from beginning of block
void compute_max_depth(); void compute_max_depth();
// Return the longer type for vectorizable type-conversion node or illegal type for other nodes. // Return the longer type for vectorizable type-conversion node or illegal type for other nodes.