From 5cddc2de493d9d8712e4bee3aed4f1a0d4c228c3 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Tue, 2 Apr 2024 06:10:08 +0000 Subject: [PATCH] 8325252: C2 SuperWord: refactor the packset Reviewed-by: chagedorn, kvn --- src/hotspot/share/opto/superword.cpp | 873 ++++++++---------- src/hotspot/share/opto/superword.hpp | 515 +++++++---- src/hotspot/share/opto/vectorization.hpp | 2 +- src/hotspot/share/opto/vectornode.cpp | 2 +- src/hotspot/share/opto/vectornode.hpp | 2 +- .../loopopts/superword/TestMulAddS2I.java | 119 ++- 6 files changed, 862 insertions(+), 651 deletions(-) diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp index d5b05426a89..8bef9d09980 100644 --- a/src/hotspot/share/opto/superword.cpp +++ b/src/hotspot/share/opto/superword.cpp @@ -42,11 +42,14 @@ SuperWord::SuperWord(const VLoopAnalyzer &vloop_analyzer) : _vloop_analyzer(vloop_analyzer), _vloop(vloop_analyzer.vloop()), _arena(mtCompiler), - _packset(arena(), 8, 0, nullptr), // packs for the current block _node_info(arena(), _vloop.estimated_body_length(), 0, SWNodeInfo::initial), // info needed per node _clone_map(phase()->C->clone_map()), // map of nodes created in cloning _align_to_ref(nullptr), // memory reference to align vectors to - _race_possible(false), // cases where SDMU is true + _pairset(&_arena, _vloop_analyzer), + _packset(&_arena, _vloop_analyzer + NOT_PRODUCT(COMMA is_trace_superword_packset()) + NOT_PRODUCT(COMMA is_trace_superword_rejections()) + ), _do_vector_loop(phase()->C->do_vector_loop()), // whether to do vectorization/simd style _num_work_vecs(0), // amount of vector work we have _num_reductions(0) // amount of reduction work we have @@ -454,7 +457,7 @@ bool SuperWord::SLP_extract() { // Attempt vectorization find_adjacent_refs(); - if (_packset.length() == 0) { + if (_pairset.is_empty()) { #ifndef PRODUCT if (is_trace_superword_any()) { tty->print_cr("\nNo pair packs generated, abort SuperWord."); @@ -464,18 +467,15 @@ bool 
SuperWord::SLP_extract() { return false; } - extend_packset_with_more_pairs_by_following_use_and_def(); + extend_pairset_with_more_pairs_by_following_use_and_def(); combine_pairs_to_longer_packs(); - construct_my_pack_map(); - split_packs_at_use_def_boundaries(); // a first time: create natural boundaries split_packs_only_implemented_with_smaller_size(); split_packs_to_break_mutual_dependence(); split_packs_at_use_def_boundaries(); // again: propagate split of other packs - // Now we only remove packs: filter_packs_for_power_of_2_size(); filter_packs_for_mutual_independence(); filter_packs_for_alignment(); @@ -555,11 +555,8 @@ void SuperWord::find_adjacent_refs() { if (alignment(s2) == top_align) continue; if (s1 != s2 && are_adjacent_refs(s1, s2)) { if (stmts_can_pack(s1, s2, align)) { - Node_List* pair = new Node_List(); - pair->push(s1); - pair->push(s2); if (!_do_vector_loop || same_origin_idx(s1, s2)) { - _packset.append(pair); + _pairset.add_pair(s1, s2); } } } @@ -575,13 +572,13 @@ void SuperWord::find_adjacent_refs() { } } // while (memops.size() != 0) - assert(_packset.is_empty() || align_to_mem_ref != nullptr, - "packset empty or we find the alignment reference"); + assert(_pairset.is_empty() || align_to_mem_ref != nullptr, + "pairset empty or we find the alignment reference"); #ifndef PRODUCT if (is_trace_superword_packset()) { tty->print_cr("\nAfter Superword::find_adjacent_refs"); - print_packset(); + _pairset.print(); } #endif } @@ -843,7 +840,7 @@ bool SuperWord::stmts_can_pack(Node* s1, Node* s2, int align) { // and will still be vectorized by SuperWord::vector_opd. 
if (isomorphic(s1, s2) && !is_populate_index(s1, s2)) { if ((independent(s1, s2) && have_similar_inputs(s1, s2)) || reduction(s1, s2)) { - if (!exists_at(s1, 0) && !exists_at(s2, 1)) { + if (!_pairset.is_left(s1) && !_pairset.is_right(s2)) { if (!s1->is_Mem() || are_adjacent_refs(s1, s2)) { int s1_align = alignment(s1); int s2_align = alignment(s2); @@ -859,21 +856,9 @@ bool SuperWord::stmts_can_pack(Node* s1, Node* s2, int align) { return false; } -//------------------------------exists_at--------------------------- -// Does s exist in a pack at position pos? -bool SuperWord::exists_at(Node* s, uint pos) { - for (int i = 0; i < _packset.length(); i++) { - Node_List* p = _packset.at(i); - if (p->at(pos) == s) { - return true; - } - } - return false; -} - //------------------------------are_adjacent_refs--------------------------- // Is s1 immediately before s2 in memory? -bool SuperWord::are_adjacent_refs(Node* s1, Node* s2) { +bool SuperWord::are_adjacent_refs(Node* s1, Node* s2) const { if (!s1->is_Mem() || !s2->is_Mem()) return false; if (!in_bb(s1) || !in_bb(s2)) return false; @@ -1024,7 +1009,7 @@ bool SuperWord::have_similar_inputs(Node* s1, Node* s2) { return true; } -bool VLoopReductions::is_marked_reduction_pair(Node* s1, Node* s2) const { +bool VLoopReductions::is_marked_reduction_pair(const Node* s1, const Node* s2) const { if (is_marked_reduction(s1) && is_marked_reduction(s2)) { // This is an ordered set, so s1 should define s2 @@ -1049,30 +1034,37 @@ void SuperWord::set_alignment(Node* s1, Node* s2, int align) { } } -// Extend packset by following use->def and def->use links from pack members. -void SuperWord::extend_packset_with_more_pairs_by_following_use_and_def() { +// Extend pairset by following use->def and def->use links from pair members. 
+void SuperWord::extend_pairset_with_more_pairs_by_following_use_and_def() { bool changed; do { - packset_sort(_packset.length()); changed = false; - for (int i = 0; i < _packset.length(); i++) { - Node_List* p = _packset.at(i); - changed |= follow_use_defs(p); - changed |= follow_def_uses(p); + // Iterate the pairs in insertion order. + for (int i = 0; i < _pairset.length(); i++) { + Node* left = _pairset.left_at_in_insertion_order(i); + Node* right = _pairset.right_at_in_insertion_order(i); + changed |= extend_pairset_with_more_pairs_by_following_def(left, right); + changed |= extend_pairset_with_more_pairs_by_following_use(left, right); } } while (changed); - if (_race_possible) { - for (int i = 0; i < _packset.length(); i++) { - Node_List* p = _packset.at(i); - order_def_uses(p); - } + // During extend_pairset_with_more_pairs_by_following_use, we may have re-ordered the + // inputs of some nodes, when calling order_inputs_of_uses_to_match_def_pair. If a def + // node has multiple uses, we may have re-ordered some of the inputs one use after + // packing another use with the old order. Now that we have all pairs, we must ensure + // that the order between the pairs is matching again. Since the PairSetIterator visits + // all pair-chains from left-to-right, we essencially impose the order of the first + // element on all other elements in the pair-chain. 
+ for (PairSetIterator pair(_pairset); !pair.done(); pair.next()) { + Node* left = pair.left(); + Node* right = pair.right(); + order_inputs_of_all_use_pairs_to_match_def_pair(left, right); } #ifndef PRODUCT if (is_trace_superword_packset()) { - tty->print_cr("\nAfter Superword::extend_packset_with_more_pairs_by_following_use_and_def"); - print_packset(); + tty->print_cr("\nAfter Superword::extend_pairset_with_more_pairs_by_following_use_and_def"); + _pairset.print(); } #endif } @@ -1093,12 +1085,8 @@ int SuperWord::adjust_alignment_for_type_conversion(Node* s, Node* t, int align) return align; } -//------------------------------follow_use_defs--------------------------- -// Extend the packset by visiting operand definitions of nodes in pack p -bool SuperWord::follow_use_defs(Node_List* p) { - assert(p->size() == 2, "just checking"); - Node* s1 = p->at(0); - Node* s2 = p->at(1); +bool SuperWord::extend_pairset_with_more_pairs_by_following_def(Node* s1, Node* s2) { + assert(_pairset.is_pair(s1, s2), "(s1, s2) must be a pair"); assert(s1->req() == s2->req(), "just checking"); assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking"); @@ -1106,7 +1094,7 @@ bool SuperWord::follow_use_defs(Node_List* p) { #ifndef PRODUCT if (is_trace_superword_alignment()) { - tty->print_cr("SuperWord::follow_use_defs: s1 %d, align %d", + tty->print_cr("SuperWord::extend_pairset_with_more_pairs_by_following_def: s1 %d, align %d", s1->_idx, alignment(s1)); } #endif @@ -1123,14 +1111,11 @@ bool SuperWord::follow_use_defs(Node_List* p) { } align = adjust_alignment_for_type_conversion(s1, t1, align); if (stmts_can_pack(t1, t2, align)) { - if (est_savings(t1, t2) >= 0) { - Node_List* pair = new Node_List(); - pair->push(t1); - pair->push(t2); - _packset.append(pair); + if (estimate_cost_savings_when_packing_as_pair(t1, t2) >= 0) { + _pairset.add_pair(t1, t2); #ifndef PRODUCT if (is_trace_superword_alignment()) { - tty->print_cr("SuperWord::follow_use_defs: set_alignment(%d, %d, 
%d)", + tty->print_cr("SuperWord::extend_pairset_with_more_pairs_by_following_def: set_alignment(%d, %d, %d)", t1->_idx, t2->_idx, align); } #endif @@ -1142,13 +1127,11 @@ bool SuperWord::follow_use_defs(Node_List* p) { return changed; } -//------------------------------follow_def_uses--------------------------- -// Extend the packset by visiting uses of nodes in pack p -bool SuperWord::follow_def_uses(Node_List* p) { - bool changed = false; - Node* s1 = p->at(0); - Node* s2 = p->at(1); - assert(p->size() == 2, "just checking"); +// Note: we only extend with a single pair (the one with most savings) for every call. Since we keep +// calling this method as long as there are some changes, we will eventually pack all pairs that +// can be packed. +bool SuperWord::extend_pairset_with_more_pairs_by_following_use(Node* s1, Node* s2) { + assert(_pairset.is_pair(s1, s2), "(s1, s2) must be a pair"); assert(s1->req() == s2->req(), "just checking"); assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking"); @@ -1157,17 +1140,15 @@ bool SuperWord::follow_def_uses(Node_List* p) { int align = alignment(s1); #ifndef PRODUCT if (is_trace_superword_alignment()) { - tty->print_cr("SuperWord::follow_def_uses: s1 %d, align %d", + tty->print_cr("SuperWord::extend_pairset_with_more_pairs_by_following_use: s1 %d, align %d", s1->_idx, align); } #endif int savings = -1; - int num_s1_uses = 0; Node* u1 = nullptr; Node* u2 = nullptr; for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) { Node* t1 = s1->fast_out(i); - num_s1_uses++; if (!in_bb(t1) || t1->is_Mem()) { // Only follow non-memory nodes in block - we do not want to resurrect misaligned packs. 
continue; @@ -1179,12 +1160,11 @@ bool SuperWord::follow_def_uses(Node_List* p) { continue; } if (t2->Opcode() == Op_AddI && t2 == cl()->incr()) continue; // don't mess with the iv - if (!opnd_positions_match(s1, t1, s2, t2)) - continue; + if (order_inputs_of_uses_to_match_def_pair(s1, s2, t1, t2) != PairOrderStatus::Ordered) { continue; } int adjusted_align = alignment(s1); adjusted_align = adjust_alignment_for_type_conversion(s1, t1, adjusted_align); if (stmts_can_pack(t1, t2, adjusted_align)) { - int my_savings = est_savings(t1, t2); + int my_savings = estimate_cost_savings_when_packing_as_pair(t1, t2); if (my_savings > savings) { savings = my_savings; u1 = t1; @@ -1194,130 +1174,150 @@ bool SuperWord::follow_def_uses(Node_List* p) { } } } - if (num_s1_uses > 1) { - _race_possible = true; - } if (savings >= 0) { - Node_List* pair = new Node_List(); - pair->push(u1); - pair->push(u2); - _packset.append(pair); + _pairset.add_pair(u1, u2); #ifndef PRODUCT if (is_trace_superword_alignment()) { - tty->print_cr("SuperWord::follow_def_uses: set_alignment(%d, %d, %d)", + tty->print_cr("SuperWord::extend_pairset_with_more_pairs_by_following_use: set_alignment(%d, %d, %d)", u1->_idx, u2->_idx, align); } #endif set_alignment(u1, u2, align); - changed = true; + return true; // changed } - return changed; + return false; // no change } -//------------------------------order_def_uses--------------------------- -// For extended packsets, ordinally arrange uses packset by major component -void SuperWord::order_def_uses(Node_List* p) { - Node* s1 = p->at(0); +// For a pair (def1, def2), find all use packs (use1, use2), and ensure that their inputs have an order +// that matches the (def1, def2) pair. 
+void SuperWord::order_inputs_of_all_use_pairs_to_match_def_pair(Node* def1, Node* def2) { + assert(_pairset.is_pair(def1, def2), "(def1, def2) must be a pair"); - if (s1->is_Store()) return; + if (def1->is_Store()) return; // reductions are always managed beforehand - if (is_marked_reduction(s1)) return; + if (is_marked_reduction(def1)) return; - for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) { - Node* t1 = s1->fast_out(i); + for (DUIterator_Fast imax, i = def1->fast_outs(imax); i < imax; i++) { + Node* use1 = def1->fast_out(i); // Only allow operand swap on commuting operations - if (!t1->is_Add() && !t1->is_Mul() && !VectorNode::is_muladds2i(t1)) { + if (!use1->is_Add() && !use1->is_Mul() && !VectorNode::is_muladds2i(use1)) { break; } - // Now find t1's packset - Node_List* p2 = nullptr; - for (int j = 0; j < _packset.length(); j++) { - p2 = _packset.at(j); - Node* first = p2->at(0); - if (t1 == first) { - break; - } - p2 = nullptr; - } - // Arrange all sub components by the major component - if (p2 != nullptr) { - for (uint j = 1; j < p->size(); j++) { - Node* d1 = p->at(j); - Node* u1 = p2->at(j); - opnd_positions_match(s1, t1, d1, u1); - } - } + // Find pair (use1, use2) + Node* use2 = _pairset.get_right_or_null_for(use1); + if (use2 == nullptr) { break; } + + order_inputs_of_uses_to_match_def_pair(def1, def2, use1, use2); } } -//---------------------------opnd_positions_match------------------------- -// Is the use of d1 in u1 at the same operand position as d2 in u2? 
-bool SuperWord::opnd_positions_match(Node* d1, Node* u1, Node* d2, Node* u2) { - // check reductions to see if they are marshalled to represent the reduction - // operator in a specified opnd - if (is_marked_reduction(u1) && is_marked_reduction(u2)) { - // ensure reductions have phis and reduction definitions feeding the 1st operand - Node* first = u1->in(2); - if (first->is_Phi() || is_marked_reduction(first)) { - u1->swap_edges(1, 2); +// For a def-pair (def1, def2), and their use-nodes (use1, use2): +// Ensure that the input order of (use1, use2) matches the order of (def1, def2). +// +// We have different cases: +// +// 1. Reduction (use1, use2): must always reduce left-to-right. Make sure that we have pattern: +// +// phi/reduction x1 phi/reduction x2 phi/reduction x1 +// | | | | and hopefully: | | +// use1 use2 use1 x2 +// | | +// use2 +// +// 2: Commutative operations, just as Add/Mul and their subclasses: we can try to swap edges: +// +// def1 x1 x2 def2 def1 x1 def2 x2 +// | | | | ==> | | | | +// use1 use2 use1 use2 +// +// 3: MulAddS2I (use1, use2): we can try to swap edges: +// +// (x1 * x2) + (x3 * x4) ==> 3.a: (x2 * x1) + (x4 * x3) +// 3.b: (x4 * x3) + (x2 * x1) +// 3.c: (x3 * x4) + (x1 * x2) +// +// Note: MulAddS2I with its 4 inputs is too complicated, if there is any mismatch, we always +// return PairOrderStatus::Unknown. +// Therefore, extend_pairset_with_more_pairs_by_following_use cannot extend to MulAddS2I, +// but there is a chance that extend_pairset_with_more_pairs_by_following_def can do it. +// +// 4: Otherwise, check if the inputs of (use1, use2) already match (def1, def2), i.e. for all input indices i: +// +// use1->in(i) == def1 || use2->in(i) == def2 -> use1->in(i) == def1 && use2->in(i) == def2 +// +SuperWord::PairOrderStatus SuperWord::order_inputs_of_uses_to_match_def_pair(Node* def1, Node* def2, Node* use1, Node* use2) { + assert(_pairset.is_pair(def1, def2), "(def1, def2) must be a pair"); + + // 1. 
Reduction + if (is_marked_reduction(use1) && is_marked_reduction(use2)) { + Node* use1_in2 = use1->in(2); + if (use1_in2->is_Phi() || is_marked_reduction(use1_in2)) { + use1->swap_edges(1, 2); } - // ensure reductions have phis and reduction definitions feeding the 1st operand - first = u2->in(2); - if (first->is_Phi() || is_marked_reduction(first)) { - u2->swap_edges(1, 2); + Node* use2_in2 = use2->in(2); + if (use2_in2->is_Phi() || is_marked_reduction(use2_in2)) { + use2->swap_edges(1, 2); } - return true; + return PairOrderStatus::Ordered; } - uint ct = u1->req(); - if (ct != u2->req()) return false; + uint ct = use1->req(); + if (ct != use2->req()) { return PairOrderStatus::Unordered; }; uint i1 = 0; uint i2 = 0; do { - for (i1++; i1 < ct; i1++) if (u1->in(i1) == d1) break; - for (i2++; i2 < ct; i2++) if (u2->in(i2) == d2) break; + for (i1++; i1 < ct; i1++) { if (use1->in(i1) == def1) { break; } } + for (i2++; i2 < ct; i2++) { if (use2->in(i2) == def2) { break; } } if (i1 != i2) { - if ((i1 == (3-i2)) && (u2->is_Add() || u2->is_Mul())) { - // Further analysis relies on operands position matching. - u2->swap_edges(i1, i2); - } else if (VectorNode::is_muladds2i(u2) && u1 != u2) { + if ((i1 == (3-i2)) && (use2->is_Add() || use2->is_Mul())) { + // 2. Commutative: swap edges, and hope the other position matches too. + use2->swap_edges(i1, i2); + } else if (VectorNode::is_muladds2i(use2) && use1 != use2) { + // 3.a/b: MulAddS2I. 
if (i1 == 5 - i2) { // ((i1 == 3 && i2 == 2) || (i1 == 2 && i2 == 3) || (i1 == 1 && i2 == 4) || (i1 == 4 && i2 == 1)) - u2->swap_edges(1, 2); - u2->swap_edges(3, 4); + use2->swap_edges(1, 2); + use2->swap_edges(3, 4); } if (i1 == 3 - i2 || i1 == 7 - i2) { // ((i1 == 1 && i2 == 2) || (i1 == 2 && i2 == 1) || (i1 == 3 && i2 == 4) || (i1 == 4 && i2 == 3)) - u2->swap_edges(2, 3); - u2->swap_edges(1, 4); + use2->swap_edges(2, 3); + use2->swap_edges(1, 4); } - return false; // Just swap the edges, the muladds2i nodes get packed in follow_use_defs + return PairOrderStatus::Unknown; } else { - return false; + // 4. The inputs are not ordered, and we cannot do anything about it. + return PairOrderStatus::Unordered; } - } else if (i1 == i2 && VectorNode::is_muladds2i(u2) && u1 != u2) { - u2->swap_edges(1, 3); - u2->swap_edges(2, 4); - return false; // Just swap the edges, the muladds2i nodes get packed in follow_use_defs + } else if (i1 == i2 && VectorNode::is_muladds2i(use2) && use1 != use2) { + // 3.c: MulAddS2I. + use2->swap_edges(1, 3); + use2->swap_edges(2, 4); + return PairOrderStatus::Unknown; } } while (i1 < ct); - return true; + + // 4. All inputs match. + return PairOrderStatus::Ordered; } -//------------------------------est_savings--------------------------- -// Estimate the savings from executing s1 and s2 as a pack -int SuperWord::est_savings(Node* s1, Node* s2) { +// Estimate the savings from executing s1 and s2 as a pair. 
+int SuperWord::estimate_cost_savings_when_packing_as_pair(const Node* s1, const Node* s2) const { int save_in = 2 - 1; // 2 operations per instruction in packed form + const int adjacent_profit = 2; + auto pack_cost = [&] (const int size) { return size; }; + auto unpack_cost = [&] (const int size) { return size; }; + // inputs for (uint i = 1; i < s1->req(); i++) { Node* x1 = s1->in(i); Node* x2 = s2->in(i); if (x1 != x2) { if (are_adjacent_refs(x1, x2)) { - save_in += adjacent_profit(x1, x2); - } else if (!in_packset(x1, x2)) { + save_in += adjacent_profit; + } else if (!_pairset.is_pair(x1, x2)) { save_in -= pack_cost(2); } else { save_in += unpack_cost(2); @@ -1326,85 +1326,76 @@ int SuperWord::est_savings(Node* s1, Node* s2) { } // uses of result - uint ct = 0; + uint number_of_packed_use_pairs = 0; int save_use = 0; for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) { - Node* s1_use = s1->fast_out(i); - for (int j = 0; j < _packset.length(); j++) { - Node_List* p = _packset.at(j); - if (p->at(0) == s1_use) { - for (DUIterator_Fast kmax, k = s2->fast_outs(kmax); k < kmax; k++) { - Node* s2_use = s2->fast_out(k); - if (p->at(p->size()-1) == s2_use) { - ct++; - if (are_adjacent_refs(s1_use, s2_use)) { - save_use += adjacent_profit(s1_use, s2_use); - } - } + Node* use1 = s1->fast_out(i); + + // Find pair (use1, use2) + Node* use2 = _pairset.get_right_or_null_for(use1); + if (use2 == nullptr) { continue; } + + for (DUIterator_Fast kmax, k = s2->fast_outs(kmax); k < kmax; k++) { + if (use2 == s2->fast_out(k)) { + // We have pattern: + // + // s1 s2 + // | | + // [use1, use2] + // + number_of_packed_use_pairs++; + if (are_adjacent_refs(use1, use2)) { + save_use += adjacent_profit; } } } } - if (ct < s1->outcnt()) save_use += unpack_cost(1); - if (ct < s2->outcnt()) save_use += unpack_cost(1); + if (number_of_packed_use_pairs < s1->outcnt()) save_use += unpack_cost(1); + if (number_of_packed_use_pairs < s2->outcnt()) save_use += unpack_cost(1); return 
MAX2(save_in, save_use); } -//------------------------------costs--------------------------- -int SuperWord::adjacent_profit(Node* s1, Node* s2) { return 2; } -int SuperWord::pack_cost(int ct) { return ct; } -int SuperWord::unpack_cost(int ct) { return ct; } - -// Combine packs A and B with A.last == B.first into A.first..,A.last,B.second,..B.last +// Combine pairs (n1, n2), (n2, n3), ... into pack (n1, n2, n3 ...) void SuperWord::combine_pairs_to_longer_packs() { #ifdef ASSERT - assert(!_packset.is_empty(), "packset not empty"); - for (int i = 0; i < _packset.length(); i++) { - assert(_packset.at(i) != nullptr, "no nullptr in packset"); - assert(_packset.at(i)->size() == 2, "all packs are pairs"); - } + assert(!_pairset.is_empty(), "pairset not empty"); + assert(_packset.is_empty(), "packset not empty"); #endif - bool changed = true; - // Combine packs regardless max vector size. - while (changed) { - changed = false; - for (int i = 0; i < _packset.length(); i++) { - Node_List* p1 = _packset.at(i); - if (p1 == nullptr) continue; - // Because of sorting we can start at i + 1 - for (int j = i + 1; j < _packset.length(); j++) { - Node_List* p2 = _packset.at(j); - if (p2 == nullptr) continue; - if (p1->at(p1->size()-1) == p2->at(0)) { - for (uint k = 1; k < p2->size(); k++) { - p1->push(p2->at(k)); - } - _packset.at_put(j, nullptr); - changed = true; - } - } + // Iterate pair-chain by pair-chain, each from left-most to right-most. 
+ Node_List* pack = nullptr; + for (PairSetIterator pair(_pairset); !pair.done(); pair.next()) { + Node* left = pair.left(); + Node* right = pair.right(); + if (_pairset.is_left_in_a_left_most_pair(left)) { + assert(pack == nullptr, "no unfinished pack"); + pack = new (arena()) Node_List(arena()); + pack->push(left); + } + assert(pack != nullptr, "must have unfinished pack"); + pack->push(right); + if (_pairset.is_right_in_a_right_most_pair(right)) { + _packset.add_pack(pack); + pack = nullptr; } } - - // Remove all nullptr from packset - compress_packset(); + assert(pack == nullptr, "no unfinished pack"); assert(!_packset.is_empty(), "must have combined some packs"); #ifndef PRODUCT if (is_trace_superword_packset()) { tty->print_cr("\nAfter Superword::combine_pairs_to_longer_packs"); - print_packset(); + _packset.print(); } #endif } -SuperWord::SplitStatus SuperWord::split_pack(const char* split_name, - Node_List* pack, - SplitTask task) +SplitStatus PackSet::split_pack(const char* split_name, + Node_List* pack, + SplitTask task) { uint pack_size = pack->size(); @@ -1416,14 +1407,11 @@ SuperWord::SplitStatus SuperWord::split_pack(const char* split_name, #ifndef PRODUCT if (is_trace_superword_rejections()) { tty->cr(); - tty->print_cr("WARNING: Removed pack during split: %s:", task.message()); + tty->print_cr("WARNING: Removed pack: %s:", task.message()); print_pack(pack); } #endif - for (uint i = 0; i < pack_size; i++) { - Node* n = pack->at(i); - set_my_pack(n, nullptr); - } + unmap_all_nodes_in_pack(pack); return SplitStatus::make_rejected(); } @@ -1453,10 +1441,7 @@ SuperWord::SplitStatus SuperWord::split_pack(const char* split_name, print_pack(pack); } #endif - for (uint i = 0; i < pack_size; i++) { - Node* n = pack->at(i); - set_my_pack(n, nullptr); - } + unmap_all_nodes_in_pack(pack); return SplitStatus::make_rejected(); } @@ -1464,7 +1449,7 @@ SuperWord::SplitStatus SuperWord::split_pack(const char* split_name, if (new_size < 2) { assert(new_size == 1 && 
old_size >= 2, "implied"); Node* n = pack->pop(); - set_my_pack(n, nullptr); + unmap_node_in_pack(n); #ifndef PRODUCT if (is_trace_superword_rejections()) { tty->cr(); @@ -1480,7 +1465,7 @@ SuperWord::SplitStatus SuperWord::split_pack(const char* split_name, assert(old_size == 1 && new_size >= 2, "implied"); Node* n = pack->at(0); pack->remove(0); - set_my_pack(n, nullptr); + unmap_node_in_pack(n); #ifndef PRODUCT if (is_trace_superword_rejections()) { tty->cr(); @@ -1498,7 +1483,7 @@ SuperWord::SplitStatus SuperWord::split_pack(const char* split_name, for (uint i = 0; i < new_size; i++) { Node* n = pack->at(old_size + i); new_pack->push(n); - set_my_pack(n, new_pack); + remap_node_in_pack(n, new_pack); } for (uint i = 0; i < new_size; i++) { @@ -1511,130 +1496,110 @@ SuperWord::SplitStatus SuperWord::split_pack(const char* split_name, } template -void SuperWord::split_packs(const char* split_name, - SplitStrategy strategy) { +void PackSet::split_packs(const char* split_name, + SplitStrategy strategy) { bool changed; do { changed = false; int new_packset_length = 0; - for (int i = 0; i < _packset.length(); i++) { - Node_List* pack = _packset.at(i); + for (int i = 0; i < _packs.length(); i++) { + Node_List* pack = _packs.at(i); assert(pack != nullptr && pack->size() >= 2, "no nullptr, at least size 2"); SplitTask task = strategy(pack); SplitStatus status = split_pack(split_name, pack, task); changed |= !status.is_unchanged(); Node_List* first_pack = status.first_pack(); Node_List* second_pack = status.second_pack(); - _packset.at_put(i, nullptr); // take out pack + _packs.at_put(i, nullptr); // take out pack if (first_pack != nullptr) { // The first pack can be put at the current position assert(i >= new_packset_length, "only move packs down"); - _packset.at_put(new_packset_length++, first_pack); + _packs.at_put(new_packset_length++, first_pack); } if (second_pack != nullptr) { // The second node has to be appended at the end - _packset.append(second_pack); + 
_packs.append(second_pack); } } - _packset.trunc_to(new_packset_length); + _packs.trunc_to(new_packset_length); } while (changed); #ifndef PRODUCT if (is_trace_superword_packset()) { tty->print_cr("\nAfter %s", split_name); - print_packset(); + print(); } #endif } // Split packs at boundaries where left and right have different use or def packs. void SuperWord::split_packs_at_use_def_boundaries() { - split_packs("SuperWord::split_packs_at_use_def_boundaries", - [&](const Node_List* pack) { - uint pack_size = pack->size(); - uint boundary = find_use_def_boundary(pack); - assert(boundary < pack_size, "valid boundary %d", boundary); - if (boundary != 0) { - return SplitTask::make_split(pack_size - boundary, "found a use/def boundary"); - } - return SplitTask::make_unchanged(); - }); + auto split_strategy = [&](const Node_List* pack) { + uint pack_size = pack->size(); + uint boundary = find_use_def_boundary(pack); + assert(boundary < pack_size, "valid boundary %d", boundary); + if (boundary != 0) { + return SplitTask::make_split(pack_size - boundary, "found a use/def boundary"); + } + return SplitTask::make_unchanged(); + }; + _packset.split_packs("SuperWord::split_packs_at_use_def_boundaries", split_strategy); } // Split packs that are only implemented with a smaller pack size. Also splits packs // such that they eventually have power of 2 size. 
void SuperWord::split_packs_only_implemented_with_smaller_size() { - split_packs("SuperWord::split_packs_only_implemented_with_smaller_size", - [&](const Node_List* pack) { - uint pack_size = pack->size(); - uint implemented_size = max_implemented_size(pack); - if (implemented_size == 0) { - return SplitTask::make_rejected("not implemented at any smaller size"); - } - assert(is_power_of_2(implemented_size), "power of 2 size or zero: %d", implemented_size); - if (implemented_size != pack_size) { - return SplitTask::make_split(implemented_size, "only implemented at smaller size"); - } - return SplitTask::make_unchanged(); - }); + auto split_strategy = [&](const Node_List* pack) { + uint pack_size = pack->size(); + uint implemented_size = max_implemented_size(pack); + if (implemented_size == 0) { + return SplitTask::make_rejected("not implemented at any smaller size"); + } + assert(is_power_of_2(implemented_size), "power of 2 size or zero: %d", implemented_size); + if (implemented_size != pack_size) { + return SplitTask::make_split(implemented_size, "only implemented at smaller size"); + } + return SplitTask::make_unchanged(); + }; + _packset.split_packs("SuperWord::split_packs_only_implemented_with_smaller_size", split_strategy); } // Split packs that have a mutual dependency, until all packs are mutually_independent. void SuperWord::split_packs_to_break_mutual_dependence() { - split_packs("SuperWord::split_packs_to_break_mutual_dependence", - [&](const Node_List* pack) { - uint pack_size = pack->size(); - assert(is_power_of_2(pack_size), "ensured by earlier splits %d", pack_size); - if (!is_marked_reduction(pack->at(0)) && - !mutually_independent(pack)) { - // As a best guess, we split the pack in half. This way, we iteratively make the - // packs smaller, until there is no dependency. 
- return SplitTask::make_split(pack_size >> 1, "was not mutually independent"); - } - return SplitTask::make_unchanged(); - }); + auto split_strategy = [&](const Node_List* pack) { + uint pack_size = pack->size(); + assert(is_power_of_2(pack_size), "ensured by earlier splits %d", pack_size); + if (!is_marked_reduction(pack->at(0)) && + !mutually_independent(pack)) { + // As a best guess, we split the pack in half. This way, we iteratively make the + // packs smaller, until there is no dependency. + return SplitTask::make_split(pack_size >> 1, "was not mutually independent"); + } + return SplitTask::make_unchanged(); + }; + _packset.split_packs("SuperWord::split_packs_to_break_mutual_dependence", split_strategy); } template -void SuperWord::filter_packs(const char* filter_name, - const char* error_message, +void PackSet::filter_packs(const char* filter_name, + const char* rejection_message, FilterPredicate filter) { - int new_packset_length = 0; - for (int i = 0; i < _packset.length(); i++) { - Node_List* pack = _packset.at(i); - assert(pack != nullptr, "no nullptr in packset"); + auto split_strategy = [&](const Node_List* pack) { if (filter(pack)) { - assert(i >= new_packset_length, "only move packs down"); - _packset.at_put(new_packset_length++, pack); + return SplitTask::make_unchanged(); } else { - remove_pack_at(i); -#ifndef PRODUCT - if (is_trace_superword_rejections()) { - tty->cr(); - tty->print_cr("WARNING: Removed pack: %s:", error_message); - print_pack(pack); - } -#endif + return SplitTask::make_rejected(rejection_message); } - } - - assert(_packset.length() >= new_packset_length, "filter only reduces number of packs"); - _packset.trunc_to(new_packset_length); - -#ifndef PRODUCT - if (is_trace_superword_packset() && filter_name != nullptr) { - tty->print_cr("\nAfter %s:", filter_name); - print_packset(); - } -#endif + }; + split_packs(filter_name, split_strategy); } void SuperWord::filter_packs_for_power_of_2_size() { - 
filter_packs("SuperWord::filter_packs_for_power_of_2_size", - "size is not a power of 2", - [&](const Node_List* pack) { - return is_power_of_2(pack->size()); - }); + auto filter = [&](const Node_List* pack) { + return is_power_of_2(pack->size()); + }; + _packset.filter_packs("SuperWord::filter_packs_for_power_of_2_size", + "size is not a power of 2", filter); } // We know that the nodes in a pair pack were independent - this gives us independence @@ -1658,13 +1623,13 @@ void SuperWord::filter_packs_for_power_of_2_size() { // for (int i ...) { v[i] = v[i + 1] + 5; } // for (int i ...) { v[i - 1] = v[i] + 5; } void SuperWord::filter_packs_for_mutual_independence() { - filter_packs("SuperWord::filter_packs_for_mutual_independence", - "found dependency between nodes at distance greater than 1", - [&](const Node_List* pack) { - // reductions are trivially connected - return is_marked_reduction(pack->at(0)) || - mutually_independent(pack); - }); + auto filter = [&](const Node_List* pack) { + // reductions are trivially connected + return is_marked_reduction(pack->at(0)) || + mutually_independent(pack); + }; + _packset.filter_packs("SuperWord::filter_packs_for_mutual_independence", + "found dependency between nodes at distance greater than 1", filter); } // Find the set of alignment solutions for load/store pack. @@ -1713,35 +1678,36 @@ void SuperWord::filter_packs_for_alignment() { int mem_ops_count = 0; int mem_ops_rejected = 0; - filter_packs("SuperWord::filter_packs_for_alignment", - "rejected by AlignVector (strict alignment requirement)", - [&](const Node_List* pack) { - // Only memops need to be aligned. - if (!pack->at(0)->is_Load() && - !pack->at(0)->is_Store()) { - return true; // accept all non memops - } + auto filter = [&](const Node_List* pack) { + // Only memops need to be aligned. 
+ if (!pack->at(0)->is_Load() && + !pack->at(0)->is_Store()) { + return true; // accept all non memops + } - mem_ops_count++; - const AlignmentSolution* s = pack_alignment_solution(pack); - const AlignmentSolution* intersect = current->filter(s); + mem_ops_count++; + const AlignmentSolution* s = pack_alignment_solution(pack); + const AlignmentSolution* intersect = current->filter(s); #ifndef PRODUCT - if (is_trace_align_vector()) { - tty->print(" solution for pack: "); - s->print(); - tty->print(" intersection with current: "); - intersect->print(); - } + if (is_trace_align_vector()) { + tty->print(" solution for pack: "); + s->print(); + tty->print(" intersection with current: "); + intersect->print(); + } #endif - if (intersect->is_empty()) { - mem_ops_rejected++; - return false; // reject because of empty solution - } + if (intersect->is_empty()) { + mem_ops_rejected++; + return false; // reject because of empty solution + } - current = intersect; - return true; // accept because of non-empty solution - }); + current = intersect; + return true; // accept because of non-empty solution + }; + + _packset.filter_packs("SuperWord::filter_packs_for_alignment", + "rejected by AlignVector (strict alignment requirement)", filter); #ifndef PRODUCT if (is_trace_superword_info() || is_trace_align_vector()) { @@ -1760,47 +1726,13 @@ void SuperWord::filter_packs_for_alignment() { } } -// Compress packset, such that it has no nullptr entries -void SuperWord::compress_packset() { - int j = 0; - for (int i = 0; i < _packset.length(); i++) { - Node_List* p = _packset.at(i); - if (p != nullptr) { - _packset.at_put(j, p); - j++; - } - } - _packset.trunc_to(j); -} - -//-----------------------------construct_my_pack_map-------------------------- -// Construct the map from nodes to packs. Only valid after the -// point where a node is only in one pack (after combine_pairs_to_longer_packs). 
-void SuperWord::construct_my_pack_map() { - for (int i = 0; i < _packset.length(); i++) { - Node_List* p = _packset.at(i); - for (uint j = 0; j < p->size(); j++) { - Node* s = p->at(j); -#ifdef ASSERT - if (my_pack(s) != nullptr) { - s->dump(1); - tty->print_cr("packs[%d]:", i); - print_pack(p); - assert(false, "only in one pack"); - } -#endif - set_my_pack(s, p); - } - } -} - // Remove packs that are not implemented void SuperWord::filter_packs_for_implemented() { - filter_packs("SuperWord::filter_packs_for_implemented", - "Unimplemented", - [&](const Node_List* pack) { - return implemented(pack, pack->size()); - }); + auto filter = [&](const Node_List* pack) { + return implemented(pack, pack->size()); + }; + _packset.filter_packs("SuperWord::filter_packs_for_implemented", + "Unimplemented", filter); } // Remove packs that are not profitable. @@ -1818,30 +1750,15 @@ void SuperWord::filter_packs_for_profitable() { } // Remove packs that are not profitable - while (true) { - int old_packset_length = _packset.length(); - filter_packs(nullptr, // don't dump each time - "not profitable", - [&](const Node_List* pack) { - return profitable(pack); - }); - // Repeat until stable - if (old_packset_length == _packset.length()) { - break; - } - } - -#ifndef PRODUCT - if (is_trace_superword_packset()) { - tty->print_cr("\nAfter Superword::filter_packs_for_profitable"); - print_packset(); - tty->cr(); - } -#endif + auto filter = [&](const Node_List* pack) { + return profitable(pack); + }; + _packset.filter_packs("Superword::filter_packs_for_profitable", + "not profitable", filter); } // Can code be generated for the pack, restricted to size nodes? 
-bool SuperWord::implemented(const Node_List* pack, uint size) { +bool SuperWord::implemented(const Node_List* pack, const uint size) const { assert(size >= 2 && size <= pack->size() && is_power_of_2(size), "valid size"); bool retValue = false; Node* p0 = pack->at(0); @@ -1917,7 +1834,7 @@ bool SuperWord::requires_long_to_int_conversion(int opc) { //------------------------------same_inputs-------------------------- // For pack p, are all idx operands the same? -bool SuperWord::same_inputs(const Node_List* p, int idx) { +bool SuperWord::same_inputs(const Node_List* p, int idx) const { Node* p0 = p->at(0); uint vlen = p->size(); Node* p0_def = p0->in(idx); @@ -1933,7 +1850,7 @@ bool SuperWord::same_inputs(const Node_List* p, int idx) { //------------------------------profitable--------------------------- // For pack p, are all operands and all uses (with in the block) vector? -bool SuperWord::profitable(const Node_List* p) { +bool SuperWord::profitable(const Node_List* p) const { Node* p0 = p->at(0); uint start, end; VectorNode::vector_operands(p0, &start, &end); @@ -1951,7 +1868,7 @@ bool SuperWord::profitable(const Node_List* p) { // Check if reductions are connected if (is_marked_reduction(p0)) { Node* second_in = p0->in(2); - Node_List* second_pk = my_pack(second_in); + Node_List* second_pk = get_pack(second_in); if ((second_pk == nullptr) || (_num_work_vecs == _num_reductions)) { // No parent pack or not enough work // to cover reduction expansion overhead @@ -1964,7 +1881,7 @@ bool SuperWord::profitable(const Node_List* p) { // For now, return false if shift count is vector or not scalar promotion // case (different shift counts) because it is not supported yet. 
Node* cnt = p0->in(2); - Node_List* cnt_pk = my_pack(cnt); + Node_List* cnt_pk = get_pack(cnt); if (cnt_pk != nullptr) return false; if (!same_inputs(p, 2)) @@ -2017,12 +1934,12 @@ bool SuperWord::profitable(const Node_List* p) { if (p0->is_CMove()) { // Verify that CMove has a matching Bool pack BoolNode* bol = p0->in(1)->as_Bool(); - if (bol == nullptr || my_pack(bol) == nullptr) { + if (bol == nullptr || get_pack(bol) == nullptr) { return false; } // Verify that Bool has a matching Cmp pack CmpNode* cmp = bol->in(1)->as_Cmp(); - if (cmp == nullptr || my_pack(cmp) == nullptr) { + if (cmp == nullptr || get_pack(cmp) == nullptr) { return false; } } @@ -2030,37 +1947,57 @@ bool SuperWord::profitable(const Node_List* p) { } #ifdef ASSERT -void SuperWord::verify_packs() { - // Verify independence at pack level. +void SuperWord::verify_packs() const { + _packset.verify(); + + // All packs must be: for (int i = 0; i < _packset.length(); i++) { - Node_List* p = _packset.at(i); - if (!is_marked_reduction(p->at(0)) && - !mutually_independent(p)) { + Node_List* pack = _packset.at(i); + + // 1. Mutually independent (or a reduction). + if (!is_marked_reduction(pack->at(0)) && + !mutually_independent(pack)) { tty->print_cr("FAILURE: nodes not mutually independent in pack[%d]", i); - print_pack(p); + _packset.print_pack(pack); assert(false, "pack nodes not mutually independent"); } - } - // Verify all nodes in packset have my_pack set correctly. + // 2. Implemented. + if (!implemented(pack, pack->size())) { + tty->print_cr("FAILURE: nodes not implementable in pack[%d]", i); + _packset.print_pack(pack); + assert(false, "pack not implementable"); + } + + // 3. Profitable. + if (!profitable(pack)) { + tty->print_cr("FAILURE: nodes not profitable in pack[%d]", i); + _packset.print_pack(pack); + assert(false, "pack not profitable"); + } + } +} + +void PackSet::verify() const { + // Verify all nodes in packset have pack set correctly. 
ResourceMark rm; Unique_Node_List processed; - for (int i = 0; i < _packset.length(); i++) { - Node_List* p = _packset.at(i); + for (int i = 0; i < _packs.length(); i++) { + Node_List* p = _packs.at(i); for (uint k = 0; k < p->size(); k++) { Node* n = p->at(k); - assert(in_bb(n), "only nodes in bb can be in packset"); + assert(_vloop.in_bb(n), "only nodes in bb can be in packset"); assert(!processed.member(n), "node should only occur once in packset"); - assert(my_pack(n) == p, "n has consisten packset info"); + assert(get_pack(n) == p, "n has consisten packset info"); processed.push(n); } } - // Check that no other node has my_pack set. - for (int i = 0; i < body().length(); i++) { - Node* n = body().at(i); + // Check that no other node has pack set. + for (int i = 0; i < _body.body().length(); i++) { + Node* n = _body.body().at(i); if (!processed.member(n)) { - assert(my_pack(n) == nullptr, "should not have pack if not in packset"); + assert(get_pack(n) == nullptr, "should not have pack if not in packset"); } } } @@ -2087,7 +2024,7 @@ class PacksetGraph { private: // pid: packset graph node id. GrowableArray _pid; // bb_idx(n) -> pid - GrowableArray _pid_to_node; // one node per pid, find rest via my_pack + GrowableArray _pid_to_node; // one node per pid, find rest via _packset.pack GrowableArray> _out; // out-edges GrowableArray _incnt; // number of (implicit) in-edges int _max_pid = 0; @@ -2141,7 +2078,7 @@ public: // Create nodes (from packs and scalar-nodes), and add edges, based on the dependency graph. 
void build() { - const GrowableArray& packset = _slp->packset(); + const PackSet& packset = _slp->packset(); const GrowableArray& body = _slp->body(); // Map nodes in packsets for (int i = 0; i < packset.length(); i++) { @@ -2150,7 +2087,7 @@ public: for (uint k = 0; k < p->size(); k++) { Node* n = p->at(k); set_pid(n, pid); - assert(_slp->my_pack(n) == p, "matching packset"); + assert(packset.get_pack(n) == p, "matching packset"); } } @@ -2166,7 +2103,7 @@ public: if (pid == 0) { pid = new_pid(); set_pid(n, pid); - assert(_slp->my_pack(n) == nullptr, "no packset"); + assert(packset.get_pack(n) == nullptr, "no packset"); } } @@ -2232,7 +2169,7 @@ public: // Add memops to memops_schedule Node* n = get_node(pid); - Node_List* p = _slp->my_pack(n); + Node_List* p = _slp->packset().get_pack(n); if (n->is_Mem()) { if (p == nullptr) { memops_schedule.push(n); @@ -2440,7 +2377,7 @@ bool SuperWord::output() { CountedLoopNode *cl = lpt()->_head->as_CountedLoop(); assert(cl->is_main_loop(), "SLP should only work on main loops"); Compile* C = phase()->C; - if (_packset.length() == 0) { + if (_packset.is_empty()) { return false; } @@ -2461,7 +2398,7 @@ bool SuperWord::output() { for (int i = 0; i < body().length(); i++) { Node* n = body().at(i); - Node_List* p = my_pack(n); + Node_List* p = get_pack(n); if (p != nullptr && n == p->at(p->size()-1)) { // After schedule_reorder_memops, we know that the memops have the same order in the pack // as in the memory slice. 
Hence, "first" is the first memop in the slice from the pack, @@ -2551,7 +2488,7 @@ bool SuperWord::output() { bol_test == BoolTest::lt || bol_test == BoolTest::le, "CMove bool should be one of: eq,ne,ge,ge,lt,le"); - Node_List* p_bol = my_pack(bol); + Node_List* p_bol = get_pack(bol); assert(p_bol != nullptr, "CMove must have matching Bool pack"); #ifdef ASSERT @@ -2564,7 +2501,7 @@ bool SuperWord::output() { CmpNode* cmp = bol->in(1)->as_Cmp(); assert(cmp != nullptr, "must have cmp above CMove"); - Node_List* p_cmp = my_pack(cmp); + Node_List* p_cmp = get_pack(cmp); assert(p_cmp != nullptr, "Bool must have matching Cmp pack"); Node* cmp_in1 = vector_opd(p_cmp, 1); @@ -2883,7 +2820,7 @@ Node* SuperWord::vector_opd(Node_List* p, int opd_idx) { for (uint i = 1; i < vlen; i++) { Node* pi = p->at(i); Node* in = pi->in(opd_idx); - if (my_pack(in) != nullptr) { + if (get_pack(in) != nullptr) { assert(false, "Should already have been unpacked"); return nullptr; } @@ -2891,7 +2828,7 @@ Node* SuperWord::vector_opd(Node_List* p, int opd_idx) { pk->add_opd(in); if (VectorNode::is_muladds2i(pi)) { Node* in2 = pi->in(opd_idx + 2); - if (my_pack(in2) != nullptr) { + if (get_pack(in2) != nullptr) { assert(false, "Should already have been unpacked"); return nullptr; } @@ -2924,7 +2861,7 @@ void SuperWord::verify_no_extract() { for (uint k = 0; k < use->req(); k++) { Node* maybe_def = use->in(k); if (def == maybe_def) { - Node_List* p_use = my_pack(use); + Node_List* p_use = get_pack(use); if (is_marked_reduction(def)) { continue; } assert(p_use != nullptr && is_vector_use(use, k), "all uses must be vector uses"); } @@ -2937,13 +2874,13 @@ void SuperWord::verify_no_extract() { // Check if n_super's pack uses are a superset of n_sub's pack uses. 
bool SuperWord::has_use_pack_superset(const Node* n_super, const Node* n_sub) const { - Node_List* pack = my_pack(n_super); - assert(pack != nullptr && pack == my_pack(n_sub), "must have the same pack"); + Node_List* pack = get_pack(n_super); + assert(pack != nullptr && pack == get_pack(n_sub), "must have the same pack"); // For all uses of n_sub that are in a pack (use_sub) ... for (DUIterator_Fast jmax, j = n_sub->fast_outs(jmax); j < jmax; j++) { Node* use_sub = n_sub->fast_out(j); - Node_List* pack_use_sub = my_pack(use_sub); + Node_List* pack_use_sub = get_pack(use_sub); if (pack_use_sub == nullptr) { continue; } // ... and all input edges: use_sub->in(i) == n_sub. @@ -2956,7 +2893,7 @@ bool SuperWord::has_use_pack_superset(const Node* n_super, const Node* n_sub) co bool found = false; for (DUIterator_Fast kmax, k = n_super->fast_outs(kmax); k < kmax; k++) { Node* use_super = n_super->fast_out(k); - Node_List* pack_use_super = my_pack(use_super); + Node_List* pack_use_super = get_pack(use_super); if (pack_use_sub != pack_use_super) { continue; } // ... and where there is an edge use_super->in(i) == n_super. @@ -3005,7 +2942,7 @@ uint SuperWord::find_use_def_boundary(const Node_List* pack) const { // No boundary if: // 1) the same packs OR // 2) reduction edge n0->n1 or n1->n0 - if (my_pack(n0_in) != my_pack(n1_in) && + if (get_pack(n0_in) != get_pack(n1_in) && !((n0 == n1_in || n1 == n0_in) && is_reduction_pack)) { return i + 1; } @@ -3025,12 +2962,12 @@ uint SuperWord::find_use_def_boundary(const Node_List* pack) const { //------------------------------is_vector_use--------------------------- // Is use->in(u_idx) a vector use? 
-bool SuperWord::is_vector_use(Node* use, int u_idx) { - Node_List* u_pk = my_pack(use); +bool SuperWord::is_vector_use(Node* use, int u_idx) const { + Node_List* u_pk = get_pack(use); if (u_pk == nullptr) return false; if (is_marked_reduction(use)) return true; Node* def = use->in(u_idx); - Node_List* d_pk = my_pack(def); + Node_List* d_pk = get_pack(def); if (d_pk == nullptr) { Node* n = u_pk->at(0)->in(u_idx); if (n == iv()) { @@ -3213,7 +3150,7 @@ void SuperWord::initialize_node_info() { grow_node_info(bb_idx(last)); } -BasicType SuperWord::longer_type_for_conversion(Node* n) { +BasicType SuperWord::longer_type_for_conversion(Node* n) const { if (!(VectorNode::is_convert_opcode(n->Opcode()) || requires_long_to_int_conversion(n->Opcode())) || !in_bb(n->in(1))) { @@ -3427,49 +3364,6 @@ bool VLoopMemorySlices::same_memory_slice(MemNode* m1, MemNode* m2) const { _vloop.phase()->C->get_alias_index(m2->adr_type()); } -//------------------------------in_packset--------------------------- -// Are s1 and s2 in a pack pair and ordered as s1,s2? 
-bool SuperWord::in_packset(Node* s1, Node* s2) { - for (int i = 0; i < _packset.length(); i++) { - Node_List* p = _packset.at(i); - assert(p->size() == 2, "must be"); - if (p->at(0) == s1 && p->at(p->size()-1) == s2) { - return true; - } - } - return false; -} - -//------------------------------remove_pack_at--------------------------- -// Remove the pack at position pos in the packset -void SuperWord::remove_pack_at(int pos) { - Node_List* p = _packset.at(pos); - for (uint i = 0; i < p->size(); i++) { - Node* s = p->at(i); - set_my_pack(s, nullptr); - } - _packset.at_put(pos, nullptr); -} - -void SuperWord::packset_sort(int n) { - // simple bubble sort so that we capitalize with O(n) when its already sorted - do { - int max_swap_index = 0; - for (int i = 1; i < n; i++) { - Node_List* q_low = _packset.at(i-1); - Node_List* q_i = _packset.at(i); - - // only swap when we find something to swap - if (alignment(q_low->at(0)) > alignment(q_i->at(0))) { - *(_packset.adr_at(i)) = q_low; - *(_packset.adr_at(i-1)) = q_i; - max_swap_index = i; - } - } - n = max_swap_index; - } while (n > 1); -} - LoadNode::ControlDependency SuperWord::control_dependency(Node_List* p) { LoadNode::ControlDependency dep = LoadNode::DependsOnlyOnTest; for (uint i = 0; i < p->size(); i++) { @@ -3822,29 +3716,46 @@ void SuperWord::adjust_pre_loop_limit_to_align_main_loop_vectors() { igvn().replace_input_of(pre_opaq, 1, constrained_limit); } -//------------------------------print_packset--------------------------- -void SuperWord::print_packset() { #ifndef PRODUCT - tty->print_cr("packset"); - for (int i = 0; i < _packset.length(); i++) { - tty->print_cr("Pack: %d", i); - Node_List* p = _packset.at(i); - if (p == nullptr) { - tty->print_cr(" nullptr"); - } else { - print_pack(p); +void PairSet::print() const { + tty->print_cr("\nPairSet::print: %d pairs", length()); + int chain = 0; + int chain_index = 0; + for (PairSetIterator pair(*this); !pair.done(); pair.next()) { + Node* left = pair.left(); 
+ Node* right = pair.right(); + if (is_left_in_a_left_most_pair(left)) { + chain_index = 0; + tty->print_cr(" Pair-chain %d:", chain++); + tty->print(" %3d: ", chain_index++); + left->dump(); } + tty->print(" %3d: ", chain_index++); + right->dump(); } -#endif } -//------------------------------print_pack--------------------------- -void SuperWord::print_pack(Node_List* p) { - for (uint i = 0; i < p->size(); i++) { - print_stmt(p->at(i)); +void PackSet::print() const { + tty->print_cr("\nPackSet::print: %d packs", _packs.length()); + for (int i = 0; i < _packs.length(); i++) { + tty->print_cr(" Pack: %d", i); + Node_List* pack = _packs.at(i); + if (pack == nullptr) { + tty->print_cr(" nullptr"); + } else { + print_pack(pack); + } } } +void PackSet::print_pack(Node_List* pack) { + for (uint i = 0; i < pack->size(); i++) { + tty->print(" %3d: ", i); + pack->at(i)->dump(); + } +} +#endif + #ifndef PRODUCT void VLoopBody::print() const { tty->print_cr("\nBlock"); @@ -3858,14 +3769,6 @@ void VLoopBody::print() const { } #endif -//------------------------------print_stmt--------------------------- -void SuperWord::print_stmt(Node* s) { -#ifndef PRODUCT - tty->print(" align: %d \t", alignment(s)); - s->dump(); -#endif -} - // ========================= SWNodeInfo ===================== const SWNodeInfo SWNodeInfo::initial; diff --git a/src/hotspot/share/opto/superword.hpp b/src/hotspot/share/opto/superword.hpp index ba5944d48b4..e4f2e30052c 100644 --- a/src/hotspot/share/opto/superword.hpp +++ b/src/hotspot/share/opto/superword.hpp @@ -57,6 +57,333 @@ class VPointer; +// The PairSet is a set of pairs. These are later combined to packs, +// and stored in the PackSet. +class PairSet : public StackObj { +private: + const VLoop& _vloop; + const VLoopBody& _body; + + // Doubly-linked pairs. 
If not linked: -1 + GrowableArray _left_to_right; // bb_idx -> bb_idx + GrowableArray _right_to_left; // bb_idx -> bb_idx + // Example: + // + // Pairs: (n1, n2) and (n2, n3) + // bb_idx(n1) = 1 + // bb_idx(n2) = 3 + // bb_idx(n3) = 5 + // + // index / bb_idx: 0 1 2 3 4 5 6 + // + // left_to_right: | | 3 | | 5 | | | | + // n1-----> + // n2-----> + // + // right_to_left: | | | | 1 | | 3 | | + // <------n2 + // <------n3 + // + // Nodes with bb_idx 0, 2, 4, and 6 are in no pair, they are thus neither left nor right elements, + // and hence have no entries in the mapping. + // + // Nodes with bb_idx 1 and 3 (n1 and n2) are both a left element in some pair. Therefore, they both + // have an entry in the left_to_right mapping. This mapping indicates which right element they are + // paired with, namely the nodes with bb_idx 3 and 5 (n2 and n3), respectively. + // + // Nodes with bb_idx 3 and 5 (n2 and n4) are both a right element in some pair. Therefore, they both + // have an entry in the right_to_left mapping. This mapping indicates which left element they are + // paired with, namely the nodes with bb_idx 1 and 3 (n1 and n2), respectively. + // + // Node n1 with bb_idx 1 is not a right element in any pair, thus its right_to_left is empty. + // + // Node n2 with bb_idx 3 is both a left element of pair (n2, n3), and a right element of pair (n1, n2). + // Thus it has entries in both left_to_right (mapping n2->n3) and right_to_left (mapping n2->n1). + // + // Node n3 with bb_idx 5 is not a left element in any pair, thus its left_to_right is empty. + + // List of all left elements bb_idx, in the order of pair addition. + GrowableArray _lefts_in_insertion_order; + +public: + // Initialize empty, i.e. all not linked (-1). 
+ PairSet(Arena* arena, const VLoopAnalyzer& vloop_analyzer) : + _vloop(vloop_analyzer.vloop()), + _body(vloop_analyzer.body()), + _left_to_right(arena, _body.body().length(), _body.body().length(), -1), + _right_to_left(arena, _body.body().length(), _body.body().length(), -1), + _lefts_in_insertion_order(arena, 8, 0, 0) {} + + const VLoopBody& body() const { return _body; } + + bool is_empty() const { return _lefts_in_insertion_order.is_empty(); } + + bool is_left(int i) const { return _left_to_right.at(i) != -1; } + bool is_right(int i) const { return _right_to_left.at(i) != -1; } + bool is_left(const Node* n) const { return _vloop.in_bb(n) && is_left( _body.bb_idx(n)); } + bool is_right(const Node* n) const { return _vloop.in_bb(n) && is_right(_body.bb_idx(n)); } + + bool is_pair(const Node* n1, const Node* n2) const { return is_left(n1) && get_right_for(n1) == n2; } + + bool is_left_in_a_left_most_pair(int i) const { return is_left(i) && !is_right(i); } + bool is_right_in_a_right_most_pair(int i) const { return !is_left(i) && is_right(i); } + bool is_left_in_a_left_most_pair(const Node* n) const { return is_left_in_a_left_most_pair( _body.bb_idx(n)); } + bool is_right_in_a_right_most_pair(const Node* n) const { return is_right_in_a_right_most_pair(_body.bb_idx(n)); } + + int get_right_for(int i) const { return _left_to_right.at(i); } + Node* get_right_for(const Node* n) const { return _body.body().at(get_right_for(_body.bb_idx(n))); } + Node* get_right_or_null_for(const Node* n) const { return is_left(n) ? 
get_right_for(n) : nullptr; } + + // To access elements in insertion order: + int length() const { return _lefts_in_insertion_order.length(); } + Node* left_at_in_insertion_order(int i) const { return _body.body().at(_lefts_in_insertion_order.at(i)); } + Node* right_at_in_insertion_order(int i) const { return _body.body().at(get_right_for(_lefts_in_insertion_order.at(i))); } + + void add_pair(Node* n1, Node* n2) { + assert(n1 != nullptr && n2 != nullptr && n1 != n2, "no nullptr, and different nodes"); + assert(!is_left(n1) && !is_right(n2), "cannot be left twice, or right twice"); + int bb_idx_1 = _body.bb_idx(n1); + int bb_idx_2 = _body.bb_idx(n2); + _left_to_right.at_put(bb_idx_1, bb_idx_2); + _right_to_left.at_put(bb_idx_2, bb_idx_1); + _lefts_in_insertion_order.append(bb_idx_1); + assert(is_left(n1) && is_right(n2), "must be set now"); + } + + NOT_PRODUCT(void print() const;) +}; + +// Iterate over the PairSet, pair-chain by pair-chain. +// A pair-chain starts with a "left-most" pair (n1, n2), where n1 is never a right-element +// in any pair. We walk a chain: (n2, n3), (n3, n4) ... until we hit a "right-most" pair +// where the right-element is never a left-element of any pair. +// These pair-chains will later be combined into packs by combine_pairs_to_longer_packs. +class PairSetIterator : public StackObj { +private: + const PairSet& _pairset; + const VLoopBody& _body; + + int _chain_start_bb_idx; // bb_idx of left-element in the left-most pair. + int _current_bb_idx; // bb_idx of left-element of the current pair. 
+ const int _end_bb_idx; + +public: + PairSetIterator(const PairSet& pairset) : + _pairset(pairset), + _body(pairset.body()), + _chain_start_bb_idx(-1), + _current_bb_idx(-1), + _end_bb_idx(_body.body().length()) + { + next_chain(); + } + + bool done() const { + return _chain_start_bb_idx >= _end_bb_idx; + } + + Node* left() const { + return _body.body().at(_current_bb_idx); + } + + Node* right() const { + int bb_idx_2 = _pairset.get_right_for(_current_bb_idx); + return _body.body().at(bb_idx_2); + } + + // Try to keep walking on the current pair-chain, else find a new pair-chain. + void next() { + assert(_pairset.is_left(_current_bb_idx), "current was valid"); + _current_bb_idx = _pairset.get_right_for(_current_bb_idx); + if (!_pairset.is_left(_current_bb_idx)) { + next_chain(); + } + } + +private: + void next_chain() { + do { + _chain_start_bb_idx++; + } while (!done() && !_pairset.is_left_in_a_left_most_pair(_chain_start_bb_idx)); + _current_bb_idx = _chain_start_bb_idx; + } +}; + +class SplitTask { +private: + enum Kind { + // The lambda method for split_packs can return one of these tasks: + Unchanged, // The pack is left in the packset, unchanged. + Rejected, // The pack is removed from the packset. + Split, // Split away split_size nodes from the end of the pack. 
+ }; + const Kind _kind; + const uint _split_size; + const char* _message; + + SplitTask(const Kind kind, const uint split_size, const char* message) : + _kind(kind), _split_size(split_size), _message(message) + { + assert(message != nullptr, "must have message"); + assert(_kind != Unchanged || split_size == 0, "unchanged task conditions"); + assert(_kind != Rejected || split_size == 0, "reject task conditions"); + assert(_kind != Split || split_size != 0, "split task conditions"); + } + +public: + static SplitTask make_split(const uint split_size, const char* message) { + return SplitTask(Split, split_size, message); + } + + static SplitTask make_unchanged() { + return SplitTask(Unchanged, 0, "unchanged"); + } + + static SplitTask make_rejected(const char* message) { + return SplitTask(Rejected, 0, message); + } + + bool is_unchanged() const { return _kind == Unchanged; } + bool is_rejected() const { return _kind == Rejected; } + bool is_split() const { return _kind == Split; } + const char* message() const { return _message; } + + uint split_size() const { + assert(is_split(), "only split tasks have split_size"); + return _split_size; + } +}; + +class SplitStatus { +private: + enum Kind { + // After split_pack, we have: first_pack second_pack + Unchanged, // The pack is left in the pack, unchanged. old_pack nullptr + Rejected, // The pack is removed from the packset. nullptr nullptr + Modified, // The pack had some nodes removed. old_pack nullptr + Split, // The pack was split into two packs. 
pack1 pack2 + }; + Kind _kind; + Node_List* _first_pack; + Node_List* _second_pack; + + SplitStatus(Kind kind, Node_List* first_pack, Node_List* second_pack) : + _kind(kind), _first_pack(first_pack), _second_pack(second_pack) + { + assert(_kind != Unchanged || (first_pack != nullptr && second_pack == nullptr), "unchanged status conditions"); + assert(_kind != Rejected || (first_pack == nullptr && second_pack == nullptr), "rejected status conditions"); + assert(_kind != Modified || (first_pack != nullptr && second_pack == nullptr), "modified status conditions"); + assert(_kind != Split || (first_pack != nullptr && second_pack != nullptr), "split status conditions"); + } + +public: + static SplitStatus make_unchanged(Node_List* old_pack) { + return SplitStatus(Unchanged, old_pack, nullptr); + } + + static SplitStatus make_rejected() { + return SplitStatus(Rejected, nullptr, nullptr); + } + + static SplitStatus make_modified(Node_List* first_pack) { + return SplitStatus(Modified, first_pack, nullptr); + } + + static SplitStatus make_split(Node_List* first_pack, Node_List* second_pack) { + return SplitStatus(Split, first_pack, second_pack); + } + + bool is_unchanged() const { return _kind == Unchanged; } + Node_List* first_pack() const { return _first_pack; } + Node_List* second_pack() const { return _second_pack; } +}; + +class PackSet : public StackObj { +private: + const VLoop& _vloop; + const VLoopBody& _body; + + // Set of all packs: + GrowableArray _packs; + + // Mapping from nodes to their pack: bb_idx -> pack + GrowableArray _node_to_pack; + + NOT_PRODUCT(const bool _trace_packset;) + NOT_PRODUCT(const bool _trace_rejections;) + +public: + // Initialize empty, i.e. no packs, and unmapped (nullptr). 
+ PackSet(Arena* arena, const VLoopAnalyzer& vloop_analyzer + NOT_PRODUCT(COMMA bool trace_packset COMMA bool trace_rejections) + ) : + _vloop(vloop_analyzer.vloop()), + _body(vloop_analyzer.body()), + _packs(arena, 8, 0, nullptr), + _node_to_pack(arena, _body.body().length(), _body.body().length(), nullptr) + NOT_PRODUCT(COMMA _trace_packset(trace_packset)) + NOT_PRODUCT(COMMA _trace_rejections(trace_rejections)) + {} + + // Accessors to iterate over packs. + int length() const { return _packs.length(); } + bool is_empty() const { return _packs.is_empty(); } + Node_List* at(int i) const { return _packs.at(i); } + +private: + void map_node_in_pack(const Node* n, Node_List* new_pack) { + assert(get_pack(n) == nullptr, "was previously unmapped"); + _node_to_pack.at_put(_body.bb_idx(n), new_pack); + } + + void remap_node_in_pack(const Node* n, Node_List* new_pack) { + assert(get_pack(n) != nullptr && new_pack != nullptr && get_pack(n) != new_pack, "was previously mapped"); + _node_to_pack.at_put(_body.bb_idx(n), new_pack); + } + + void unmap_node_in_pack(const Node* n) { + assert(get_pack(n) != nullptr, "was previously mapped"); + _node_to_pack.at_put(_body.bb_idx(n), nullptr); + } + + void unmap_all_nodes_in_pack(Node_List* old_pack) { + for (uint i = 0; i < old_pack->size(); i++) { + unmap_node_in_pack(old_pack->at(i)); + } + } +public: + Node_List* get_pack(const Node* n) const { return !_vloop.in_bb(n) ? 
nullptr : _node_to_pack.at(_body.bb_idx(n)); } + + void add_pack(Node_List* pack) { + _packs.append(pack); + for (uint i = 0; i < pack->size(); i++) { + Node* n = pack->at(i); + map_node_in_pack(n, pack); + } + } + +private: + SplitStatus split_pack(const char* split_name, Node_List* pack, SplitTask task); +public: + template <typename SplitStrategy> + void split_packs(const char* split_name, SplitStrategy strategy); + + template <typename FilterPredicate> + void filter_packs(const char* filter_name, + const char* rejection_message, + FilterPredicate filter); + + void clear() { _packs.clear(); } + +private: + NOT_PRODUCT(bool is_trace_superword_packset() const { return _trace_packset; }) + NOT_PRODUCT(bool is_trace_superword_rejections() const { return _trace_rejections; }) +public: + DEBUG_ONLY(void verify() const;) + NOT_PRODUCT(void print() const;) + NOT_PRODUCT(static void print_pack(Node_List* pack);) +}; + // ========================= SuperWord ===================== // -----------------------------SWNodeInfo--------------------------------- @@ -64,9 +391,8 @@ class VPointer; class SWNodeInfo { public: int _alignment; // memory alignment for a node - Node_List* _my_pack; // pack containing this node - SWNodeInfo() : _alignment(-1), _my_pack(nullptr) {} + SWNodeInfo() : _alignment(-1) {} static const SWNodeInfo initial; }; @@ -83,12 +409,13 @@ class SuperWord : public ResourceObj { enum consts { top_align = -1, bottom_align = -666 }; - GrowableArray<Node_List*> _packset; // Packs for the current block - GrowableArray<SWNodeInfo> _node_info; // Info needed per node CloneMap& _clone_map; // map of nodes created in cloning MemNode const* _align_to_ref; // Memory reference that pre-loop will align to + PairSet _pairset; + PackSet _packset; + public: SuperWord(const VLoopAnalyzer &vloop_analyzer); @@ -112,7 +439,7 @@ class SuperWord : public ResourceObj { return _vloop_analyzer.reductions().is_marked_reduction(n); } - bool reduction(Node* n1, Node* n2) const { + bool reduction(const Node* n1, const Node* n2) const { return
_vloop_analyzer.reductions().is_marked_reduction_pair(n1, n2); } @@ -219,9 +546,10 @@ class SuperWord : public ResourceObj { bool do_vector_loop() { return _do_vector_loop; } - const GrowableArray<Node_List*>& packset() const { return _packset; } + const PackSet& packset() const { return _packset; } + Node_List* get_pack(const Node* n) const { return _packset.get_pack(n); } + private: - bool _race_possible; // In cases where SDMU is true bool _do_vector_loop; // whether to do vectorization/simd style int _num_work_vecs; // Number of non memory vector operations int _num_reductions; // Number of reduction expressions applied @@ -240,18 +568,13 @@ class SuperWord : public ResourceObj { bool vectors_should_be_aligned() { return !Matcher::misaligned_vectors_ok() || AlignVector; } // memory alignment for a node - int alignment(Node* n) { return _node_info.adr_at(bb_idx(n))->_alignment; } + int alignment(Node* n) const { return _node_info.adr_at(bb_idx(n))->_alignment; } void set_alignment(Node* n, int a) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_alignment = a; } - // my_pack - public: - Node_List* my_pack(const Node* n) const { return !in_bb(n) ? nullptr : _node_info.adr_at(bb_idx(n))->_my_pack; } - private: - void set_my_pack(Node* n, Node_List* p) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_my_pack = p; } // is pack good for converting into one vector node replacing bunches of Cmp, Bool, CMov nodes. static bool requires_long_to_int_conversion(int opc); // For pack p, are all idx operands the same? - bool same_inputs(const Node_List* p, int idx); + bool same_inputs(const Node_List* p, int idx) const; // CloneMap utilities bool same_origin_idx(Node* a, Node* b) const; bool same_generation(Node* a, Node* b) const; @@ -267,10 +590,8 @@ private: // Can s1 and s2 be in a pack with s1 immediately preceding s2 and s1 aligned at "align" bool stmts_can_pack(Node* s1, Node* s2, int align); - // Does s exist in a pack at position pos?
- bool exists_at(Node* s, uint pos); // Is s1 immediately before s2 in memory? - bool are_adjacent_refs(Node* s1, Node* s2); + bool are_adjacent_refs(Node* s1, Node* s2) const; // Are s1 and s2 similar? bool isomorphic(Node* s1, Node* s2); // Do we have pattern n1 = (iv + c) and n2 = (iv + c + 1)? @@ -279,143 +600,31 @@ private: // do s1 and s2 have similar input edges? bool have_similar_inputs(Node* s1, Node* s2); void set_alignment(Node* s1, Node* s2, int align); - // Extend packset by following use->def and def->use links from pack members. - void extend_packset_with_more_pairs_by_following_use_and_def(); int adjust_alignment_for_type_conversion(Node* s, Node* t, int align); - // Extend the packset by visiting operand definitions of nodes in pack p - bool follow_use_defs(Node_List* p); - // Extend the packset by visiting uses of nodes in pack p - bool follow_def_uses(Node_List* p); - // For extended packsets, ordinally arrange uses packset by major component - void order_def_uses(Node_List* p); - // Estimate the savings from executing s1 and s2 as a pack - int est_savings(Node* s1, Node* s2); - int adjacent_profit(Node* s1, Node* s2); - int pack_cost(int ct); - int unpack_cost(int ct); - // Combine packs A and B with A.last == B.first into A.first..,A.last,B.second,..B.last + void extend_pairset_with_more_pairs_by_following_use_and_def(); + bool extend_pairset_with_more_pairs_by_following_def(Node* s1, Node* s2); + bool extend_pairset_with_more_pairs_by_following_use(Node* s1, Node* s2); + void order_inputs_of_all_use_pairs_to_match_def_pair(Node* def1, Node* def2); + enum PairOrderStatus { Ordered, Unordered, Unknown }; + PairOrderStatus order_inputs_of_uses_to_match_def_pair(Node* def1, Node* def2, Node* use1, Node* use2); + int estimate_cost_savings_when_packing_as_pair(const Node* s1, const Node* s2) const; + void combine_pairs_to_longer_packs(); - class SplitTask { - private: - enum Kind { - // The lambda method for split_packs can return one of these 
tasks: - Unchanged, // The pack is left in the packset, unchanged. - Rejected, // The pack is removed from the packset. - Split, // Split away split_size nodes from the end of the pack. - }; - const Kind _kind; - const uint _split_size; - const char* _message; - - SplitTask(const Kind kind, const uint split_size, const char* message) : - _kind(kind), _split_size(split_size), _message(message) - { - assert(message != nullptr, "must have message"); - assert(_kind != Unchanged || split_size == 0, "unchanged task conditions"); - assert(_kind != Rejected || split_size == 0, "reject task conditions"); - assert(_kind != Split || split_size != 0, "split task conditions"); - } - - public: - static SplitTask make_split(const uint split_size, const char* message) { - return SplitTask(Split, split_size, message); - } - - static SplitTask make_unchanged() { - return SplitTask(Unchanged, 0, "unchanged"); - } - - static SplitTask make_rejected(const char* message) { - return SplitTask(Rejected, 0, message); - } - - bool is_unchanged() const { return _kind == Unchanged; } - bool is_rejected() const { return _kind == Rejected; } - bool is_split() const { return _kind == Split; } - const char* message() const { return _message; } - - uint split_size() const { - assert(is_split(), "only split tasks have split_size"); - return _split_size; - } - }; - - class SplitStatus { - private: - enum Kind { - // After split_pack, we have: first_pack second_pack - Unchanged, // The pack is left in the pack, unchanged. old_pack nullptr - Rejected, // The pack is removed from the packset. nullptr nullptr - Modified, // The pack had some nodes removed. old_pack nullptr - Split, // The pack was split into two packs. 
pack1 pack2 - }; - Kind _kind; - Node_List* _first_pack; - Node_List* _second_pack; - - SplitStatus(Kind kind, Node_List* first_pack, Node_List* second_pack) : - _kind(kind), _first_pack(first_pack), _second_pack(second_pack) - { - assert(_kind != Unchanged || (first_pack != nullptr && second_pack == nullptr), "unchanged status conditions"); - assert(_kind != Rejected || (first_pack == nullptr && second_pack == nullptr), "rejected status conditions"); - assert(_kind != Modified || (first_pack != nullptr && second_pack == nullptr), "modified status conditions"); - assert(_kind != Split || (first_pack != nullptr && second_pack != nullptr), "split status conditions"); - } - - public: - static SplitStatus make_unchanged(Node_List* old_pack) { - return SplitStatus(Unchanged, old_pack, nullptr); - } - - static SplitStatus make_rejected() { - return SplitStatus(Rejected, nullptr, nullptr); - } - - static SplitStatus make_modified(Node_List* first_pack) { - return SplitStatus(Modified, first_pack, nullptr); - } - - static SplitStatus make_split(Node_List* first_pack, Node_List* second_pack) { - return SplitStatus(Split, first_pack, second_pack); - } - - bool is_unchanged() const { return _kind == Unchanged; } - Node_List* first_pack() const { return _first_pack; } - Node_List* second_pack() const { return _second_pack; } - }; - - SplitStatus split_pack(const char* split_name, Node_List* pack, SplitTask task); - template - void split_packs(const char* split_name, SplitStrategy strategy); - void split_packs_at_use_def_boundaries(); void split_packs_only_implemented_with_smaller_size(); void split_packs_to_break_mutual_dependence(); - // Filter out packs with various filter predicates - template - void filter_packs(const char* filter_name, - const char* error_message, - FilterPredicate filter); void filter_packs_for_power_of_2_size(); void filter_packs_for_mutual_independence(); - // Ensure all packs are aligned, if AlignVector is on. 
void filter_packs_for_alignment(); - // Find the set of alignment solutions for load/store pack. const AlignmentSolution* pack_alignment_solution(const Node_List* pack); - // Compress packset, such that it has no nullptr entries. - void compress_packset(); - // Construct the map from nodes to packs. - void construct_my_pack_map(); - // Remove packs that are not implemented. void filter_packs_for_implemented(); - // Remove packs that are not profitable. void filter_packs_for_profitable(); - // Verify that for every pack, all nodes are mutually independent. - // Also verify that packset and my_pack are consistent. - DEBUG_ONLY(void verify_packs();) + + DEBUG_ONLY(void verify_packs() const;) + // Adjust the memory graph for the packed operations void schedule(); // Helper function for schedule, that reorders all memops, slice by slice, according to the schedule @@ -427,12 +636,13 @@ private: Node* vector_opd(Node_List* p, int opd_idx); // Can code be generated for the pack, restricted to size nodes? - bool implemented(const Node_List* pack, uint size); + bool implemented(const Node_List* pack, const uint size) const; // Find the maximal implemented size smaller or equal to the packs size uint max_implemented_size(const Node_List* pack); // For pack p, are all operands and all uses (with in the block) vector? - bool profitable(const Node_List* p); + bool profitable(const Node_List* p) const; + // Verify that all uses of packs are also packs, i.e. we do not need extract operations. DEBUG_ONLY(void verify_no_extract();) @@ -440,35 +650,22 @@ private: bool has_use_pack_superset(const Node* n1, const Node* n2) const; // Find a boundary in the pack, where left and right have different pack uses and defs. uint find_use_def_boundary(const Node_List* pack) const; + // Is use->in(u_idx) a vector use? 
- bool is_vector_use(Node* use, int u_idx); + bool is_vector_use(Node* use, int u_idx) const; // Initialize per node info void initialize_node_info(); - // Compute max depth for expressions from beginning of block - void compute_max_depth(); // Return the longer type for vectorizable type-conversion node or illegal type for other nodes. - BasicType longer_type_for_conversion(Node* n); + BasicType longer_type_for_conversion(Node* n) const; // Find the longest type in def-use chain for packed nodes, and then compute the max vector size. int max_vector_size_in_def_use_chain(Node* n); - // Are s1 and s2 in a pack pair and ordered as s1,s2? - bool in_packset(Node* s1, Node* s2); - // Remove the pack at position pos in the packset - void remove_pack_at(int pos); + static LoadNode::ControlDependency control_dependency(Node_List* p); // Alignment within a vector memory reference int memory_alignment(MemNode* s, int iv_adjust); // Ensure that the main loop vectors are aligned by adjusting the pre loop limit. void adjust_pre_loop_limit_to_align_main_loop_vectors(); - // Is the use of d1 in u1 at the same operand position as d2 in u2? - bool opnd_positions_match(Node* d1, Node* u1, Node* d2, Node* u2); - - // print methods - void print_packset(); - void print_pack(Node_List* p); - void print_stmt(Node* s); - - void packset_sort(int n); }; #endif // SHARE_OPTO_SUPERWORD_HPP diff --git a/src/hotspot/share/opto/vectorization.hpp b/src/hotspot/share/opto/vectorization.hpp index 88a46a3d688..acc6bbf475d 100644 --- a/src/hotspot/share/opto/vectorization.hpp +++ b/src/hotspot/share/opto/vectorization.hpp @@ -275,7 +275,7 @@ public: bool is_marked_reduction_loop() const { return !_loop_reductions.is_empty(); } // Are s1 and s2 reductions with a data path between them? - bool is_marked_reduction_pair(Node* s1, Node* s2) const; + bool is_marked_reduction_pair(const Node* s1, const Node* s2) const; private: // Whether n is a standard reduction operator. 
diff --git a/src/hotspot/share/opto/vectornode.cpp b/src/hotspot/share/opto/vectornode.cpp index f5fdae79e32..f81c29649bb 100644 --- a/src/hotspot/share/opto/vectornode.cpp +++ b/src/hotspot/share/opto/vectornode.cpp @@ -426,7 +426,7 @@ bool VectorNode::is_type_transition_to_int(Node* n) { return is_type_transition_short_to_int(n); } -bool VectorNode::is_muladds2i(Node* n) { +bool VectorNode::is_muladds2i(const Node* n) { if (n->Opcode() == Op_MulAddS2I) { return true; } diff --git a/src/hotspot/share/opto/vectornode.hpp b/src/hotspot/share/opto/vectornode.hpp index e4d3d013cd3..740c07d64ff 100644 --- a/src/hotspot/share/opto/vectornode.hpp +++ b/src/hotspot/share/opto/vectornode.hpp @@ -102,7 +102,7 @@ class VectorNode : public TypeNode { static bool is_vshift_cnt(Node* n); static bool is_type_transition_short_to_int(Node* n); static bool is_type_transition_to_int(Node* n); - static bool is_muladds2i(Node* n); + static bool is_muladds2i(const Node* n); static bool is_roundopD(Node* n); static bool is_scalar_rotate(Node* n); static bool is_vector_rotate_supported(int opc, uint vlen, BasicType bt); diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/TestMulAddS2I.java b/test/hotspot/jtreg/compiler/loopopts/superword/TestMulAddS2I.java index 3cb0152659c..c65da58b285 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/TestMulAddS2I.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/TestMulAddS2I.java @@ -23,7 +23,7 @@ /** * @test - * @bug 8310886 + * @bug 8310886 8325252 * @summary Test MulAddS2I vectorization. 
* @library /test/lib / * @run driver compiler.loopopts.superword.TestMulAddS2I @@ -36,14 +36,20 @@ import jdk.test.lib.Asserts; import jdk.test.lib.Platform; public class TestMulAddS2I { - static final int RANGE = 1024; + static final int RANGE = 1024*16; static final int ITER = RANGE/2 - 1; static short[] sArr1 = new short[RANGE]; static short[] sArr2 = new short[RANGE]; + static int[] ioutArr = new int[RANGE]; static final int[] GOLDEN_A; static final int[] GOLDEN_B; static final int[] GOLDEN_C; + static final int[] GOLDEN_D; + static final int[] GOLDEN_E; + static final int[] GOLDEN_F; + static final int[] GOLDEN_G; + static final int[] GOLDEN_H; static { for (int i = 0; i < RANGE; i++) { @@ -53,6 +59,11 @@ public class TestMulAddS2I { GOLDEN_A = testa(); GOLDEN_B = testb(); GOLDEN_C = testc(); + GOLDEN_D = testd(); + GOLDEN_E = teste(); + GOLDEN_F = testf(); + GOLDEN_G = testg(); + GOLDEN_H = testh(); } @@ -65,12 +76,17 @@ public class TestMulAddS2I { } } - @Run(test = {"testa", "testb", "testc"}) + @Run(test = {"testa", "testb", "testc", "testd", "teste", "testf", "testg", "testh"}) @Warmup(0) public static void run() { compare(testa(), GOLDEN_A, "testa"); compare(testb(), GOLDEN_B, "testb"); - compare(testb(), GOLDEN_C, "testc"); + compare(testc(), GOLDEN_C, "testc"); + compare(testd(), GOLDEN_D, "testd"); + compare(teste(), GOLDEN_E, "teste"); + compare(testf(), GOLDEN_F, "testf"); + compare(testg(), GOLDEN_G, "testg"); + compare(testh(), GOLDEN_H, "testh"); } public static void compare(int[] out, int[] golden, String name) { @@ -133,4 +149,99 @@ public class TestMulAddS2I { } return out; } + + @Test + @IR(applyIfCPUFeature = {"sse2", "true"}, + applyIfPlatform = {"64-bit", "true"}, + counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) + @IR(applyIfCPUFeature = {"asimd", "true"}, + applyIf = {"MaxVectorSize", "16"}, // AD file requires vector_length = 16 + counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) + 
@IR(applyIfCPUFeature = {"avx512_vnni", "true"}, + counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"}) + public static int[] testd() { + int[] out = ioutArr; + for (int i = 0; i < ITER-2; i+=2) { + // Unrolled, with the same structure. + out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1])); + out[i+1] += ((sArr1[2*i+2] * sArr2[2*i+2]) + (sArr1[2*i+3] * sArr2[2*i+3])); + } + return out; + } + + @Test + @IR(applyIfCPUFeature = {"sse2", "true"}, + applyIfPlatform = {"64-bit", "true"}, + counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) + @IR(applyIfCPUFeature = {"asimd", "true"}, + applyIf = {"MaxVectorSize", "16"}, // AD file requires vector_length = 16 + counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) + @IR(applyIfCPUFeature = {"avx512_vnni", "true"}, + counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"}) + public static int[] teste() { + int[] out = ioutArr; + for (int i = 0; i < ITER-2; i+=2) { + // Unrolled, with some swaps. + out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1])); + out[i+1] += ((sArr2[2*i+2] * sArr1[2*i+2]) + (sArr1[2*i+3] * sArr2[2*i+3])); // swap(1 2) + } + return out; + } + + @Test + @IR(applyIfCPUFeature = {"sse2", "true"}, + applyIfPlatform = {"64-bit", "true"}, + counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) + @IR(applyIfCPUFeature = {"asimd", "true"}, + applyIf = {"MaxVectorSize", "16"}, // AD file requires vector_length = 16 + counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) + @IR(applyIfCPUFeature = {"avx512_vnni", "true"}, + counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"}) + public static int[] testf() { + int[] out = ioutArr; + for (int i = 0; i < ITER-2; i+=2) { + // Unrolled, with some swaps. 
+ out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1])); + out[i+1] += ((sArr2[2*i+2] * sArr1[2*i+2]) + (sArr2[2*i+3] * sArr1[2*i+3])); // swap(1 2), swap(3 4) + } + return out; + } + + @Test + @IR(applyIfCPUFeature = {"sse2", "true"}, + applyIfPlatform = {"64-bit", "true"}, + counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) + @IR(applyIfCPUFeature = {"asimd", "true"}, + applyIf = {"MaxVectorSize", "16"}, // AD file requires vector_length = 16 + counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) + @IR(applyIfCPUFeature = {"avx512_vnni", "true"}, + counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"}) + public static int[] testg() { + int[] out = ioutArr; + for (int i = 0; i < ITER-2; i+=2) { + // Unrolled, with some swaps. + out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1])); + out[i+1] += ((sArr1[2*i+3] * sArr2[2*i+3]) + (sArr1[2*i+2] * sArr2[2*i+2])); // swap(1 3), swap(2 4) + } + return out; + } + + @Test + @IR(applyIfCPUFeature = {"sse2", "true"}, + applyIfPlatform = {"64-bit", "true"}, + counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) + @IR(applyIfCPUFeature = {"asimd", "true"}, + applyIf = {"MaxVectorSize", "16"}, // AD file requires vector_length = 16 + counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) + @IR(applyIfCPUFeature = {"avx512_vnni", "true"}, + counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"}) + public static int[] testh() { + int[] out = ioutArr; + for (int i = 0; i < ITER-2; i+=2) { + // Unrolled, with some swaps. + out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1])); + out[i+1] += ((sArr2[2*i+3] * sArr1[2*i+3]) + (sArr2[2*i+2] * sArr1[2*i+2])); // swap(1 4), swap(2 3) + } + return out; + } }