diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp
index cd9b76eb44d..f41e8a7b8b3 100644
--- a/src/hotspot/share/opto/superword.cpp
+++ b/src/hotspot/share/opto/superword.cpp
@@ -45,12 +45,13 @@ SuperWord::SuperWord(const VLoopAnalyzer &vloop_analyzer) :
   _arena(mtCompiler),
   _node_info(arena(), _vloop.estimated_body_length(), 0, SWNodeInfo::initial), // info needed per node
   _clone_map(phase()->C->clone_map()), // map of nodes created in cloning
-  _align_to_ref(nullptr), // memory reference to align vectors to
   _pairset(&_arena, _vloop_analyzer),
   _packset(&_arena, _vloop_analyzer
            NOT_PRODUCT(COMMA is_trace_superword_packset())
            NOT_PRODUCT(COMMA is_trace_superword_rejections())
            ),
+  _mem_ref_for_main_loop_alignment(nullptr),
+  _aw_for_main_loop_alignment(0),
   _do_vector_loop(phase()->C->do_vector_loop()), // whether to do vectorization/simd style
   _num_work_vecs(0), // amount of vector work we have
   _num_reductions(0) // amount of reduction work we have
@@ -516,22 +517,12 @@ void SuperWord::find_adjacent_refs() {
 
   int max_idx;
 
-  // Take the first mem_ref as the reference to align to. The pre-loop trip count is
-  // modified to align this reference to a vector-aligned address. If strict alignment
-  // is required, we may change the reference later (see filter_packs_for_alignment()).
-  MemNode* align_to_mem_ref = nullptr;
-
   while (memops.size() != 0) {
     // Find a memory reference to align to.
     MemNode* mem_ref = find_align_to_ref(memops, max_idx);
     if (mem_ref == nullptr) break;
     int iv_adjustment = get_iv_adjustment(mem_ref);
 
-    if (align_to_mem_ref == nullptr) {
-      align_to_mem_ref = mem_ref;
-      set_align_to_ref(align_to_mem_ref);
-    }
-
     const VPointer& align_to_ref_p = vpointer(mem_ref);
     // Set alignment relative to "align_to_ref" for all related memory operations.
     for (int i = memops.size() - 1; i >= 0; i--) {
@@ -573,9 +564,6 @@ void SuperWord::find_adjacent_refs() {
     }
   } // while (memops.size() != 0)
 
-  assert(_pairset.is_empty() || align_to_mem_ref != nullptr,
-         "pairset empty or we find the alignment reference");
-
 #ifndef PRODUCT
   if (is_trace_superword_packset()) {
     tty->print_cr("\nAfter Superword::find_adjacent_refs");
@@ -1723,7 +1711,11 @@ void SuperWord::filter_packs_for_alignment() {
   if (current->is_constrained()) {
     // Solution is constrained (not trivial)
     // -> must change pre-limit to achieve alignment
-    set_align_to_ref(current->as_constrained()->mem_ref());
+    MemNode const* mem = current->as_constrained()->mem_ref();
+    Node_List* pack = get_pack(mem);
+    assert(pack != nullptr, "memop of final solution must still be packed");
+    _mem_ref_for_main_loop_alignment = mem;
+    _aw_for_main_loop_alignment = pack->size() * mem->memory_size();
   }
 }
 
@@ -3397,6 +3389,32 @@ LoadNode::ControlDependency SuperWord::control_dependency(Node_List* p) {
   return dep;
 }
 
+// Find the memop pack with the maximum vector width, unless they were already
+// determined by SuperWord::filter_packs_for_alignment().
+void SuperWord::determine_mem_ref_and_aw_for_main_loop_alignment() {
+  if (_mem_ref_for_main_loop_alignment != nullptr) {
+    assert(vectors_should_be_aligned(), "mem_ref only set if filtered for alignment");
+    return;
+  }
+
+  MemNode const* mem_ref = nullptr;
+  int max_aw = 0;
+  for (int i = 0; i < _packset.length(); i++) {
+    Node_List* pack = _packset.at(i);
+    MemNode* first = pack->at(0)->isa_Mem();
+    if (first == nullptr) { continue; }
+
+    int vw = first->memory_size() * pack->size();
+    if (vw > max_aw) {
+      max_aw = vw;
+      mem_ref = first;
+    }
+  }
+  assert(mem_ref != nullptr && max_aw > 0, "found mem_ref and aw");
+  _mem_ref_for_main_loop_alignment = mem_ref;
+  _aw_for_main_loop_alignment = max_aw;
+}
+
 #define TRACE_ALIGN_VECTOR_NODE(node) { \
   DEBUG_ONLY( \
     if (is_trace_align_vector()) { \
@@ -3407,11 +3425,14 @@ LoadNode::ControlDependency SuperWord::control_dependency(Node_List* p) {
 } \
 
 // Ensure that the main loop vectors are aligned by adjusting the pre loop limit. We memory-align
-// the address of "align_to_ref" to the maximal possible vector width. We adjust the pre-loop
-// iteration count by adjusting the pre-loop limit.
+// the address of "_mem_ref_for_main_loop_alignment" to "_aw_for_main_loop_alignment", which is a
+// sufficiently large alignment width. We adjust the pre-loop iteration count by adjusting the
+// pre-loop limit.
 void SuperWord::adjust_pre_loop_limit_to_align_main_loop_vectors() {
-  const MemNode* align_to_ref = _align_to_ref;
-  assert(align_to_ref != nullptr, "align_to_ref must be set");
+  determine_mem_ref_and_aw_for_main_loop_alignment();
+  const MemNode* align_to_ref = _mem_ref_for_main_loop_alignment;
+  const int aw = _aw_for_main_loop_alignment;
+  assert(align_to_ref != nullptr && aw > 0, "must have alignment reference and aw");
   assert(cl()->is_main_loop(), "can only do alignment for main loop");
 
   // The opaque node for the limit, where we adjust the input
@@ -3556,10 +3577,7 @@ void SuperWord::adjust_pre_loop_limit_to_align_main_loop_vectors() {
   // = MIN(new_limit, orig_limit) (15a, stride > 0)
   // constrained_limit = MAX(old_limit - adjust_pre_iter, orig_limit)
   // = MAX(new_limit, orig_limit) (15a, stride < 0)
-
-  // We chose an aw that is the maximal possible vector width for the type of
-  // align_to_ref.
-  const int aw = vector_width_in_bytes(align_to_ref);
+  //
   const int stride = iv_stride();
   const int scale = align_to_ref_p.scale_in_bytes();
   const int offset = align_to_ref_p.offset_in_bytes();
diff --git a/src/hotspot/share/opto/superword.hpp b/src/hotspot/share/opto/superword.hpp
index 7d1ba1131f3..159032d94b9 100644
--- a/src/hotspot/share/opto/superword.hpp
+++ b/src/hotspot/share/opto/superword.hpp
@@ -411,11 +411,15 @@ class SuperWord : public ResourceObj {
 
   GrowableArray<SWNodeInfo> _node_info; // Info needed per node
   CloneMap& _clone_map; // map of nodes created in cloning
-  MemNode const* _align_to_ref; // Memory reference that pre-loop will align to
 
   PairSet _pairset;
   PackSet _packset;
 
+  // Memory reference, and the alignment width (aw) for which we align the main-loop,
+  // by adjusting the pre-loop limit.
+  MemNode const* _mem_ref_for_main_loop_alignment;
+  int _aw_for_main_loop_alignment;
+
  public:
   SuperWord(const VLoopAnalyzer &vloop_analyzer);
 
@@ -563,8 +567,6 @@ class SuperWord : public ResourceObj {
   Arena* arena() { return &_arena; }
 
   int get_vw_bytes_special(MemNode* s);
-  const MemNode* align_to_ref() const { return _align_to_ref; }
-  void set_align_to_ref(const MemNode* m) { _align_to_ref = m; }
 
   // Ensure node_info contains element "i"
   void grow_node_info(int i) { if (i >= _node_info.length()) _node_info.at_put_grow(i, SWNodeInfo::initial); }
@@ -670,6 +672,7 @@ private:
   // Alignment within a vector memory reference
   int memory_alignment(MemNode* s, int iv_adjust);
   // Ensure that the main loop vectors are aligned by adjusting the pre loop limit.
+  void determine_mem_ref_and_aw_for_main_loop_alignment();
   void adjust_pre_loop_limit_to_align_main_loop_vectors();
 };
 