8331764: C2 SuperWord: refactor _align_to_ref/_mem_ref_for_main_loop_alignment

Reviewed-by: kvn, chagedorn
This commit is contained in:
Emanuel Peter 2024-05-13 05:45:36 +00:00
parent 5053b70a7f
commit d517d2df45
2 changed files with 47 additions and 26 deletions
src/hotspot/share/opto

@ -45,12 +45,13 @@ SuperWord::SuperWord(const VLoopAnalyzer &vloop_analyzer) :
_arena(mtCompiler),
_node_info(arena(), _vloop.estimated_body_length(), 0, SWNodeInfo::initial), // info needed per node
_clone_map(phase()->C->clone_map()), // map of nodes created in cloning
_align_to_ref(nullptr), // memory reference to align vectors to
_pairset(&_arena, _vloop_analyzer),
_packset(&_arena, _vloop_analyzer
NOT_PRODUCT(COMMA is_trace_superword_packset())
NOT_PRODUCT(COMMA is_trace_superword_rejections())
),
_mem_ref_for_main_loop_alignment(nullptr),
_aw_for_main_loop_alignment(0),
_do_vector_loop(phase()->C->do_vector_loop()), // whether to do vectorization/simd style
_num_work_vecs(0), // amount of vector work we have
_num_reductions(0) // amount of reduction work we have
@ -516,22 +517,12 @@ void SuperWord::find_adjacent_refs() {
int max_idx;
// Take the first mem_ref as the reference to align to. The pre-loop trip count is
// modified to align this reference to a vector-aligned address. If strict alignment
// is required, we may change the reference later (see filter_packs_for_alignment()).
MemNode* align_to_mem_ref = nullptr;
while (memops.size() != 0) {
// Find a memory reference to align to.
MemNode* mem_ref = find_align_to_ref(memops, max_idx);
if (mem_ref == nullptr) break;
int iv_adjustment = get_iv_adjustment(mem_ref);
if (align_to_mem_ref == nullptr) {
align_to_mem_ref = mem_ref;
set_align_to_ref(align_to_mem_ref);
}
const VPointer& align_to_ref_p = vpointer(mem_ref);
// Set alignment relative to "align_to_ref" for all related memory operations.
for (int i = memops.size() - 1; i >= 0; i--) {
@ -573,9 +564,6 @@ void SuperWord::find_adjacent_refs() {
}
} // while (memops.size() != 0)
assert(_pairset.is_empty() || align_to_mem_ref != nullptr,
"pairset empty or we find the alignment reference");
#ifndef PRODUCT
if (is_trace_superword_packset()) {
tty->print_cr("\nAfter Superword::find_adjacent_refs");
@ -1723,7 +1711,11 @@ void SuperWord::filter_packs_for_alignment() {
if (current->is_constrained()) {
// Solution is constrained (not trivial)
// -> must change pre-limit to achieve alignment
set_align_to_ref(current->as_constrained()->mem_ref());
MemNode const* mem = current->as_constrained()->mem_ref();
Node_List* pack = get_pack(mem);
assert(pack != nullptr, "memop of final solution must still be packed");
_mem_ref_for_main_loop_alignment = mem;
_aw_for_main_loop_alignment = pack->size() * mem->memory_size();
}
}
@ -3397,6 +3389,32 @@ LoadNode::ControlDependency SuperWord::control_dependency(Node_List* p) {
return dep;
}
// Find the memop pack with the maximum vector width, unless they were already
// determined by SuperWord::filter_packs_for_alignment().
void SuperWord::determine_mem_ref_and_aw_for_main_loop_alignment() {
if (_mem_ref_for_main_loop_alignment != nullptr) {
assert(vectors_should_be_aligned(), "mem_ref only set if filtered for alignment");
return;
}
MemNode const* mem_ref = nullptr;
int max_aw = 0;
for (int i = 0; i < _packset.length(); i++) {
Node_List* pack = _packset.at(i);
MemNode* first = pack->at(0)->isa_Mem();
if (first == nullptr) { continue; }
int vw = first->memory_size() * pack->size();
if (vw > max_aw) {
max_aw = vw;
mem_ref = first;
}
}
assert(mem_ref != nullptr && max_aw > 0, "found mem_ref and aw");
_mem_ref_for_main_loop_alignment = mem_ref;
_aw_for_main_loop_alignment = max_aw;
}
#define TRACE_ALIGN_VECTOR_NODE(node) { \
DEBUG_ONLY( \
if (is_trace_align_vector()) { \
@ -3407,11 +3425,14 @@ LoadNode::ControlDependency SuperWord::control_dependency(Node_List* p) {
} \
// Ensure that the main loop vectors are aligned by adjusting the pre loop limit. We memory-align
// the address of "align_to_ref" to the maximal possible vector width. We adjust the pre-loop
// iteration count by adjusting the pre-loop limit.
// the address of "_mem_ref_for_main_loop_alignment" to "_aw_for_main_loop_alignment", which is a
// sufficiently large alignment width. We adjust the pre-loop iteration count by adjusting the
// pre-loop limit.
void SuperWord::adjust_pre_loop_limit_to_align_main_loop_vectors() {
const MemNode* align_to_ref = _align_to_ref;
assert(align_to_ref != nullptr, "align_to_ref must be set");
determine_mem_ref_and_aw_for_main_loop_alignment();
const MemNode* align_to_ref = _mem_ref_for_main_loop_alignment;
const int aw = _aw_for_main_loop_alignment;
assert(align_to_ref != nullptr && aw > 0, "must have alignment reference and aw");
assert(cl()->is_main_loop(), "can only do alignment for main loop");
// The opaque node for the limit, where we adjust the input
@ -3556,10 +3577,7 @@ void SuperWord::adjust_pre_loop_limit_to_align_main_loop_vectors() {
// = MIN(new_limit, orig_limit) (15a, stride > 0)
// constrained_limit = MAX(old_limit - adjust_pre_iter, orig_limit)
// = MAX(new_limit, orig_limit) (15a, stride < 0)
// We chose an aw that is the maximal possible vector width for the type of
// align_to_ref.
const int aw = vector_width_in_bytes(align_to_ref);
//
const int stride = iv_stride();
const int scale = align_to_ref_p.scale_in_bytes();
const int offset = align_to_ref_p.offset_in_bytes();

@ -411,11 +411,15 @@ class SuperWord : public ResourceObj {
GrowableArray<SWNodeInfo> _node_info; // Info needed per node
CloneMap& _clone_map; // map of nodes created in cloning
MemNode const* _align_to_ref; // Memory reference that pre-loop will align to
PairSet _pairset;
PackSet _packset;
// Memory reference, and the alignment width (aw) for which we align the main-loop,
// by adjusting the pre-loop limit.
MemNode const* _mem_ref_for_main_loop_alignment;
int _aw_for_main_loop_alignment;
public:
SuperWord(const VLoopAnalyzer &vloop_analyzer);
@ -563,8 +567,6 @@ class SuperWord : public ResourceObj {
Arena* arena() { return &_arena; }
int get_vw_bytes_special(MemNode* s);
const MemNode* align_to_ref() const { return _align_to_ref; }
void set_align_to_ref(const MemNode* m) { _align_to_ref = m; }
// Ensure node_info contains element "i"
void grow_node_info(int i) { if (i >= _node_info.length()) _node_info.at_put_grow(i, SWNodeInfo::initial); }
@ -670,6 +672,7 @@ private:
// Alignment within a vector memory reference
int memory_alignment(MemNode* s, int iv_adjust);
// Ensure that the main loop vectors are aligned by adjusting the pre loop limit.
void determine_mem_ref_and_aw_for_main_loop_alignment();
void adjust_pre_loop_limit_to_align_main_loop_vectors();
};