8308606: C2 SuperWord: remove alignment checks when not required
Reviewed-by: fgao, kvn, pli
This commit is contained in:
parent
47d00a4cbe
commit
886ac1c261
@ -724,9 +724,9 @@ void SuperWord::find_adjacent_refs() {
|
||||
}
|
||||
}
|
||||
|
||||
if (can_create_pairs(mem_ref, iv_adjustment, align_to_ref_p,
|
||||
best_align_to_mem_ref, best_iv_adjustment,
|
||||
align_to_refs)) {
|
||||
if (mem_ref_has_no_alignment_violation(mem_ref, iv_adjustment, align_to_ref_p,
|
||||
best_align_to_mem_ref, best_iv_adjustment,
|
||||
align_to_refs)) {
|
||||
// Create initial pack pairs of memory operations for which alignment was set.
|
||||
for (uint i = 0; i < memops.size(); i++) {
|
||||
Node* s1 = memops.at(i);
|
||||
@ -836,93 +836,45 @@ void SuperWord::find_adjacent_refs_trace_1(Node* best_align_to_mem_ref, int best
|
||||
}
|
||||
#endif
|
||||
|
||||
// Check if we can create the pack pairs for mem_ref:
|
||||
// If required, enforce strict alignment requirements of hardware.
|
||||
// Else, only enforce alignment within a memory slice, so that there cannot be any
|
||||
// memory-dependence between different vector "lanes".
|
||||
bool SuperWord::can_create_pairs(MemNode* mem_ref, int iv_adjustment, SWPointer &align_to_ref_p,
|
||||
MemNode* best_align_to_mem_ref, int best_iv_adjustment,
|
||||
Node_List &align_to_refs) {
|
||||
bool is_aligned_with_best = memory_alignment(mem_ref, best_iv_adjustment) == 0;
|
||||
|
||||
if (vectors_should_be_aligned()) {
|
||||
// All vectors need to be memory aligned, modulo their vector_width. This is more strict
|
||||
// than the hardware probably requires. Most hardware at most requires 4-byte alignment.
|
||||
//
|
||||
// In the pre-loop, we align best_align_to_mem_ref to its vector_length. To ensure that
|
||||
// all mem_ref's are memory aligned modulo their vector_width, we only need to check that
|
||||
// they are all aligned to best_align_to_mem_ref, modulo their vector_width. For that,
|
||||
// we check the following 3 conditions.
|
||||
|
||||
// (1) All packs are aligned with best_align_to_mem_ref.
|
||||
if (!is_aligned_with_best) {
|
||||
return false;
|
||||
}
|
||||
// (2) All other vectors have vector_width less than or equal to that of best_align_to_mem_ref.
|
||||
int vw = vector_width(mem_ref);
|
||||
int vw_best = vector_width(best_align_to_mem_ref);
|
||||
if (vw > vw_best) {
|
||||
// We only align to vector_width of best_align_to_mem_ref during pre-loop.
|
||||
// A mem_ref with a larger vector_width might thus not be vector_width aligned.
|
||||
return false;
|
||||
}
|
||||
// (3) Ensure that all vectors have the same invariant. We model memory accesses like this
|
||||
// address = base + k*iv + constant [+ invar]
|
||||
// memory_alignment ignores the invariant.
|
||||
SWPointer p2(best_align_to_mem_ref, this, nullptr, false);
|
||||
if (!align_to_ref_p.invar_equals(p2)) {
|
||||
// Do not vectorize memory accesses with different invariants
|
||||
// if unaligned memory accesses are not allowed.
|
||||
return false;
|
||||
}
|
||||
// If strict memory alignment is required (vectors_should_be_aligned), then check if
|
||||
// mem_ref is aligned with best_align_to_mem_ref.
|
||||
bool SuperWord::mem_ref_has_no_alignment_violation(MemNode* mem_ref, int iv_adjustment, SWPointer &align_to_ref_p,
|
||||
MemNode* best_align_to_mem_ref, int best_iv_adjustment,
|
||||
Node_List &align_to_refs) {
|
||||
if (!vectors_should_be_aligned()) {
|
||||
// Alignment is not required by the hardware. No violation possible.
|
||||
return true;
|
||||
} else {
|
||||
// Alignment is not required by the hardware.
|
||||
|
||||
// However, we need to ensure that the pack for mem_ref is independent, i.e. all members
|
||||
// of the pack are mutually independent.
|
||||
|
||||
if (_do_vector_loop) {
|
||||
// Wait until combine_packs to check independence of packs. For now we just know that
|
||||
// the adjacent pairs are independent. This allows us to vectorize when we do not have
|
||||
// alignment modulo vector_width. For example (forward read):
|
||||
// for (int i ...) { v[i] = v[i + 1] + 5; }
|
||||
// The following will be filtered out in combine_packs (forward write):
|
||||
// for (int i ...) { v[i + 1] = v[i] + 5; }
|
||||
return true;
|
||||
}
|
||||
|
||||
// If all mem_ref's are modulo vector_width aligned with all other mem_ref's of their
|
||||
// memory slice, then the VectorLoad / VectorStore regions are either exactly overlapping
|
||||
// or completely non-overlapping. This ensures that there cannot be memory-dependencies
|
||||
// between different vector "lanes".
|
||||
// During SuperWord::filter_packs -> SuperWord::profitable -> SuperWord::is_vector_use,
|
||||
// we check that all inputs are vectors that match on every element (with some reasonable
|
||||
// exceptions). This ensures that every "lane" is isomorphic and independent of all other
|
||||
// "lanes". This allows us to vectorize these cases:
|
||||
// for (int i ...) { v[i] = v[i] + 5; } // same alignment
|
||||
// for (int i ...) { v[i] = v[i + 32] + 5; } // alignment modulo vector_width
|
||||
if (same_memory_slice(mem_ref, best_align_to_mem_ref)) {
|
||||
return is_aligned_with_best;
|
||||
} else {
|
||||
return is_mem_ref_aligned_with_same_memory_slice(mem_ref, iv_adjustment, align_to_refs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check if alignment of mem_ref is consistent with the other packs of the same memory slice
|
||||
bool SuperWord::is_mem_ref_aligned_with_same_memory_slice(MemNode* mem_ref, int iv_adjustment,
|
||||
Node_List &align_to_refs) {
|
||||
for (uint i = 0; i < align_to_refs.size(); i++) {
|
||||
MemNode* mr = align_to_refs.at(i)->as_Mem();
|
||||
if (mr != mem_ref &&
|
||||
same_memory_slice(mr, mem_ref) &&
|
||||
memory_alignment(mr, iv_adjustment) != 0) {
|
||||
// mem_ref is misaligned with mr, another ref of the same memory slice.
|
||||
return false;
|
||||
}
|
||||
// All vectors need to be memory aligned, modulo their vector_width. This is more strict
|
||||
// than the hardware probably requires. Most hardware at most requires 4-byte alignment.
|
||||
//
|
||||
// In the pre-loop, we align best_align_to_mem_ref to its vector_length. To ensure that
|
||||
// all mem_ref's are memory aligned modulo their vector_width, we only need to check that
|
||||
// they are all aligned to best_align_to_mem_ref, modulo their vector_width. For that,
|
||||
// we check the following 3 conditions.
|
||||
|
||||
// (1) All packs are aligned with best_align_to_mem_ref.
|
||||
if (memory_alignment(mem_ref, best_iv_adjustment) != 0) {
|
||||
return false;
|
||||
}
|
||||
// (2) All other vectors have vector_width less than or equal to that of best_align_to_mem_ref.
|
||||
int vw = vector_width(mem_ref);
|
||||
int vw_best = vector_width(best_align_to_mem_ref);
|
||||
if (vw > vw_best) {
|
||||
// We only align to vector_width of best_align_to_mem_ref during pre-loop.
|
||||
// A mem_ref with a larger vector_width might thus not be vector_width aligned.
|
||||
return false;
|
||||
}
|
||||
// (3) Ensure that all vectors have the same invariant. We model memory accesses like this
|
||||
// address = base + k*iv + constant [+ invar]
|
||||
// memory_alignment ignores the invariant.
|
||||
SWPointer p2(best_align_to_mem_ref, this, nullptr, false);
|
||||
if (!align_to_ref_p.invar_equals(p2)) {
|
||||
// Do not vectorize memory accesses with different invariants
|
||||
// if unaligned memory accesses are not allowed.
|
||||
return false;
|
||||
}
|
||||
// No misalignment found.
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -1901,9 +1853,14 @@ void SuperWord::combine_packs() {
|
||||
assert(is_power_of_2(max_vlen), "sanity");
|
||||
uint psize = p1->size();
|
||||
if (!is_power_of_2(psize)) {
|
||||
// Skip pack which can't be vector.
|
||||
// case1: for(...) { a[i] = i; } elements values are different (i+x)
|
||||
// case2: for(...) { a[i] = b[i+1]; } can't align both, load and store
|
||||
// We currently only support power-of-2 sizes for vectors.
|
||||
#ifndef PRODUCT
|
||||
if (TraceSuperWord) {
|
||||
tty->cr();
|
||||
tty->print_cr("WARNING: Removed pack[%d] with size that is not a power of 2:", i);
|
||||
print_pack(p1);
|
||||
}
|
||||
#endif
|
||||
_packset.at_put(i, nullptr);
|
||||
continue;
|
||||
}
|
||||
@ -1922,28 +1879,41 @@ void SuperWord::combine_packs() {
|
||||
}
|
||||
}
|
||||
|
||||
if (_do_vector_loop) {
|
||||
// Since we did not enforce exact alignment of the packsets, we only know that there
|
||||
// is no dependence with distance 1, because we have checked independent(s1, s2) for
|
||||
// all adjacent memops. But there could be a dependence of a different distance.
|
||||
// Hence: remove the pack if there is a dependence.
|
||||
for (int i = 0; i < _packset.length(); i++) {
|
||||
Node_List* p = _packset.at(i);
|
||||
if (p != nullptr) {
|
||||
Node* dependence = find_dependence(p);
|
||||
if (dependence != nullptr) {
|
||||
// We know that the nodes in a pair pack were independent - this gives us independence
|
||||
// at distance 1. But now that we may have more than 2 nodes in a pack, we need to check
|
||||
// if they are all mutually independent. If there is a dependence we remove the pack.
|
||||
// This is better than giving up completely - we can have partial vectorization if some
|
||||
// are rejected and others still accepted.
|
||||
//
|
||||
// Examples with dependence at distance 1 (pack pairs are not created):
|
||||
// for (int i ...) { v[i + 1] = v[i] + 5; }
|
||||
// for (int i ...) { v[i] = v[i - 1] + 5; }
|
||||
//
|
||||
// Example with independence at distance 1, but dependence at distance 2 (pack pairs are
|
||||
// created and we need to filter them out now):
|
||||
// for (int i ...) { v[i + 2] = v[i] + 5; }
|
||||
// for (int i ...) { v[i] = v[i - 2] + 5; }
|
||||
//
|
||||
// Note: dependencies are created when a later load may reference the same memory location
|
||||
// as an earlier store. This happens in "read backward" or "store forward" cases. On the
|
||||
// other hand, "read forward" or "store backward" cases do not have such dependencies:
|
||||
// for (int i ...) { v[i] = v[i + 1] + 5; }
|
||||
// for (int i ...) { v[i - 1] = v[i] + 5; }
|
||||
for (int i = 0; i < _packset.length(); i++) {
|
||||
Node_List* p = _packset.at(i);
|
||||
if (p != nullptr) {
|
||||
Node* dependence = find_dependence(p);
|
||||
if (dependence != nullptr) {
|
||||
#ifndef PRODUCT
|
||||
if (TraceSuperWord) {
|
||||
tty->cr();
|
||||
tty->print_cr("WARNING: Found dependency.");
|
||||
tty->print_cr("Cannot vectorize despite compile directive Vectorize.");
|
||||
dependence->dump();
|
||||
tty->print_cr("In pack[%d]", i);
|
||||
print_pack(p);
|
||||
}
|
||||
#endif
|
||||
_packset.at_put(i, nullptr);
|
||||
if (TraceSuperWord) {
|
||||
tty->cr();
|
||||
tty->print_cr("WARNING: Found dependency at distance greater than 1.");
|
||||
dependence->dump();
|
||||
tty->print_cr("In pack[%d]", i);
|
||||
print_pack(p);
|
||||
}
|
||||
#endif
|
||||
_packset.at_put(i, nullptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -3757,7 +3727,7 @@ int SuperWord::memory_alignment(MemNode* s, int iv_adjust) {
|
||||
int off_mod = off_rem >= 0 ? off_rem : off_rem + vw;
|
||||
#ifndef PRODUCT
|
||||
if ((TraceSuperWord && Verbose) || is_trace_alignment()) {
|
||||
tty->print_cr("SWPointer::memory_alignment: off_rem = %d, off_mod = %d", off_rem, off_mod);
|
||||
tty->print_cr("SWPointer::memory_alignment: off_rem = %d, off_mod = %d (offset = %d)", off_rem, off_mod, offset);
|
||||
}
|
||||
#endif
|
||||
return off_mod;
|
||||
|
@ -513,15 +513,11 @@ private:
|
||||
void find_adjacent_refs_trace_1(Node* best_align_to_mem_ref, int best_iv_adjustment);
|
||||
void print_loop(bool whole);
|
||||
#endif
|
||||
// Check if we can create the pack pairs for mem_ref:
|
||||
// If required, enforce strict alignment requirements of hardware.
|
||||
// Else, only enforce alignment within a memory slice, so that there cannot be any
|
||||
// memory-dependence between different vector "lanes".
|
||||
bool can_create_pairs(MemNode* mem_ref, int iv_adjustment, SWPointer &align_to_ref_p,
|
||||
MemNode* best_align_to_mem_ref, int best_iv_adjustment,
|
||||
Node_List &align_to_refs);
|
||||
// Check if alignment of mem_ref is consistent with the other packs of the same memory slice.
|
||||
bool is_mem_ref_aligned_with_same_memory_slice(MemNode* mem_ref, int iv_adjustment, Node_List &align_to_refs);
|
||||
// If strict memory alignment is required (vectors_should_be_aligned), then check if
|
||||
// mem_ref is aligned with best_align_to_mem_ref.
|
||||
bool mem_ref_has_no_alignment_violation(MemNode* mem_ref, int iv_adjustment, SWPointer &align_to_ref_p,
|
||||
MemNode* best_align_to_mem_ref, int best_iv_adjustment,
|
||||
Node_List &align_to_refs);
|
||||
// Find a memory reference to align the loop induction variable to.
|
||||
MemNode* find_align_to_ref(Node_List &memops, int &idx);
|
||||
// Calculate loop's iv adjustment for this memory ops.
|
||||
|
@ -25,10 +25,9 @@
|
||||
* Summary:
|
||||
* Test SuperWord vectorization with different access offsets
|
||||
* and various MaxVectorSize values, and +- AlignVector.
|
||||
* Note: CompileCommand Option Vectorize is enabled.
|
||||
*
|
||||
* Note: this test is auto-generated. Please modify / generate with script:
|
||||
* https://bugs.openjdk.org/browse/JDK-8298935
|
||||
* https://bugs.openjdk.org/browse/JDK-8308606
|
||||
*
|
||||
* Types: int, long, short, char, byte, float, double
|
||||
* Offsets: 0, -1, 1, -2, 2, -3, 3, -4, 4, -7, 7, -8, 8, -14, 14, -16, 16, -18, 18, -20, 20, -31, 31, -32, 32, -63, 63, -64, 64, -65, 65, -128, 128, -129, 129, -192, 192
|
||||
@ -91,7 +90,7 @@
|
||||
|
||||
/*
|
||||
* @test id=vanilla-A
|
||||
* @bug 8298935
|
||||
* @bug 8298935 8308606
|
||||
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
|
||||
* @requires vm.compiler2.enabled
|
||||
* @library /test/lib /
|
||||
@ -100,7 +99,7 @@
|
||||
|
||||
/*
|
||||
* @test id=vanilla-U
|
||||
* @bug 8298935
|
||||
* @bug 8298935 8308606
|
||||
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
|
||||
* @requires vm.compiler2.enabled
|
||||
* @library /test/lib /
|
||||
@ -109,7 +108,7 @@
|
||||
|
||||
/*
|
||||
* @test id=sse4-v016-A
|
||||
* @bug 8298935
|
||||
* @bug 8298935 8308606
|
||||
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
|
||||
@ -120,7 +119,7 @@
|
||||
|
||||
/*
|
||||
* @test id=sse4-v016-U
|
||||
* @bug 8298935
|
||||
* @bug 8298935 8308606
|
||||
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
|
||||
@ -131,7 +130,7 @@
|
||||
|
||||
/*
|
||||
* @test id=sse4-v008-A
|
||||
* @bug 8298935
|
||||
* @bug 8298935 8308606
|
||||
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
|
||||
@ -142,7 +141,7 @@
|
||||
|
||||
/*
|
||||
* @test id=sse4-v008-U
|
||||
* @bug 8298935
|
||||
* @bug 8298935 8308606
|
||||
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
|
||||
@ -153,7 +152,7 @@
|
||||
|
||||
/*
|
||||
* @test id=sse4-v004-A
|
||||
* @bug 8298935
|
||||
* @bug 8298935 8308606
|
||||
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
|
||||
@ -164,7 +163,7 @@
|
||||
|
||||
/*
|
||||
* @test id=sse4-v004-U
|
||||
* @bug 8298935
|
||||
* @bug 8298935 8308606
|
||||
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
|
||||
@ -175,7 +174,7 @@
|
||||
|
||||
/*
|
||||
* @test id=sse4-v002-A
|
||||
* @bug 8298935
|
||||
* @bug 8298935 8308606
|
||||
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
|
||||
@ -186,7 +185,7 @@
|
||||
|
||||
/*
|
||||
* @test id=sse4-v002-U
|
||||
* @bug 8298935
|
||||
* @bug 8298935 8308606
|
||||
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
|
||||
@ -197,7 +196,7 @@
|
||||
|
||||
/*
|
||||
* @test id=avx1-v032-A
|
||||
* @bug 8298935
|
||||
* @bug 8298935 8308606
|
||||
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
|
||||
@ -208,7 +207,7 @@
|
||||
|
||||
/*
|
||||
* @test id=avx1-v032-U
|
||||
* @bug 8298935
|
||||
* @bug 8298935 8308606
|
||||
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
|
||||
@ -219,7 +218,7 @@
|
||||
|
||||
/*
|
||||
* @test id=avx1-v016-A
|
||||
* @bug 8298935
|
||||
* @bug 8298935 8308606
|
||||
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
|
||||
@ -230,7 +229,7 @@
|
||||
|
||||
/*
|
||||
* @test id=avx1-v016-U
|
||||
* @bug 8298935
|
||||
* @bug 8298935 8308606
|
||||
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
|
||||
@ -241,7 +240,7 @@
|
||||
|
||||
/*
|
||||
* @test id=avx2-v032-A
|
||||
* @bug 8298935
|
||||
* @bug 8298935 8308606
|
||||
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
|
||||
@ -252,7 +251,7 @@
|
||||
|
||||
/*
|
||||
* @test id=avx2-v032-U
|
||||
* @bug 8298935
|
||||
* @bug 8298935 8308606
|
||||
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
|
||||
@ -263,7 +262,7 @@
|
||||
|
||||
/*
|
||||
* @test id=avx2-v016-A
|
||||
* @bug 8298935
|
||||
* @bug 8298935 8308606
|
||||
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
|
||||
@ -274,7 +273,7 @@
|
||||
|
||||
/*
|
||||
* @test id=avx2-v016-U
|
||||
* @bug 8298935
|
||||
* @bug 8298935 8308606
|
||||
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
|
||||
@ -285,7 +284,7 @@
|
||||
|
||||
/*
|
||||
* @test id=avx512-v064-A
|
||||
* @bug 8298935
|
||||
* @bug 8298935 8308606
|
||||
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
|
||||
@ -296,7 +295,7 @@
|
||||
|
||||
/*
|
||||
* @test id=avx512-v064-U
|
||||
* @bug 8298935
|
||||
* @bug 8298935 8308606
|
||||
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
|
||||
@ -307,7 +306,7 @@
|
||||
|
||||
/*
|
||||
* @test id=avx512-v032-A
|
||||
* @bug 8298935
|
||||
* @bug 8298935 8308606
|
||||
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
|
||||
@ -318,7 +317,7 @@
|
||||
|
||||
/*
|
||||
* @test id=avx512-v032-U
|
||||
* @bug 8298935
|
||||
* @bug 8298935 8308606
|
||||
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
|
||||
@ -329,7 +328,7 @@
|
||||
|
||||
/*
|
||||
* @test id=avx512bw-v064-A
|
||||
* @bug 8298935
|
||||
* @bug 8298935 8308606
|
||||
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
|
||||
@ -340,7 +339,7 @@
|
||||
|
||||
/*
|
||||
* @test id=avx512bw-v064-U
|
||||
* @bug 8298935
|
||||
* @bug 8298935 8308606
|
||||
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
|
||||
@ -351,7 +350,7 @@
|
||||
|
||||
/*
|
||||
* @test id=avx512bw-v032-A
|
||||
* @bug 8298935
|
||||
* @bug 8298935 8308606
|
||||
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
|
||||
@ -362,7 +361,7 @@
|
||||
|
||||
/*
|
||||
* @test id=avx512bw-v032-U
|
||||
* @bug 8298935
|
||||
* @bug 8298935 8308606
|
||||
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")
|
||||
@ -373,7 +372,7 @@
|
||||
|
||||
/*
|
||||
* @test id=vec-v064-A
|
||||
* @bug 8298935
|
||||
* @bug 8298935 8308606
|
||||
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64")
|
||||
@ -383,7 +382,7 @@
|
||||
|
||||
/*
|
||||
* @test id=vec-v064-U
|
||||
* @bug 8298935
|
||||
* @bug 8298935 8308606
|
||||
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64")
|
||||
@ -393,7 +392,7 @@
|
||||
|
||||
/*
|
||||
* @test id=vec-v032-A
|
||||
* @bug 8298935
|
||||
* @bug 8298935 8308606
|
||||
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64")
|
||||
@ -403,7 +402,7 @@
|
||||
|
||||
/*
|
||||
* @test id=vec-v032-U
|
||||
* @bug 8298935
|
||||
* @bug 8298935 8308606
|
||||
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64")
|
||||
@ -413,7 +412,7 @@
|
||||
|
||||
/*
|
||||
* @test id=vec-v016-A
|
||||
* @bug 8298935
|
||||
* @bug 8298935 8308606
|
||||
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64")
|
||||
@ -423,7 +422,7 @@
|
||||
|
||||
/*
|
||||
* @test id=vec-v016-U
|
||||
* @bug 8298935
|
||||
* @bug 8298935 8308606
|
||||
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64")
|
||||
@ -433,7 +432,7 @@
|
||||
|
||||
/*
|
||||
* @test id=vec-v008-A
|
||||
* @bug 8298935
|
||||
* @bug 8298935 8308606
|
||||
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64")
|
||||
@ -443,7 +442,7 @@
|
||||
|
||||
/*
|
||||
* @test id=vec-v008-U
|
||||
* @bug 8298935
|
||||
* @bug 8298935 8308606
|
||||
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64")
|
||||
@ -453,7 +452,7 @@
|
||||
|
||||
/*
|
||||
* @test id=vec-v004-A
|
||||
* @bug 8298935
|
||||
* @bug 8298935 8308606
|
||||
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64")
|
||||
@ -463,7 +462,7 @@
|
||||
|
||||
/*
|
||||
* @test id=vec-v004-U
|
||||
* @bug 8298935
|
||||
* @bug 8298935 8308606
|
||||
* @summary Test SuperWord: vector size, offsets, dependencies, alignment.
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64")
|
||||
@ -1262,7 +1261,6 @@ public class TestDependencyOffsets {
|
||||
public static void main(String args[]) {
|
||||
TestFramework framework = new TestFramework(TestDependencyOffsets.class);
|
||||
framework.addFlags("-XX:-TieredCompilation",
|
||||
"-XX:CompileCommand=option,compiler.loopopts.superword.TestDependencyOffsets::test*,Vectorize",
|
||||
"-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestDependencyOffsets::init",
|
||||
"-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestDependencyOffsets::test*",
|
||||
"-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestDependencyOffsets::verify",
|
||||
|
@ -161,9 +161,11 @@ public class LoopArrayIndexComputeTest extends VectorizationTestRunner {
|
||||
}
|
||||
|
||||
@Test
|
||||
// Note that this case cannot be vectorized due to data dependence.
|
||||
@IR(failOn = {IRNode.STORE_VECTOR})
|
||||
public int[] indexWithDifferentConstants() {
|
||||
// No true dependency in read-forward case.
|
||||
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
|
||||
applyIf = {"AlignVector", "false"},
|
||||
counts = {IRNode.STORE_VECTOR, ">0"})
|
||||
public int[] indexWithDifferentConstantsPos() {
|
||||
int[] res = new int[SIZE];
|
||||
for (int i = 0; i < SIZE / 4; i++) {
|
||||
res[i] = ints[i + 1];
|
||||
@ -171,6 +173,17 @@ public class LoopArrayIndexComputeTest extends VectorizationTestRunner {
|
||||
return res;
|
||||
}
|
||||
|
||||
@Test
|
||||
// Note that this case cannot be vectorized due to data dependence.
|
||||
@IR(failOn = {IRNode.STORE_VECTOR})
|
||||
public int[] indexWithDifferentConstantsNeg() {
|
||||
int[] res = new int[SIZE];
|
||||
for (int i = 1; i < SIZE / 4; i++) {
|
||||
res[i] = ints[i - 1];
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
@Test
|
||||
// Note that this case cannot be vectorized due to data dependence.
|
||||
@IR(failOn = {IRNode.STORE_VECTOR})
|
||||
@ -246,10 +259,13 @@ public class LoopArrayIndexComputeTest extends VectorizationTestRunner {
|
||||
}
|
||||
|
||||
// ---------------- Subword Type Arrays ----------------
|
||||
|
||||
@Test
|
||||
// Note that this case cannot be vectorized due to data dependence.
|
||||
@IR(failOn = {IRNode.STORE_VECTOR})
|
||||
public short[] shortArrayWithDependence() {
|
||||
// No true dependency in read-forward case.
|
||||
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
|
||||
applyIf = {"AlignVector", "false"},
|
||||
counts = {IRNode.STORE_VECTOR, ">0"})
|
||||
public short[] shortArrayWithDependencePos() {
|
||||
short[] res = new short[SIZE];
|
||||
System.arraycopy(shorts, 0, res, 0, SIZE);
|
||||
for (int i = 0; i < SIZE / 2; i++) {
|
||||
@ -261,7 +277,21 @@ public class LoopArrayIndexComputeTest extends VectorizationTestRunner {
|
||||
@Test
|
||||
// Note that this case cannot be vectorized due to data dependence.
|
||||
@IR(failOn = {IRNode.STORE_VECTOR})
|
||||
public char[] charArrayWithDependence() {
|
||||
public short[] shortArrayWithDependenceNeg() {
|
||||
short[] res = new short[SIZE];
|
||||
System.arraycopy(shorts, 0, res, 0, SIZE);
|
||||
for (int i = 1; i < SIZE / 2; i++) {
|
||||
res[i] *= shorts[i - 1];
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
@Test
|
||||
// No true dependency in read-forward case.
|
||||
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
|
||||
applyIf = {"AlignVector", "false"},
|
||||
counts = {IRNode.STORE_VECTOR, ">0"})
|
||||
public char[] charArrayWithDependencePos() {
|
||||
char[] res = new char[SIZE];
|
||||
System.arraycopy(chars, 0, res, 0, SIZE);
|
||||
for (int i = 0; i < SIZE / 2; i++) {
|
||||
@ -273,7 +303,21 @@ public class LoopArrayIndexComputeTest extends VectorizationTestRunner {
|
||||
@Test
|
||||
// Note that this case cannot be vectorized due to data dependence.
|
||||
@IR(failOn = {IRNode.STORE_VECTOR})
|
||||
public byte[] byteArrayWithDependence() {
|
||||
public char[] charArrayWithDependenceNeg() {
|
||||
char[] res = new char[SIZE];
|
||||
System.arraycopy(chars, 0, res, 0, SIZE);
|
||||
for (int i = 2; i < SIZE / 2; i++) {
|
||||
res[i] *= chars[i - 2];
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
@Test
|
||||
// No true dependency in read-forward case.
|
||||
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
|
||||
applyIf = {"AlignVector", "false"},
|
||||
counts = {IRNode.STORE_VECTOR, ">0"})
|
||||
public byte[] byteArrayWithDependencePos() {
|
||||
byte[] res = new byte[SIZE];
|
||||
System.arraycopy(bytes, 0, res, 0, SIZE);
|
||||
for (int i = 0; i < SIZE / 2; i++) {
|
||||
@ -282,10 +326,25 @@ public class LoopArrayIndexComputeTest extends VectorizationTestRunner {
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
// Note that this case cannot be vectorized due to data dependence.
|
||||
@IR(failOn = {IRNode.STORE_VECTOR})
|
||||
public boolean[] booleanArrayWithDependence() {
|
||||
public byte[] byteArrayWithDependenceNeg() {
|
||||
byte[] res = new byte[SIZE];
|
||||
System.arraycopy(bytes, 0, res, 0, SIZE);
|
||||
for (int i = 3; i < SIZE / 2; i++) {
|
||||
res[i] *= bytes[i - 3];
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
@Test
|
||||
// No true dependency in read-forward case.
|
||||
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
|
||||
applyIf = {"AlignVector", "false"},
|
||||
counts = {IRNode.STORE_VECTOR, ">0"})
|
||||
public boolean[] booleanArrayWithDependencePos() {
|
||||
boolean[] res = new boolean[SIZE];
|
||||
System.arraycopy(booleans, 0, res, 0, SIZE);
|
||||
for (int i = 0; i < SIZE / 2; i++) {
|
||||
@ -294,6 +353,18 @@ public class LoopArrayIndexComputeTest extends VectorizationTestRunner {
|
||||
return res;
|
||||
}
|
||||
|
||||
@Test
|
||||
// Note that this case cannot be vectorized due to data dependence.
|
||||
@IR(failOn = {IRNode.STORE_VECTOR})
|
||||
public boolean[] booleanArrayWithDependenceNeg() {
|
||||
boolean[] res = new boolean[SIZE];
|
||||
System.arraycopy(booleans, 0, res, 0, SIZE);
|
||||
for (int i = 4; i < SIZE / 2; i++) {
|
||||
res[i] |= booleans[i - 4];
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
// ---------------- Multiple Operations ----------------
|
||||
@Test
|
||||
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
|
||||
|
315
test/micro/org/openjdk/bench/vm/compiler/VectorAlignment.java
Normal file
315
test/micro/org/openjdk/bench/vm/compiler/VectorAlignment.java
Normal file
@ -0,0 +1,315 @@
|
||||
/*
|
||||
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
package org.openjdk.bench.vm.compiler;
|
||||
|
||||
import org.openjdk.jmh.annotations.*;
|
||||
import org.openjdk.jmh.infra.*;
|
||||
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.Random;
|
||||
|
||||
@BenchmarkMode(Mode.AverageTime)
|
||||
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
||||
@State(Scope.Thread)
|
||||
@Warmup(iterations = 1, time = 1, timeUnit = TimeUnit.SECONDS)
|
||||
@Measurement(iterations = 1, time = 1, timeUnit = TimeUnit.SECONDS)
|
||||
@Fork(value = 1)
|
||||
public abstract class VectorAlignment {
|
||||
@Param({/*"512", "1024", */ "2048"})
|
||||
public int COUNT;
|
||||
|
||||
private int[] aI;
|
||||
private int[] bI;
|
||||
private int[] rI;
|
||||
|
||||
private long[] aL;
|
||||
private long[] bL;
|
||||
private long[] rL;
|
||||
|
||||
private short[] aS;
|
||||
private short[] bS;
|
||||
private short[] rS;
|
||||
|
||||
private char[] aC;
|
||||
private char[] bC;
|
||||
private char[] rC;
|
||||
|
||||
private byte[] aB;
|
||||
private byte[] bB;
|
||||
private byte[] rB;
|
||||
|
||||
private float[] aF;
|
||||
private float[] bF;
|
||||
private float[] rF;
|
||||
|
||||
private double[] aD;
|
||||
private double[] bD;
|
||||
private double[] rD;
|
||||
|
||||
|
||||
@Param("0")
|
||||
private int seed;
|
||||
private Random r = new Random(seed);
|
||||
|
||||
@Setup
|
||||
public void init() {
|
||||
aI = new int[COUNT];
|
||||
bI = new int[COUNT];
|
||||
rI = new int[COUNT];
|
||||
|
||||
aL = new long[COUNT];
|
||||
bL = new long[COUNT];
|
||||
rL = new long[COUNT];
|
||||
|
||||
aS = new short[COUNT];
|
||||
bS = new short[COUNT];
|
||||
rS = new short[COUNT];
|
||||
|
||||
aC = new char[COUNT];
|
||||
bC = new char[COUNT];
|
||||
rC = new char[COUNT];
|
||||
|
||||
aB = new byte[COUNT];
|
||||
bB = new byte[COUNT];
|
||||
rB = new byte[COUNT];
|
||||
|
||||
aF = new float[COUNT];
|
||||
bF = new float[COUNT];
|
||||
rF = new float[COUNT];
|
||||
|
||||
aD = new double[COUNT];
|
||||
bD = new double[COUNT];
|
||||
rD = new double[COUNT];
|
||||
|
||||
|
||||
for (int i = 0; i < COUNT; i++) {
|
||||
aI[i] = r.nextInt();
|
||||
bI[i] = r.nextInt();
|
||||
|
||||
aL[i] = r.nextLong();
|
||||
bL[i] = r.nextLong();
|
||||
|
||||
aS[i] = (short) r.nextInt();
|
||||
bS[i] = (short) r.nextInt();
|
||||
|
||||
aC[i] = (char) r.nextInt();
|
||||
bC[i] = (char) r.nextInt();
|
||||
|
||||
aB[i] = (byte) r.nextInt();
|
||||
bB[i] = (byte) r.nextInt();
|
||||
|
||||
aF[i] = r.nextFloat();
|
||||
bF[i] = r.nextFloat();
|
||||
|
||||
aD[i] = r.nextDouble();
|
||||
bD[i] = r.nextDouble();
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
// Control: should always vectorize with SuperWord
|
||||
public void bench000I_control() {
|
||||
for (int i = 0; i < COUNT; i++) {
|
||||
// Have multiple MUL operations to make loop compute bound (more compute than load/store)
|
||||
rI[i] = aI[i] * aI[i] * aI[i] * aI[i];
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void bench000L_control() {
|
||||
for (int i = 0; i < COUNT; i++) {
|
||||
rL[i] = aL[i] * aL[i] * aL[i] * aL[i];
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void bench000S_control() {
|
||||
for (int i = 0; i < COUNT; i++) {
|
||||
rS[i] = (short)(aS[i] * aS[i] * aS[i] * aS[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void bench000C_control() {
|
||||
for (int i = 0; i < COUNT; i++) {
|
||||
rC[i] = (char)(aC[i] * aC[i] * aC[i] * aC[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void bench000B_control() {
|
||||
for (int i = 0; i < COUNT; i++) {
|
||||
rB[i] = (byte)(aB[i] * aB[i] * aB[i] * aB[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void bench000F_control() {
|
||||
for (int i = 0; i < COUNT; i++) {
|
||||
rF[i] = aF[i] * aF[i] * aF[i] * aF[i];
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void bench000D_control() {
|
||||
for (int i = 0; i < COUNT; i++) {
|
||||
rD[i] = aD[i] * aD[i] * aD[i] * aD[i];
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
// Control: should always vectorize with SuperWord
|
||||
public void bench001_control() {
|
||||
for (int i = 0; i < COUNT; i++) {
|
||||
// Have multiple MUL operations to make loop compute bound (more compute than load/store)
|
||||
rI[i] = aI[i] * aI[i] * aI[i] * aI[i] + bI[i];
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
// Vectorizes without AlignVector
|
||||
public void bench100I_misaligned_load() {
|
||||
for (int i = 0; i < COUNT-1; i++) {
|
||||
rI[i] = aI[i+1] * aI[i+1] * aI[i+1] * aI[i+1];
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void bench100L_misaligned_load() {
|
||||
for (int i = 0; i < COUNT-1; i++) {
|
||||
rL[i] = aL[i+1] * aL[i+1] * aL[i+1] * aL[i+1];
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void bench100S_misaligned_load() {
|
||||
for (int i = 0; i < COUNT-1; i++) {
|
||||
rS[i] = (short)(aS[i+1] * aS[i+1] * aS[i+1] * aS[i+1]);
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void bench100C_misaligned_load() {
|
||||
for (int i = 0; i < COUNT-1; i++) {
|
||||
rC[i] = (char)(aC[i+1] * aC[i+1] * aC[i+1] * aC[i+1]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Benchmark
|
||||
public void bench100B_misaligned_load() {
|
||||
for (int i = 0; i < COUNT-1; i++) {
|
||||
rB[i] = (byte)(aB[i+1] * aB[i+1] * aB[i+1] * aB[i+1]);
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void bench100F_misaligned_load() {
|
||||
for (int i = 0; i < COUNT-1; i++) {
|
||||
rF[i] = aF[i+1] * aF[i+1] * aF[i+1] * aF[i+1];
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void bench100D_misaligned_load() {
|
||||
for (int i = 0; i < COUNT-1; i++) {
|
||||
rD[i] = aD[i+1] * aD[i+1] * aD[i+1] * aD[i+1];
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
// Only without "Vectorize" (confused by hand-unrolling)
|
||||
public void bench200_hand_unrolled_aligned() {
|
||||
for (int i = 0; i < COUNT-10; i+=2) {
|
||||
rI[i+0] = aI[i+0] * aI[i+0] * aI[i+0] * aI[i+0];
|
||||
rI[i+1] = aI[i+1] * aI[i+1] * aI[i+1] * aI[i+1];
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
// Only with "Vectorize", without we get issues with modulo computation of alignment for bI
|
||||
public void bench300_multiple_misaligned_loads() {
|
||||
for (int i = 0; i < COUNT-10; i++) {
|
||||
rI[i] = aI[i] * aI[i] * aI[i] * aI[i] + bI[i+1];
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
// Only with "Vectorize", without we may confuse aI[5] with aI[4+1] and pack loads in wrong pack
|
||||
public void bench301_multiple_misaligned_loads() {
|
||||
for (int i = 0; i < COUNT-10; i++) {
|
||||
rI[i] = aI[i] * aI[i] * aI[i] * aI[i] + aI[i+1];
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
// Only with "Vectorize", without we get mix of aI[i] and a[i-2]
|
||||
public void bench302_multiple_misaligned_loads_and_stores() {
|
||||
for (int i = 2; i < COUNT; i++) {
|
||||
rI[i - 2] = aI[i-2] * aI[i-2] * aI[i-2] * aI[i-2]; // can do this for all iterations
|
||||
rI[i] = aI[i] + 3; // before doing this second line
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
// Currently does not vectorize:
|
||||
// hand-unrolled confuses Vectorize -> adjacent loads not from same original node (not even same line)
|
||||
// multiple unaligned loads confuses non-Vectorize: aI[5+1] confused with aI[4+2] (plus modulo alignment issue)
|
||||
public void bench400_hand_unrolled_misaligned() {
|
||||
for (int i = 0; i < COUNT-10; i+=2) {
|
||||
rI[i+0] = aI[i+1] * aI[i+1] * aI[i+1] * aI[i+1] + aI[i];
|
||||
rI[i+1] = aI[i+2] * aI[i+2] * aI[i+2] * aI[i+2] + aI[i+1];
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
// Currently does not vectorize:
|
||||
// hand-unrolled confuses Vectorize -> adjacent loads not from same original node (not even same line)
|
||||
// non-Vectorize: plus modulo alignment issue
|
||||
public void bench401_hand_unrolled_misaligned() {
|
||||
for (int i = 0; i < COUNT-10; i+=2) {
|
||||
rI[i+0] = aI[i+1] * aI[i+1] * aI[i+1] * aI[i+1] + bI[i];
|
||||
rI[i+1] = aI[i+2] * aI[i+2] * aI[i+2] * aI[i+2] + bI[i+1];
|
||||
}
|
||||
}
|
||||
|
||||
@Fork(value = 1, jvmArgsPrepend = {
|
||||
"-XX:+UseSuperWord", "-XX:CompileCommand=Option,*::*,Vectorize"
|
||||
})
|
||||
public static class VectorAlignmentSuperWordWithVectorize extends VectorAlignment {}
|
||||
|
||||
@Fork(value = 1, jvmArgsPrepend = {
|
||||
"-XX:+UseSuperWord", "-XX:+AlignVector"
|
||||
})
|
||||
public static class VectorAlignmentSuperWordAlignVector extends VectorAlignment {}
|
||||
|
||||
@Fork(value = 1, jvmArgsPrepend = {
|
||||
"-XX:+UseSuperWord"
|
||||
})
|
||||
public static class VectorAlignmentSuperWord extends VectorAlignment {}
|
||||
|
||||
@Fork(value = 1, jvmArgsPrepend = {
|
||||
"-XX:-UseSuperWord"
|
||||
})
|
||||
public static class VectorAlignmentNoSuperWord extends VectorAlignment {}
|
||||
}
|
Loading…
Reference in New Issue
Block a user