8325155: C2 SuperWord: remove alignment boundaries
Reviewed-by: chagedorn, kvn
parent d8af58941b
commit 944aeb81b1
@@ -43,7 +43,6 @@ SuperWord::SuperWord(const VLoopAnalyzer &vloop_analyzer) :
  _vloop_analyzer(vloop_analyzer),
  _vloop(vloop_analyzer.vloop()),
  _arena(mtCompiler),
  _node_info(arena(), _vloop.estimated_body_length(), 0, SWNodeInfo::initial), // info needed per node
  _clone_map(phase()->C->clone_map()), // map of nodes created in cloning
  _pairset(&_arena, _vloop_analyzer),
  _packset(&_arena, _vloop_analyzer
@@ -453,11 +452,8 @@ bool SuperWord::transform_loop() {
bool SuperWord::SLP_extract() {
  assert(cl()->is_main_loop(), "SLP should only work on main loops");

  // Ensure extra info is allocated.
  initialize_node_info();

  // Attempt vectorization
  find_adjacent_refs();
  // Find "seed" pairs.
  create_adjacent_memop_pairs();

  if (_pairset.is_empty()) {
#ifndef PRODUCT
@@ -491,245 +487,133 @@ bool SuperWord::SLP_extract() {
  return output();
}

//------------------------------find_adjacent_refs---------------------------
// Find the adjacent memory references and create pack pairs for them.
// We can find adjacent memory references by comparing their relative
// alignment. Whether the final vectors can be aligned is determined later
// once all vectors are extended and combined.
void SuperWord::find_adjacent_refs() {
  // Get list of memory operations
  Node_List memops;
  for (int i = 0; i < body().length(); i++) {
    Node* n = body().at(i);
    if (n->is_Mem() && !n->is_LoadStore() && in_bb(n) &&
        is_java_primitive(n->as_Mem()->memory_type())) {
      int align = memory_alignment(n->as_Mem(), 0);
      if (align != bottom_align) {
        memops.push(n);
      }
    }
  }
// Find the "seed" memop pairs. These are pairs that we strongly suspect would lead to vectorization.
void SuperWord::create_adjacent_memop_pairs() {
  ResourceMark rm;
  GrowableArray<const VPointer*> vpointers;

  collect_valid_vpointers(vpointers);

  // Sort the VPointers. This does 2 things:
  //  - Separate the VPointers into groups: all memops that have the same opcode and the same
  //    VPointer, except for the offset. Adjacent memops must have the same opcode and the
  //    same VPointer, except for a shift in the offset. Thus, two memops can only be adjacent
  //    if they are in the same group. This decreases the work.
  //  - Sort by offset inside the groups. This decreases the work needed to determine adjacent
  //    memops inside a group.
  vpointers.sort(VPointer::cmp_for_sort);

#ifndef PRODUCT
  if (is_trace_superword_adjacent_memops()) {
    tty->print_cr("\nfind_adjacent_refs found %d memops", memops.size());
    tty->print_cr("\nSuperWord::create_adjacent_memop_pairs:");
  }
#endif

  int max_idx;

  while (memops.size() != 0) {
    // Find a memory reference to align to.
    MemNode* mem_ref = find_align_to_ref(memops, max_idx);
    if (mem_ref == nullptr) break;
    int iv_adjustment = get_iv_adjustment(mem_ref);

    const VPointer& align_to_ref_p = vpointer(mem_ref);
    // Set alignment relative to "align_to_ref" for all related memory operations.
    for (int i = memops.size() - 1; i >= 0; i--) {
      MemNode* s = memops.at(i)->as_Mem();
      if (isomorphic(s, mem_ref) &&
          (!_do_vector_loop || same_origin_idx(s, mem_ref))) {
        const VPointer& p2 = vpointer(s);
        if (p2.comparable(align_to_ref_p)) {
          int align = memory_alignment(s, iv_adjustment);
          set_alignment(s, align);
        }
      }
    }

    // Create initial pack pairs of memory operations for which alignment was set.
    for (uint i = 0; i < memops.size(); i++) {
      Node* s1 = memops.at(i);
      int align = alignment(s1);
      if (align == top_align) continue;
      for (uint j = 0; j < memops.size(); j++) {
        Node* s2 = memops.at(j);
        if (alignment(s2) == top_align) continue;
        if (s1 != s2 && are_adjacent_refs(s1, s2)) {
          if (stmts_can_pack(s1, s2, align)) {
            if (!_do_vector_loop || same_origin_idx(s1, s2)) {
              _pairset.add_pair(s1, s2);
            }
          }
        }
      }
    }

    // Remove used mem nodes.
    for (int i = memops.size() - 1; i >= 0; i--) {
      MemNode* m = memops.at(i)->as_Mem();
      if (alignment(m) != top_align) {
        memops.remove(i);
      }
    }
  } // while (memops.size() != 0)
  create_adjacent_memop_pairs_in_all_groups(vpointers);

#ifndef PRODUCT
  if (is_trace_superword_packset()) {
    tty->print_cr("\nAfter Superword::find_adjacent_refs");
    tty->print_cr("\nAfter Superword::create_adjacent_memop_pairs");
    _pairset.print();
  }
#endif
}

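The grouping-and-sorting strategy that create_adjacent_memop_pairs relies on can be illustrated with a small standalone sketch. This is not HotSpot code: MemRef and its fields are hypothetical stand-ins for the parts of a VPointer that define a group (base, opcode, scale, invar) plus the offset, and the comparator plays the role of VPointer::cmp_for_sort while the group walk mirrors find_group_end.

#include <algorithm>
#include <cstdio>
#include <tuple>
#include <vector>

// Hypothetical summary of a memory reference: group key fields plus offset.
struct MemRef {
  int base, opcode, scale, invar;  // together these form the "group" key
  int offset;                      // sorted second, within each group
};

static bool same_group(const MemRef& a, const MemRef& b) {
  return std::tie(a.base, a.opcode, a.scale, a.invar) ==
         std::tie(b.base, b.opcode, b.scale, b.invar);
}

int main() {
  std::vector<MemRef> refs = {
    {1, 10, 4, 0, 8}, {2, 10, 4, 0, 0}, {1, 10, 4, 0, 0}, {1, 10, 4, 0, 4}
  };
  // Sort by group key first, then by offset (the analogue of cmp_for_sort).
  std::sort(refs.begin(), refs.end(), [](const MemRef& a, const MemRef& b) {
    return std::tie(a.base, a.opcode, a.scale, a.invar, a.offset) <
           std::tie(b.base, b.opcode, b.scale, b.invar, b.offset);
  });
  // Walk the sorted array group by group (the analogue of find_group_end).
  size_t group_start = 0;
  while (group_start < refs.size()) {
    size_t group_end = group_start + 1;
    while (group_end < refs.size() && same_group(refs[group_start], refs[group_end])) {
      group_end++;
    }
    printf("group of %zu refs, offsets:", group_end - group_start);
    for (size_t i = group_start; i < group_end; i++) { printf(" %d", refs[i].offset); }
    printf("\n");
    group_start = group_end;
  }
  return 0;
}

Only refs inside one group can ever be adjacent, so the quadratic pairing work is confined to each (usually small) group.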
//------------------------------find_align_to_ref---------------------------
|
||||
// Find a memory reference to align the loop induction variable to.
|
||||
// Looks first at stores then at loads, looking for a memory reference
|
||||
// with the largest number of references similar to it.
|
||||
MemNode* SuperWord::find_align_to_ref(Node_List &memops, int &idx) {
|
||||
GrowableArray<int> cmp_ct(arena(), memops.size(), memops.size(), 0);
|
||||
|
||||
// Count number of comparable memory ops
|
||||
for (uint i = 0; i < memops.size(); i++) {
|
||||
MemNode* s1 = memops.at(i)->as_Mem();
|
||||
const VPointer& p1 = vpointer(s1);
|
||||
for (uint j = i+1; j < memops.size(); j++) {
|
||||
MemNode* s2 = memops.at(j)->as_Mem();
|
||||
if (isomorphic(s1, s2)) {
|
||||
const VPointer& p2 = vpointer(s2);
|
||||
if (p1.comparable(p2)) {
|
||||
(*cmp_ct.adr_at(i))++;
|
||||
(*cmp_ct.adr_at(j))++;
|
||||
}
|
||||
}
|
||||
// Collect all memops vpointers that could potentially be vectorized.
void SuperWord::collect_valid_vpointers(GrowableArray<const VPointer*>& vpointers) {
  for_each_mem([&] (const MemNode* mem, int bb_idx) {
    const VPointer& p = vpointer(mem);
    if (p.valid() &&
        !mem->is_LoadStore() &&
        is_java_primitive(mem->memory_type())) {
      vpointers.append(&p);
    }
  }

// Find Store (or Load) with the greatest number of "comparable" references,
|
||||
// biggest vector size, smallest data size and smallest iv offset.
|
||||
int max_ct = 0;
|
||||
int max_vw = 0;
|
||||
int max_idx = -1;
|
||||
int min_size = max_jint;
|
||||
int min_iv_offset = max_jint;
|
||||
for (uint j = 0; j < memops.size(); j++) {
|
||||
MemNode* s = memops.at(j)->as_Mem();
|
||||
if (s->is_Store()) {
|
||||
int vw = vector_width_in_bytes(s);
|
||||
assert(vw > 1, "sanity");
|
||||
const VPointer& p = vpointer(s);
|
||||
if ( cmp_ct.at(j) > max_ct ||
|
||||
(cmp_ct.at(j) == max_ct &&
|
||||
( vw > max_vw ||
|
||||
(vw == max_vw &&
|
||||
( data_size(s) < min_size ||
|
||||
(data_size(s) == min_size &&
|
||||
p.offset_in_bytes() < min_iv_offset)))))) {
|
||||
max_ct = cmp_ct.at(j);
|
||||
max_vw = vw;
|
||||
max_idx = j;
|
||||
min_size = data_size(s);
|
||||
min_iv_offset = p.offset_in_bytes();
|
||||
}
|
||||
}
|
||||
}
|
||||
// If no stores, look at loads
|
||||
if (max_ct == 0) {
|
||||
for (uint j = 0; j < memops.size(); j++) {
|
||||
MemNode* s = memops.at(j)->as_Mem();
|
||||
if (s->is_Load()) {
|
||||
int vw = vector_width_in_bytes(s);
|
||||
assert(vw > 1, "sanity");
|
||||
const VPointer& p = vpointer(s);
|
||||
if ( cmp_ct.at(j) > max_ct ||
|
||||
(cmp_ct.at(j) == max_ct &&
|
||||
( vw > max_vw ||
|
||||
(vw == max_vw &&
|
||||
( data_size(s) < min_size ||
|
||||
(data_size(s) == min_size &&
|
||||
p.offset_in_bytes() < min_iv_offset)))))) {
|
||||
max_ct = cmp_ct.at(j);
|
||||
max_vw = vw;
|
||||
max_idx = j;
|
||||
min_size = data_size(s);
|
||||
min_iv_offset = p.offset_in_bytes();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
if (is_trace_superword_verbose()) {
|
||||
tty->print_cr("\nVector memops after find_align_to_ref");
|
||||
for (uint i = 0; i < memops.size(); i++) {
|
||||
MemNode* s = memops.at(i)->as_Mem();
|
||||
s->dump();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
idx = max_idx;
|
||||
if (max_ct > 0) {
|
||||
#ifndef PRODUCT
|
||||
if (is_trace_superword_adjacent_memops()) {
|
||||
tty->print("SuperWord::find_align_to_ref: ");
|
||||
memops.at(max_idx)->as_Mem()->dump();
|
||||
}
|
||||
#endif
|
||||
return memops.at(max_idx)->as_Mem();
|
||||
}
|
||||
return nullptr;
|
||||
});
|
||||
}
|
||||
|
||||
//---------------------------get_vw_bytes_special------------------------
|
||||
int SuperWord::get_vw_bytes_special(MemNode* s) {
|
||||
// Get the vector width in bytes.
|
||||
int vw = vector_width_in_bytes(s);
|
||||
|
||||
// Check for special case where there is an MulAddS2I usage where short vectors are going to need combined.
|
||||
BasicType btype = velt_basic_type(s);
|
||||
if (type2aelembytes(btype) == 2) {
|
||||
bool should_combine_adjacent = true;
|
||||
for (DUIterator_Fast imax, i = s->fast_outs(imax); i < imax; i++) {
|
||||
Node* user = s->fast_out(i);
|
||||
if (!VectorNode::is_muladds2i(user)) {
|
||||
should_combine_adjacent = false;
|
||||
}
|
||||
}
|
||||
if (should_combine_adjacent) {
|
||||
vw = MIN2(Matcher::max_vector_size_auto_vectorization(btype)*type2aelembytes(btype), vw * 2);
|
||||
}
|
||||
// For each group, find the adjacent memops.
void SuperWord::create_adjacent_memop_pairs_in_all_groups(const GrowableArray<const VPointer*> &vpointers) {
  int group_start = 0;
  while (group_start < vpointers.length()) {
    int group_end = find_group_end(vpointers, group_start);
    create_adjacent_memop_pairs_in_one_group(vpointers, group_start, group_end);
    group_start = group_end;
  }

  // Check for special case where there is a type conversion between different data size.
  int vectsize = max_vector_size_in_def_use_chain(s);
  if (vectsize < Matcher::max_vector_size_auto_vectorization(btype)) {
    vw = MIN2(vectsize * type2aelembytes(btype), vw);
  }

  return vw;
}

//---------------------------get_iv_adjustment---------------------------
// Calculate the loop's iv adjustment for this memory op.
int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
  const VPointer& align_to_ref_p = vpointer(mem_ref);
  int offset = align_to_ref_p.offset_in_bytes();
  int scale = align_to_ref_p.scale_in_bytes();
  int elt_size = align_to_ref_p.memory_size();
  int vw = get_vw_bytes_special(mem_ref);
  assert(vw > 1, "sanity");
  int iv_adjustment;
  if (scale != 0) {
    int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1;
    // At least one iteration is executed in pre-loop by default. As result
    // several iterations are needed to align memory operations in main-loop even
    // if offset is 0.
    int iv_adjustment_in_bytes = (stride_sign * vw - (offset % vw));
    iv_adjustment = iv_adjustment_in_bytes/elt_size;
  } else {
    // This memory op is not dependent on iv (scale == 0)
    iv_adjustment = 0;
// Step forward until we find a VPointer of another group, or we reach the end of the array.
int SuperWord::find_group_end(const GrowableArray<const VPointer*>& vpointers, int group_start) {
  int group_end = group_start + 1;
  while (group_end < vpointers.length() &&
         VPointer::cmp_for_sort_by_group(
           vpointers.adr_at(group_start),
           vpointers.adr_at(group_end)
         ) == 0) {
    group_end++;
  }
  return group_end;
}

// Find adjacent memops for a single group, e.g. for all LoadI of the same base, invar, etc.
// Create pairs and add them to the pairset.
void SuperWord::create_adjacent_memop_pairs_in_one_group(const GrowableArray<const VPointer*>& vpointers, const int group_start, const int group_end) {
#ifndef PRODUCT
  if (is_trace_superword_alignment()) {
    tty->print("SuperWord::get_iv_adjustment: n = %d, noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d: ",
               mem_ref->_idx, offset, iv_adjustment, elt_size, scale, iv_stride(), vw);
    mem_ref->dump();
  if (is_trace_superword_adjacent_memops()) {
    tty->print_cr(" group:");
    for (int i = group_start; i < group_end; i++) {
      const VPointer* p = vpointers.at(i);
      tty->print(" ");
      p->print();
    }
  }
#endif
  return iv_adjustment;

  MemNode* first = vpointers.at(group_start)->mem();
  int element_size = data_size(first);

  // For each ref in group: find others that can be paired:
  for (int i = group_start; i < group_end; i++) {
    const VPointer* p1 = vpointers.at(i);
    MemNode* mem1 = p1->mem();

    bool found = false;
    // For each ref in group with larger or equal offset:
    for (int j = i + 1; j < group_end; j++) {
      const VPointer* p2 = vpointers.at(j);
      MemNode* mem2 = p2->mem();
      assert(mem1 != mem2, "look only at pair of different memops");

      // Check for correct distance.
      assert(data_size(mem1) == element_size, "all nodes in group must have the same element size");
      assert(data_size(mem2) == element_size, "all nodes in group must have the same element size");
      assert(p1->offset_in_bytes() <= p2->offset_in_bytes(), "must be sorted by offset");
      if (p1->offset_in_bytes() + element_size > p2->offset_in_bytes()) { continue; }
      if (p1->offset_in_bytes() + element_size < p2->offset_in_bytes()) { break; }

      // Only allow nodes from same origin idx to be packed (see CompileCommand Option Vectorize)
      if (_do_vector_loop && !same_origin_idx(mem1, mem2)) { continue; }

      if (!can_pack_into_pair(mem1, mem2)) { continue; }

#ifndef PRODUCT
      if (is_trace_superword_adjacent_memops()) {
        if (found) {
          tty->print_cr(" WARNING: multiple pairs with the same node. Ignored pairing:");
        } else {
          tty->print_cr(" pair:");
        }
        tty->print(" ");
        p1->print();
        tty->print(" ");
        p2->print();
      }
#endif

      if (!found) {
        _pairset.add_pair(mem1, mem2);
      }
    }
  }
}

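Because offsets are sorted within a group, the inner scan in create_adjacent_memop_pairs_in_one_group can skip candidates that are too close (continue) and stop as soon as the gap exceeds one element (break). A minimal standalone sketch of that scan, with hypothetical offsets and element size rather than real nodes:

#include <cstdio>
#include <vector>

int main() {
  const int element_size = 4;                // bytes per element, e.g. an int
  std::vector<int> offsets = {0, 0, 4, 12};  // offsets of one group, already sorted
  for (size_t i = 0; i < offsets.size(); i++) {
    for (size_t j = i + 1; j < offsets.size(); j++) {
      if (offsets[i] + element_size > offsets[j]) { continue; }  // same or overlapping slot
      if (offsets[i] + element_size < offsets[j]) { break; }     // gap too large: no later j can match
      // Exactly adjacent; the real code additionally uses the 'found' flag so each p1 is paired only once.
      printf("adjacent: offset %d with offset %d\n", offsets[i], offsets[j]);
    }
  }
  return 0;
}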
void VLoopMemorySlices::find_memory_slices() {
|
||||
@ -809,10 +693,8 @@ void VLoopMemorySlices::get_slice_in_reverse_order(PhiNode* head, MemNode* tail,
|
||||
#endif
|
||||
}
|
||||
|
||||
//------------------------------stmts_can_pack---------------------------
// Can s1 and s2 be in a pack with s1 immediately preceding s2 and
// s1 aligned at "align"
bool SuperWord::stmts_can_pack(Node* s1, Node* s2, int align) {
// Check if two nodes can be packed into a pair.
bool SuperWord::can_pack_into_pair(Node* s1, Node* s2) {

  // Do not use superword for non-primitives
  BasicType bt1 = velt_basic_type(s1);
@@ -831,13 +713,7 @@ bool SuperWord::stmts_can_pack(Node* s1, Node* s2, int align) {
  if ((independent(s1, s2) && have_similar_inputs(s1, s2)) || reduction(s1, s2)) {
    if (!_pairset.is_left(s1) && !_pairset.is_right(s2)) {
      if (!s1->is_Mem() || are_adjacent_refs(s1, s2)) {
        int s1_align = alignment(s1);
        int s2_align = alignment(s2);
        if (s1_align == top_align || s1_align == align) {
          if (s2_align == top_align || s2_align == align + data_size(s1)) {
            return true;
          }
        }
        return true;
      }
    }
  }
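The is_left/is_right checks above keep the pairset well formed: a node may be the left element of at most one pair and the right element of at most one pair, which is what later allows pairs such as (a,b) and (b,c) to be chained into a single pack [a,b,c]. A minimal standalone sketch of that invariant, with plain ints standing in for nodes (not HotSpot code):

#include <cstdio>
#include <map>

int main() {
  std::map<int, int> pair_left_to_right;   // left node -> right node
  std::map<int, int> pair_right_to_left;   // right node -> left node
  auto add_pair = [&](int s1, int s2) {
    // Mirrors "!is_left(s1) && !is_right(s2)": reject if s1 already leads or s2 already trails a pair.
    if (pair_left_to_right.count(s1) || pair_right_to_left.count(s2)) { return false; }
    pair_left_to_right[s1] = s2;
    pair_right_to_left[s2] = s1;
    return true;
  };
  add_pair(1, 2);
  add_pair(2, 3);   // chains with (1,2)
  add_pair(1, 3);   // rejected: node 1 is already the left of a pair
  // Follow the chain starting at a node that is a left but not a right.
  for (int n = 1; pair_left_to_right.count(n); n = pair_left_to_right[n]) {
    printf("%d -> %d\n", n, pair_left_to_right[n]);
  }
  return 0;
}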
@ -1013,16 +889,6 @@ bool VLoopReductions::is_marked_reduction_pair(const Node* s1, const Node* s2) c
|
||||
return false;
|
||||
}
|
||||
|
||||
//------------------------------set_alignment---------------------------
|
||||
void SuperWord::set_alignment(Node* s1, Node* s2, int align) {
|
||||
set_alignment(s1, align);
|
||||
if (align == top_align || align == bottom_align) {
|
||||
set_alignment(s2, align);
|
||||
} else {
|
||||
set_alignment(s2, align + data_size(s1));
|
||||
}
|
||||
}
|
||||
|
||||
// Extend pairset by following use->def and def->use links from pair members.
|
||||
void SuperWord::extend_pairset_with_more_pairs_by_following_use_and_def() {
|
||||
bool changed;
|
||||
@ -1058,57 +924,25 @@ void SuperWord::extend_pairset_with_more_pairs_by_following_use_and_def() {
|
||||
#endif
|
||||
}
|
||||
|
||||
//------------------------------adjust_alignment_for_type_conversion---------------------------------
|
||||
// Adjust the target alignment if conversion between different data size exists in def-use nodes.
|
||||
int SuperWord::adjust_alignment_for_type_conversion(Node* s, Node* t, int align) {
|
||||
// Do not use superword for non-primitives
|
||||
BasicType bt1 = velt_basic_type(s);
|
||||
BasicType bt2 = velt_basic_type(t);
|
||||
if (!is_java_primitive(bt1) || !is_java_primitive(bt2)) {
|
||||
return align;
|
||||
}
|
||||
if (longer_type_for_conversion(s) != T_ILLEGAL ||
|
||||
longer_type_for_conversion(t) != T_ILLEGAL) {
|
||||
align = align / data_size(s) * data_size(t);
|
||||
}
|
||||
return align;
|
||||
}
|
||||
|
||||
bool SuperWord::extend_pairset_with_more_pairs_by_following_def(Node* s1, Node* s2) {
|
||||
assert(_pairset.is_pair(s1, s2), "(s1, s2) must be a pair");
|
||||
assert(s1->req() == s2->req(), "just checking");
|
||||
assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking");
|
||||
|
||||
if (s1->is_Load()) return false;
|
||||
|
||||
#ifndef PRODUCT
|
||||
if (is_trace_superword_alignment()) {
|
||||
tty->print_cr("SuperWord::extend_pairset_with_more_pairs_by_following_def: s1 %d, align %d",
|
||||
s1->_idx, alignment(s1));
|
||||
}
|
||||
#endif
|
||||
bool changed = false;
|
||||
int start = s1->is_Store() ? MemNode::ValueIn : 1;
|
||||
int end = s1->is_Store() ? MemNode::ValueIn+1 : s1->req();
|
||||
for (int j = start; j < end; j++) {
|
||||
int align = alignment(s1);
|
||||
Node* t1 = s1->in(j);
|
||||
Node* t2 = s2->in(j);
|
||||
if (!in_bb(t1) || !in_bb(t2) || t1->is_Mem() || t2->is_Mem()) {
|
||||
// Only follow non-memory nodes in block - we do not want to resurrect misaligned packs.
|
||||
continue;
|
||||
}
|
||||
align = adjust_alignment_for_type_conversion(s1, t1, align);
|
||||
if (stmts_can_pack(t1, t2, align)) {
|
||||
if (can_pack_into_pair(t1, t2)) {
|
||||
if (estimate_cost_savings_when_packing_as_pair(t1, t2) >= 0) {
|
||||
_pairset.add_pair(t1, t2);
|
||||
#ifndef PRODUCT
|
||||
if (is_trace_superword_alignment()) {
|
||||
tty->print_cr("SuperWord::extend_pairset_with_more_pairs_by_following_def: set_alignment(%d, %d, %d)",
|
||||
t1->_idx, t2->_idx, align);
|
||||
}
|
||||
#endif
|
||||
set_alignment(t1, t2, align);
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
@ -1122,17 +956,9 @@ bool SuperWord::extend_pairset_with_more_pairs_by_following_def(Node* s1, Node*
|
||||
bool SuperWord::extend_pairset_with_more_pairs_by_following_use(Node* s1, Node* s2) {
|
||||
assert(_pairset.is_pair(s1, s2), "(s1, s2) must be a pair");
|
||||
assert(s1->req() == s2->req(), "just checking");
|
||||
assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking");
|
||||
|
||||
if (s1->is_Store()) return false;
|
||||
|
||||
int align = alignment(s1);
|
||||
#ifndef PRODUCT
|
||||
if (is_trace_superword_alignment()) {
|
||||
tty->print_cr("SuperWord::extend_pairset_with_more_pairs_by_following_use: s1 %d, align %d",
|
||||
s1->_idx, align);
|
||||
}
|
||||
#endif
|
||||
int savings = -1;
|
||||
Node* u1 = nullptr;
|
||||
Node* u2 = nullptr;
|
||||
@ -1150,28 +976,18 @@ bool SuperWord::extend_pairset_with_more_pairs_by_following_use(Node* s1, Node*
|
||||
}
|
||||
if (t2->Opcode() == Op_AddI && t2 == cl()->incr()) continue; // don't mess with the iv
|
||||
if (order_inputs_of_uses_to_match_def_pair(s1, s2, t1, t2) != PairOrderStatus::Ordered) { continue; }
|
||||
int adjusted_align = alignment(s1);
|
||||
adjusted_align = adjust_alignment_for_type_conversion(s1, t1, adjusted_align);
|
||||
if (stmts_can_pack(t1, t2, adjusted_align)) {
|
||||
if (can_pack_into_pair(t1, t2)) {
|
||||
int my_savings = estimate_cost_savings_when_packing_as_pair(t1, t2);
|
||||
if (my_savings > savings) {
|
||||
savings = my_savings;
|
||||
u1 = t1;
|
||||
u2 = t2;
|
||||
align = adjusted_align;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (savings >= 0) {
|
||||
_pairset.add_pair(u1, u2);
|
||||
#ifndef PRODUCT
|
||||
if (is_trace_superword_alignment()) {
|
||||
tty->print_cr("SuperWord::extend_pairset_with_more_pairs_by_following_use: set_alignment(%d, %d, %d)",
|
||||
u1->_idx, u2->_idx, align);
|
||||
}
|
||||
#endif
|
||||
set_alignment(u1, u2, align);
|
||||
return true; // changed
|
||||
}
|
||||
return false; // no change
|
||||
@ -1814,6 +1630,11 @@ uint SuperWord::max_implemented_size(const Node_List* pack) {
|
||||
}
|
||||
}
|
||||
|
||||
// Java API for Long.bitCount/numberOfLeadingZeros/numberOfTrailingZeros
|
||||
// returns int type, but Vector API for them returns long type. To unify
|
||||
// the implementation in backend, superword splits the vector implementation
|
||||
// for Java API into an execution node with long type plus another node
|
||||
// converting long to int.
|
||||
bool SuperWord::requires_long_to_int_conversion(int opc) {
|
||||
switch(opc) {
|
||||
case Op_PopCountL:
|
||||
@ -2948,7 +2769,17 @@ uint SuperWord::find_use_def_boundary(const Node_List* pack) const {
|
||||
bool SuperWord::is_vector_use(Node* use, int u_idx) const {
|
||||
Node_List* u_pk = get_pack(use);
|
||||
if (u_pk == nullptr) return false;
|
||||
if (is_marked_reduction(use)) return true;
|
||||
|
||||
// Reduction: first input is internal connection.
|
||||
if (is_marked_reduction(use) && u_idx == 1) {
|
||||
#ifdef ASSERT
|
||||
for (uint i = 1; i < u_pk->size(); i++) {
|
||||
assert(u_pk->at(i - 1) == u_pk->at(i)->in(1), "internal connection");
|
||||
}
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
||||
Node* def = use->in(u_idx);
|
||||
Node_List* d_pk = get_pack(def);
|
||||
if (d_pk == nullptr) {
|
||||
@ -2975,51 +2806,64 @@ bool SuperWord::is_vector_use(Node* use, int u_idx) const {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!is_velt_basic_type_compatible_use_def(use, def)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (VectorNode::is_muladds2i(use)) {
|
||||
// MulAddS2I takes shorts and produces ints - hence the special checks
|
||||
// on alignment and size.
|
||||
// MulAddS2I takes shorts and produces ints.
|
||||
if (u_pk->size() * 2 != d_pk->size()) {
|
||||
return false;
|
||||
}
|
||||
for (uint i = 0; i < MIN2(d_pk->size(), u_pk->size()); i++) {
|
||||
Node* ui = u_pk->at(i);
|
||||
Node* di = d_pk->at(i);
|
||||
if (alignment(ui) != alignment(di) * 2) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
if (u_pk->size() != d_pk->size())
|
||||
if (u_pk->size() != d_pk->size()) {
|
||||
return false;
|
||||
|
||||
if (longer_type_for_conversion(use) != T_ILLEGAL) {
|
||||
// These opcodes take a type of a kind of size and produce a type of
|
||||
// another size - hence the special checks on alignment and size.
|
||||
for (uint i = 0; i < u_pk->size(); i++) {
|
||||
Node* ui = u_pk->at(i);
|
||||
Node* di = d_pk->at(i);
|
||||
if (ui->in(u_idx) != di) {
|
||||
return false;
|
||||
}
|
||||
if (alignment(ui) / type2aelembytes(velt_basic_type(ui)) !=
|
||||
alignment(di) / type2aelembytes(velt_basic_type(di))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
for (uint i = 0; i < u_pk->size(); i++) {
|
||||
Node* ui = u_pk->at(i);
|
||||
Node* di = d_pk->at(i);
|
||||
if (ui->in(u_idx) != di || alignment(ui) != alignment(di))
|
||||
if (ui->in(u_idx) != di) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check if the output type of def is compatible with the input type of use, i.e. if the
// types have the same size.
bool SuperWord::is_velt_basic_type_compatible_use_def(Node* use, Node* def) const {
  assert(in_bb(def) && in_bb(use), "both use and def are in loop");

  // Conversions are trivially compatible.
  if (VectorNode::is_convert_opcode(use->Opcode())) {
    return true;
  }

  BasicType use_bt = velt_basic_type(use);
  BasicType def_bt = velt_basic_type(def);

  assert(is_java_primitive(use_bt), "sanity %s", type2name(use_bt));
  assert(is_java_primitive(def_bt), "sanity %s", type2name(def_bt));

  // Nodes like Long.bitCount: expect long input, and int output.
  if (requires_long_to_int_conversion(use->Opcode())) {
    return type2aelembytes(def_bt) == 8 &&
           type2aelembytes(use_bt) == 4;
  }

  // MulAddS2I: expect short input, and int output.
  if (VectorNode::is_muladds2i(use)) {
    return type2aelembytes(def_bt) == 2 &&
           type2aelembytes(use_bt) == 4;
  }

  // Default case: input size of use equals output size of def.
  return type2aelembytes(use_bt) == type2aelembytes(def_bt);
}

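The compatibility rule above replaces the old alignment bookkeeping: by default a use and its def must have the same element size, with long-to-int library nodes (e.g. Long.bitCount) and MulAddS2I as the two exceptions. A small standalone sketch of just that decision table, with hypothetical byte sizes in place of velt_basic_type/type2aelembytes (not HotSpot code):

#include <cassert>

// Hypothetical element sizes in bytes, mirroring short/int/long.
enum { SHORT_BYTES = 2, INT_BYTES = 4, LONG_BYTES = 8 };

bool compatible(int use_bytes, int def_bytes, bool long_to_int, bool mul_add_s2i) {
  if (long_to_int) { return def_bytes == LONG_BYTES  && use_bytes == INT_BYTES; }
  if (mul_add_s2i) { return def_bytes == SHORT_BYTES && use_bytes == INT_BYTES; }
  return use_bytes == def_bytes;  // default: same element size
}

int main() {
  assert( compatible(INT_BYTES, INT_BYTES,   false, false)); // int feeding int: ok
  assert(!compatible(INT_BYTES, SHORT_BYTES, false, false)); // short feeding int: sizes differ
  assert( compatible(INT_BYTES, LONG_BYTES,  true,  false)); // Long.bitCount-style node
  assert( compatible(INT_BYTES, SHORT_BYTES, false, true));  // MulAddS2I: shorts in, int out
  return 0;
}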
// Return nullptr if success, else failure message
|
||||
VStatus VLoopBody::construct() {
|
||||
assert(_body.is_empty(), "body is empty");
|
||||
@ -3150,12 +2994,6 @@ VStatus VLoopBody::construct() {
|
||||
return VStatus::make_success();
|
||||
}
|
||||
|
||||
// Initialize per node info
|
||||
void SuperWord::initialize_node_info() {
|
||||
Node* last = body().at(body().length() - 1);
|
||||
grow_node_info(bb_idx(last));
|
||||
}
|
||||
|
||||
BasicType SuperWord::longer_type_for_conversion(Node* n) const {
|
||||
if (!(VectorNode::is_convert_opcode(n->Opcode()) ||
|
||||
requires_long_to_int_conversion(n->Opcode())) ||
|
||||
@ -3177,34 +3015,6 @@ BasicType SuperWord::longer_type_for_conversion(Node* n) const {
|
||||
: (src_size > dst_size ? src_t : dst_t);
|
||||
}
|
||||
|
||||
int SuperWord::max_vector_size_in_def_use_chain(Node* n) {
|
||||
BasicType bt = velt_basic_type(n);
|
||||
BasicType vt = bt;
|
||||
|
||||
// find the longest type among def nodes.
|
||||
uint start, end;
|
||||
VectorNode::vector_operands(n, &start, &end);
|
||||
for (uint i = start; i < end; ++i) {
|
||||
Node* input = n->in(i);
|
||||
if (!in_bb(input)) continue;
|
||||
BasicType newt = longer_type_for_conversion(input);
|
||||
vt = (newt == T_ILLEGAL) ? vt : newt;
|
||||
}
|
||||
|
||||
// find the longest type among use nodes.
|
||||
for (uint i = 0; i < n->outcnt(); ++i) {
|
||||
Node* output = n->raw_out(i);
|
||||
if (!in_bb(output)) continue;
|
||||
BasicType newt = longer_type_for_conversion(output);
|
||||
vt = (newt == T_ILLEGAL) ? vt : newt;
|
||||
}
|
||||
|
||||
int max = Matcher::max_vector_size_auto_vectorization(vt);
|
||||
// If now there is no vectors for the longest type, the nodes with the longest
|
||||
// type in the def-use chain are not packed in SuperWord::stmts_can_pack.
|
||||
return max < 2 ? Matcher::max_vector_size_auto_vectorization(bt) : max;
|
||||
}
|
||||
|
||||
void VLoopTypes::compute_vector_element_type() {
|
||||
#ifndef PRODUCT
|
||||
if (_vloop.is_trace_vector_element_type()) {
|
||||
@ -3308,36 +3118,6 @@ void VLoopTypes::compute_vector_element_type() {
|
||||
#endif
|
||||
}
|
||||
|
||||
//------------------------------memory_alignment---------------------------
// Alignment within a vector memory reference
int SuperWord::memory_alignment(MemNode* s, int iv_adjust) {
#ifndef PRODUCT
  if (is_trace_superword_alignment()) {
    tty->print("SuperWord::memory_alignment within a vector memory reference for %d: ", s->_idx); s->dump();
  }
#endif
  const VPointer& p = vpointer(s);
  if (!p.valid()) {
    NOT_PRODUCT(if(is_trace_superword_alignment()) tty->print_cr("SuperWord::memory_alignment: VPointer p invalid, return bottom_align");)
    return bottom_align;
  }
  int vw = get_vw_bytes_special(s);
  if (vw < 2) {
    NOT_PRODUCT(if(is_trace_superword_alignment()) tty->print_cr("SuperWord::memory_alignment: vector_width_in_bytes < 2, return bottom_align");)
    return bottom_align; // No vectors for this type
  }
  int offset = p.offset_in_bytes();
  offset += iv_adjust*p.memory_size();
  int off_rem = offset % vw;
  int off_mod = off_rem >= 0 ? off_rem : off_rem + vw;
#ifndef PRODUCT
  if (is_trace_superword_alignment()) {
    tty->print_cr("SuperWord::memory_alignment: off_rem = %d, off_mod = %d (offset = %d)", off_rem, off_mod, offset);
  }
#endif
  return off_mod;
}

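The off_rem/off_mod step in the (now removed) memory_alignment above normalizes a possibly negative remainder into the range [0, vw), since the C++ '%' of a negative offset can itself be negative. A standalone sketch of just that normalization, with made-up inputs:

#include <cstdio>

int positive_mod(int offset, int vw) {
  int r = offset % vw;          // may be negative when offset is negative
  return r >= 0 ? r : r + vw;   // shift into [0, vw)
}

int main() {
  printf("%d\n", positive_mod(12, 16));   // 12
  printf("%d\n", positive_mod(-4, 16));   // 12
  return 0;
}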
// Smallest type containing range of values
|
||||
const Type* VLoopTypes::container_type(Node* n) const {
|
||||
if (n->is_Mem()) {
|
||||
@ -3794,10 +3574,6 @@ void VLoopBody::print() const {
|
||||
}
|
||||
#endif
|
||||
|
||||
// ========================= SWNodeInfo =====================
|
||||
|
||||
const SWNodeInfo SWNodeInfo::initial;
|
||||
|
||||
//
|
||||
// --------------------------------- vectorization/simd -----------------------------------
|
||||
//
|
||||
|
@ -384,18 +384,6 @@ public:
|
||||
NOT_PRODUCT(static void print_pack(Node_List* pack);)
|
||||
};
|
||||
|
||||
// ========================= SuperWord =====================
|
||||
|
||||
// -----------------------------SWNodeInfo---------------------------------
|
||||
// Per node info needed by SuperWord
|
||||
class SWNodeInfo {
|
||||
public:
|
||||
int _alignment; // memory alignment for a node
|
||||
|
||||
SWNodeInfo() : _alignment(-1) {}
|
||||
static const SWNodeInfo initial;
|
||||
};
|
||||
|
||||
// -----------------------------SuperWord---------------------------------
|
||||
// Transforms scalar operations into packed (superword) operations.
|
||||
class SuperWord : public ResourceObj {
|
||||
@ -407,9 +395,6 @@ class SuperWord : public ResourceObj {
|
||||
// VSharedData, and reused over many AutoVectorizations.
|
||||
Arena _arena;
|
||||
|
||||
enum consts { top_align = -1, bottom_align = -666 };
|
||||
|
||||
GrowableArray<SWNodeInfo> _node_info; // Info needed per node
|
||||
CloneMap& _clone_map; // map of nodes created in cloning
|
||||
|
||||
PairSet _pairset;
|
||||
@ -461,6 +446,11 @@ class SuperWord : public ResourceObj {
|
||||
return _vloop_analyzer.body().bb_idx(n);
|
||||
}
|
||||
|
||||
template<typename Callback>
|
||||
void for_each_mem(Callback callback) const {
|
||||
return _vloop_analyzer.body().for_each_mem(callback);
|
||||
}
|
||||
|
||||
// VLoopTypes accessors
|
||||
const Type* velt_type(Node* n) const {
|
||||
return _vloop_analyzer.types().velt_type(n);
|
||||
@ -506,11 +496,6 @@ class SuperWord : public ResourceObj {
|
||||
|
||||
#ifndef PRODUCT
|
||||
// TraceAutoVectorization and TraceSuperWord
|
||||
bool is_trace_superword_alignment() const {
|
||||
// Too verbose for TraceSuperWord
|
||||
return _vloop.vtrace().is_trace(TraceAutoVectorizationTag::SW_ALIGNMENT);
|
||||
}
|
||||
|
||||
bool is_trace_superword_adjacent_memops() const {
|
||||
return TraceSuperWord ||
|
||||
_vloop.vtrace().is_trace(TraceAutoVectorizationTag::SW_ADJACENT_MEMOPS);
|
||||
@ -531,15 +516,9 @@ class SuperWord : public ResourceObj {
|
||||
_vloop.vtrace().is_trace(TraceAutoVectorizationTag::SW_INFO);
|
||||
}
|
||||
|
||||
bool is_trace_superword_verbose() const {
|
||||
// Too verbose for TraceSuperWord
|
||||
return _vloop.vtrace().is_trace(TraceAutoVectorizationTag::SW_VERBOSE);
|
||||
}
|
||||
|
||||
bool is_trace_superword_any() const {
|
||||
return TraceSuperWord ||
|
||||
is_trace_align_vector() ||
|
||||
_vloop.vtrace().is_trace(TraceAutoVectorizationTag::SW_ALIGNMENT) ||
|
||||
_vloop.vtrace().is_trace(TraceAutoVectorizationTag::SW_ADJACENT_MEMOPS) ||
|
||||
_vloop.vtrace().is_trace(TraceAutoVectorizationTag::SW_REJECTIONS) ||
|
||||
_vloop.vtrace().is_trace(TraceAutoVectorizationTag::SW_PACKSET) ||
|
||||
@ -549,7 +528,7 @@ class SuperWord : public ResourceObj {
|
||||
|
||||
bool is_trace_align_vector() const {
|
||||
return _vloop.vtrace().is_trace(TraceAutoVectorizationTag::ALIGN_VECTOR) ||
|
||||
is_trace_superword_verbose();
|
||||
_vloop.vtrace().is_trace(TraceAutoVectorizationTag::SW_VERBOSE);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -566,37 +545,28 @@ class SuperWord : public ResourceObj {
|
||||
// Accessors
|
||||
Arena* arena() { return &_arena; }
|
||||
|
||||
int get_vw_bytes_special(MemNode* s);
|
||||
|
||||
// Ensure node_info contains element "i"
|
||||
void grow_node_info(int i) { if (i >= _node_info.length()) _node_info.at_put_grow(i, SWNodeInfo::initial); }
|
||||
|
||||
// should we align vector memory references on this platform?
|
||||
bool vectors_should_be_aligned() { return !Matcher::misaligned_vectors_ok() || AlignVector; }
|
||||
|
||||
// memory alignment for a node
|
||||
int alignment(Node* n) const { return _node_info.adr_at(bb_idx(n))->_alignment; }
|
||||
void set_alignment(Node* n, int a) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_alignment = a; }
|
||||
|
||||
// is pack good for converting into one vector node replacing bunches of Cmp, Bool, CMov nodes.
|
||||
static bool requires_long_to_int_conversion(int opc);
|
||||
// For pack p, are all idx operands the same?
|
||||
bool same_inputs(const Node_List* p, int idx) const;
|
||||
|
||||
// CloneMap utilities
|
||||
bool same_origin_idx(Node* a, Node* b) const;
|
||||
bool same_generation(Node* a, Node* b) const;
|
||||
|
||||
private:
|
||||
bool SLP_extract();
|
||||
// Find the adjacent memory references and create pack pairs for them.
|
||||
void find_adjacent_refs();
|
||||
// Find a memory reference to align the loop induction variable to.
|
||||
MemNode* find_align_to_ref(Node_List &memops, int &idx);
|
||||
// Calculate loop's iv adjustment for this memory ops.
|
||||
int get_iv_adjustment(MemNode* mem);
|
||||
|
||||
// Can s1 and s2 be in a pack with s1 immediately preceding s2 and s1 aligned at "align"
|
||||
bool stmts_can_pack(Node* s1, Node* s2, int align);
|
||||
// Find the "seed" memops pairs. These are pairs that we strongly suspect would lead to vectorization.
|
||||
void create_adjacent_memop_pairs();
|
||||
void collect_valid_vpointers(GrowableArray<const VPointer*>& vpointers);
|
||||
void create_adjacent_memop_pairs_in_all_groups(const GrowableArray<const VPointer*>& vpointers);
|
||||
static int find_group_end(const GrowableArray<const VPointer*>& vpointers, int group_start);
|
||||
void create_adjacent_memop_pairs_in_one_group(const GrowableArray<const VPointer*>& vpointers, const int group_start, int group_end);
|
||||
|
||||
// Various methods to check if we can pack two nodes.
|
||||
bool can_pack_into_pair(Node* s1, Node* s2);
|
||||
// Is s1 immediately before s2 in memory?
|
||||
bool are_adjacent_refs(Node* s1, Node* s2) const;
|
||||
// Are s1 and s2 similar?
|
||||
@ -606,8 +576,6 @@ private:
|
||||
// For a node pair (s1, s2) which is isomorphic and independent,
|
||||
// do s1 and s2 have similar input edges?
|
||||
bool have_similar_inputs(Node* s1, Node* s2);
|
||||
void set_alignment(Node* s1, Node* s2, int align);
|
||||
int adjust_alignment_for_type_conversion(Node* s, Node* t, int align);
|
||||
|
||||
void extend_pairset_with_more_pairs_by_following_use_and_def();
|
||||
bool extend_pairset_with_more_pairs_by_following_def(Node* s1, Node* s2);
|
||||
@ -661,16 +629,15 @@ private:
|
||||
// Is use->in(u_idx) a vector use?
|
||||
bool is_vector_use(Node* use, int u_idx) const;
|
||||
|
||||
// Initialize per node info
|
||||
void initialize_node_info();
|
||||
// Return the longer type for vectorizable type-conversion node or illegal type for other nodes.
|
||||
BasicType longer_type_for_conversion(Node* n) const;
|
||||
// Find the longest type in def-use chain for packed nodes, and then compute the max vector size.
|
||||
int max_vector_size_in_def_use_chain(Node* n);
|
||||
|
||||
static bool requires_long_to_int_conversion(int opc);
|
||||
|
||||
bool is_velt_basic_type_compatible_use_def(Node* use, Node* def) const;
|
||||
|
||||
static LoadNode::ControlDependency control_dependency(Node_List* p);
|
||||
// Alignment within a vector memory reference
|
||||
int memory_alignment(MemNode* s, int iv_adjust);
|
||||
|
||||
// Ensure that the main loop vectors are aligned by adjusting the pre loop limit.
|
||||
void determine_mem_ref_and_aw_for_main_loop_alignment();
|
||||
void adjust_pre_loop_limit_to_align_main_loop_vectors();
|
||||
|
@ -37,8 +37,7 @@
|
||||
flags(TYPES, "Trace VLoopTypes") \
|
||||
flags(POINTERS, "Trace VLoopPointers") \
|
||||
flags(DEPENDENCY_GRAPH, "Trace VLoopDependencyGraph") \
|
||||
flags(SW_ALIGNMENT, "Trace SuperWord alignment analysis") \
|
||||
flags(SW_ADJACENT_MEMOPS, "Trace SuperWord::find_adjacent_refs") \
|
||||
flags(SW_ADJACENT_MEMOPS, "Trace SuperWord::find_adjacent_memop_pairs") \
|
||||
flags(SW_REJECTIONS, "Trace SuperWord rejections (non vectorizations)") \
|
||||
flags(SW_PACKSET, "Trace SuperWord packset at different stages") \
|
||||
flags(SW_INFO, "Trace SuperWord info (equivalent to TraceSuperWord)") \
|
||||
@ -115,7 +114,6 @@ class TraceAutoVectorizationTagValidator {
|
||||
} else if (ALL == tag) {
|
||||
_tags.set_range(0, TRACE_AUTO_VECTORIZATION_TAG_NUM);
|
||||
} else if (SW_VERBOSE == tag) {
|
||||
_tags.at_put(SW_ALIGNMENT, set_bit);
|
||||
_tags.at_put(SW_ADJACENT_MEMOPS, set_bit);
|
||||
_tags.at_put(SW_REJECTIONS, set_bit);
|
||||
_tags.at_put(SW_PACKSET, set_bit);
|
||||
|
@ -202,7 +202,7 @@ void VLoopVPointers::allocate_vpointers_array() {
|
||||
|
||||
void VLoopVPointers::compute_and_cache_vpointers() {
|
||||
int pointers_idx = 0;
|
||||
_body.for_each_mem([&] (const MemNode* mem, int bb_idx) {
|
||||
_body.for_each_mem([&] (MemNode* const mem, int bb_idx) {
|
||||
// Placement new: construct directly into the array.
|
||||
::new (&_vpointers[pointers_idx]) VPointer(mem, _vloop);
|
||||
_bb_idx_to_vpointer.at_put(bb_idx, pointers_idx);
|
||||
@ -410,7 +410,7 @@ void VLoopDependencyGraph::PredsIterator::next() {
|
||||
int VPointer::Tracer::_depth = 0;
|
||||
#endif
|
||||
|
||||
VPointer::VPointer(const MemNode* mem, const VLoop& vloop,
|
||||
VPointer::VPointer(MemNode* const mem, const VLoop& vloop,
|
||||
Node_Stack* nstack, bool analyze_only) :
|
||||
_mem(mem), _vloop(vloop),
|
||||
_base(nullptr), _adr(nullptr), _scale(0), _offset(0), _invar(nullptr),
|
||||
@ -807,10 +807,50 @@ void VPointer::maybe_add_to_invar(Node* new_invar, bool negate) {
|
||||
_invar = register_if_new(add);
|
||||
}
|
||||
|
||||
// To be in the same group, two VPointers must be the same,
// except for the offset.
int VPointer::cmp_for_sort_by_group(const VPointer** p1, const VPointer** p2) {
  const VPointer* a = *p1;
  const VPointer* b = *p2;

  int cmp_base = a->base()->_idx - b->base()->_idx;
  if (cmp_base != 0) { return cmp_base; }

  int cmp_opcode = a->mem()->Opcode() - b->mem()->Opcode();
  if (cmp_opcode != 0) { return cmp_opcode; }

  int cmp_scale = a->scale_in_bytes() - b->scale_in_bytes();
  if (cmp_scale != 0) { return cmp_scale; }

  int cmp_invar = (a->invar() == nullptr ? 0 : a->invar()->_idx) -
                  (b->invar() == nullptr ? 0 : b->invar()->_idx);
  return cmp_invar;
}

// We compare by group, then by offset, and finally by node idx.
int VPointer::cmp_for_sort(const VPointer** p1, const VPointer** p2) {
  int cmp_group = cmp_for_sort_by_group(p1, p2);
  if (cmp_group != 0) { return cmp_group; }

  const VPointer* a = *p1;
  const VPointer* b = *p2;

  int cmp_offset = a->offset_in_bytes() - b->offset_in_bytes();
  if (cmp_offset != 0) { return cmp_offset; }

  return a->mem()->_idx - b->mem()->_idx;
}

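The comparators above take const VPointer** because the array being sorted holds pointers, and a qsort-style sort hands the comparator the address of each array slot. A small standalone illustration of that signature and of the group-then-offset tie-breaking, with a hypothetical Item type in place of VPointer (not HotSpot code):

#include <cstdio>
#include <cstdlib>

struct Item { int group; int offset; };

// qsort comparator: each argument points at an array slot that itself holds an Item*.
static int cmp_items(const void* a, const void* b) {
  const Item* x = *(const Item* const*)a;
  const Item* y = *(const Item* const*)b;
  if (x->group != y->group) { return x->group - y->group; }  // group first
  return x->offset - y->offset;                              // then offset
}

int main() {
  Item i0{1, 8}, i1{0, 4}, i2{1, 0};
  const Item* items[] = {&i0, &i1, &i2};
  qsort(items, 3, sizeof(items[0]), cmp_items);
  for (const Item* p : items) { printf("(%d,%d) ", p->group, p->offset); }  // (0,4) (1,0) (1,8)
  printf("\n");
  return 0;
}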
#ifndef PRODUCT
|
||||
// Function for printing the fields of a VPointer
|
||||
void VPointer::print() const {
|
||||
tty->print("VPointer[mem: %4d %10s, ", _mem->_idx, _mem->Name());
|
||||
|
||||
if (!valid()) {
|
||||
tty->print_cr("invalid]");
|
||||
return;
|
||||
}
|
||||
|
||||
tty->print("base: %4d, ", _base != nullptr ? _base->_idx : 0);
|
||||
tty->print("adr: %4d, ", _adr != nullptr ? _adr->_idx : 0);
|
||||
|
||||
|
@ -669,7 +669,7 @@ private:
|
||||
// operation in a counted loop for vectorizable analysis.
|
||||
class VPointer : public ArenaObj {
|
||||
protected:
|
||||
const MemNode* _mem; // My memory reference node
|
||||
MemNode* const _mem; // My memory reference node
|
||||
const VLoop& _vloop;
|
||||
|
||||
Node* _base; // null if unsafe nonheap reference
|
||||
@ -711,12 +711,12 @@ class VPointer : public ArenaObj {
|
||||
NotComparable = (Less | Greater | Equal)
|
||||
};
|
||||
|
||||
VPointer(const MemNode* mem, const VLoop& vloop) :
|
||||
VPointer(MemNode* const mem, const VLoop& vloop) :
|
||||
VPointer(mem, vloop, nullptr, false) {}
|
||||
VPointer(const MemNode* mem, const VLoop& vloop, Node_Stack* nstack) :
|
||||
VPointer(MemNode* const mem, const VLoop& vloop, Node_Stack* nstack) :
|
||||
VPointer(mem, vloop, nstack, true) {}
|
||||
private:
|
||||
VPointer(const MemNode* mem, const VLoop& vloop,
|
||||
VPointer(MemNode* const mem, const VLoop& vloop,
|
||||
Node_Stack* nstack, bool analyze_only);
|
||||
// Following is used to create a temporary object during
|
||||
// the pattern match of an address expression.
|
||||
@ -729,7 +729,7 @@ class VPointer : public ArenaObj {
|
||||
|
||||
Node* base() const { return _base; }
|
||||
Node* adr() const { return _adr; }
|
||||
const MemNode* mem() const { return _mem; }
|
||||
MemNode* mem() const { return _mem; }
|
||||
int scale_in_bytes() const { return _scale; }
|
||||
Node* invar() const { return _invar; }
|
||||
int offset_in_bytes() const { return _offset; }
|
||||
@ -781,6 +781,11 @@ class VPointer : public ArenaObj {
|
||||
static bool equal(int cmp) { return cmp == Equal; }
|
||||
static bool comparable(int cmp) { return cmp < NotComparable; }
|
||||
|
||||
// We need to be able to sort the VPointer to efficiently group the
|
||||
// memops into groups, and to find adjacent memops.
|
||||
static int cmp_for_sort_by_group(const VPointer** p1, const VPointer** p2);
|
||||
static int cmp_for_sort(const VPointer** p1, const VPointer** p2);
|
||||
|
||||
NOT_PRODUCT( void print() const; )
|
||||
|
||||
#ifndef PRODUCT
|
||||
|
@ -0,0 +1,474 @@
|
||||
/*
|
||||
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package compiler.loopopts.superword;
|
||||
|
||||
import compiler.lib.ir_framework.*;
|
||||
import jdk.test.lib.Utils;
|
||||
import jdk.test.whitebox.WhiteBox;
|
||||
import java.lang.reflect.Array;
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
import java.util.Random;
|
||||
import java.nio.ByteOrder;
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @bug 8325155
|
||||
* @summary Test some cases that vectorize after the removal of the alignment boundaries code.
|
||||
* Now, we instead check if use-def connections have compatible type size.
|
||||
* @library /test/lib /
|
||||
* @run driver compiler.loopopts.superword.TestCompatibleUseDefTypeSize
|
||||
*/
|
||||
|
||||
public class TestCompatibleUseDefTypeSize {
|
||||
static int RANGE = 1024*8;
|
||||
private static final Random RANDOM = Utils.getRandomInstance();
|
||||
|
||||
// Inputs
|
||||
byte[] aB;
|
||||
byte[] bB;
|
||||
short[] aS;
|
||||
short[] bS;
|
||||
char[] aC;
|
||||
char[] bC;
|
||||
int[] aI;
|
||||
int[] bI;
|
||||
long[] aL;
|
||||
long[] bL;
|
||||
float[] aF;
|
||||
float[] bF;
|
||||
double[] aD;
|
||||
double[] bD;
|
||||
|
||||
// List of tests
|
||||
Map<String,TestFunction> tests = new HashMap<String,TestFunction>();
|
||||
|
||||
// List of gold, the results from the first run before compilation
|
||||
Map<String,Object[]> golds = new HashMap<String,Object[]>();
|
||||
|
||||
interface TestFunction {
|
||||
Object[] run();
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
TestFramework.run();
|
||||
}
|
||||
|
||||
public TestCompatibleUseDefTypeSize() {
|
||||
// Generate input once
|
||||
aB = generateB();
|
||||
bB = generateB();
|
||||
aS = generateS();
|
||||
bS = generateS();
|
||||
aC = generateC();
|
||||
bC = generateC();
|
||||
aI = generateI();
|
||||
bI = generateI();
|
||||
aL = generateL();
|
||||
bL = generateL();
|
||||
aF = generateF();
|
||||
bF = generateF();
|
||||
aD = generateD();
|
||||
bD = generateD();
|
||||
|
||||
// Add all tests to list
|
||||
tests.put("test0", () -> { return test0(aB.clone(), bC.clone()); });
|
||||
tests.put("test1", () -> { return test1(aB.clone(), bC.clone()); });
|
||||
tests.put("test2", () -> { return test2(aB.clone(), bC.clone()); });
|
||||
tests.put("test3", () -> { return test3(aI.clone(), bI.clone()); });
|
||||
tests.put("test4", () -> { return test4(aI.clone(), bI.clone()); });
|
||||
tests.put("test5", () -> { return test5(aI.clone(), bF.clone()); });
|
||||
tests.put("test6", () -> { return test6(aI.clone(), bF.clone()); });
|
||||
tests.put("test7", () -> { return test7(aI.clone(), bF.clone()); });
|
||||
tests.put("test8", () -> { return test8(aL.clone(), bD.clone()); });
|
||||
tests.put("test9", () -> { return test9(aL.clone(), bD.clone()); });
|
||||
tests.put("test10", () -> { return test10(aL.clone(), bD.clone()); });
|
||||
tests.put("test11", () -> { return test11(aC.clone()); });
|
||||
|
||||
// Compute gold value for all test methods before compilation
|
||||
for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
|
||||
String name = entry.getKey();
|
||||
TestFunction test = entry.getValue();
|
||||
Object[] gold = test.run();
|
||||
golds.put(name, gold);
|
||||
}
|
||||
}
|
||||
|
||||
@Warmup(100)
|
||||
@Run(test = {"test0",
|
||||
"test1",
|
||||
"test2",
|
||||
"test3",
|
||||
"test4",
|
||||
"test5",
|
||||
"test6",
|
||||
"test7",
|
||||
"test8",
|
||||
"test9",
|
||||
"test10",
|
||||
"test11"})
|
||||
public void runTests() {
|
||||
for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
|
||||
String name = entry.getKey();
|
||||
TestFunction test = entry.getValue();
|
||||
// Recall gold value from before compilation
|
||||
Object[] gold = golds.get(name);
|
||||
// Compute new result
|
||||
Object[] result = test.run();
|
||||
// Compare gold and new result
|
||||
verify(name, gold, result);
|
||||
}
|
||||
}
|
||||
|
||||
static byte[] generateB() {
|
||||
byte[] a = new byte[RANGE];
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
a[i] = (byte)RANDOM.nextInt();
|
||||
}
|
||||
return a;
|
||||
}
|
||||
|
||||
static short[] generateS() {
|
||||
short[] a = new short[RANGE];
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
a[i] = (short)RANDOM.nextInt();
|
||||
}
|
||||
return a;
|
||||
}
|
||||
|
||||
static char[] generateC() {
|
||||
char[] a = new char[RANGE];
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
a[i] = (char)RANDOM.nextInt();
|
||||
}
|
||||
return a;
|
||||
}
|
||||
|
||||
static int[] generateI() {
|
||||
int[] a = new int[RANGE];
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
a[i] = RANDOM.nextInt();
|
||||
}
|
||||
return a;
|
||||
}
|
||||
|
||||
static long[] generateL() {
|
||||
long[] a = new long[RANGE];
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
a[i] = RANDOM.nextLong();
|
||||
}
|
||||
return a;
|
||||
}
|
||||
|
||||
static float[] generateF() {
|
||||
float[] a = new float[RANGE];
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
a[i] = Float.intBitsToFloat(RANDOM.nextInt());
|
||||
}
|
||||
return a;
|
||||
}
|
||||
|
||||
static double[] generateD() {
|
||||
double[] a = new double[RANGE];
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
a[i] = Double.longBitsToDouble(RANDOM.nextLong());
|
||||
}
|
||||
return a;
|
||||
}
|
||||
|
||||
static void verify(String name, Object[] gold, Object[] result) {
|
||||
if (gold.length != result.length) {
|
||||
throw new RuntimeException("verify " + name + ": not the same number of outputs: gold.length = " +
|
||||
gold.length + ", result.length = " + result.length);
|
||||
}
|
||||
for (int i = 0; i < gold.length; i++) {
|
||||
Object g = gold[i];
|
||||
Object r = result[i];
|
||||
if (g.getClass() != r.getClass() || !g.getClass().isArray() || !r.getClass().isArray()) {
|
||||
throw new RuntimeException("verify " + name + ": must both be array of same type:" +
|
||||
" gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
|
||||
" result[" + i + "].getClass() = " + r.getClass().getSimpleName());
|
||||
}
|
||||
if (g == r) {
|
||||
throw new RuntimeException("verify " + name + ": should be two separate arrays (with identical content):" +
|
||||
" gold[" + i + "] == result[" + i + "]");
|
||||
}
|
||||
if (Array.getLength(g) != Array.getLength(r)) {
|
||||
throw new RuntimeException("verify " + name + ": arrays must have same length:" +
|
||||
" gold[" + i + "].length = " + Array.getLength(g) +
|
||||
" result[" + i + "].length = " + Array.getLength(r));
|
||||
}
|
||||
Class c = g.getClass().getComponentType();
|
||||
if (c == byte.class) {
|
||||
verifyB(name, i, (byte[])g, (byte[])r);
|
||||
} else if (c == short.class) {
|
||||
verifyS(name, i, (short[])g, (short[])r);
|
||||
} else if (c == char.class) {
|
||||
verifyC(name, i, (char[])g, (char[])r);
|
||||
} else if (c == int.class) {
|
||||
verifyI(name, i, (int[])g, (int[])r);
|
||||
} else if (c == long.class) {
|
||||
verifyL(name, i, (long[])g, (long[])r);
|
||||
} else if (c == float.class) {
|
||||
verifyF(name, i, (float[])g, (float[])r);
|
||||
} else if (c == double.class) {
|
||||
verifyD(name, i, (double[])g, (double[])r);
|
||||
} else {
|
||||
throw new RuntimeException("verify " + name + ": array type not supported for verify:" +
|
||||
" gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
|
||||
" result[" + i + "].getClass() = " + r.getClass().getSimpleName());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void verifyB(String name, int i, byte[] g, byte[] r) {
|
||||
for (int j = 0; j < g.length; j++) {
|
||||
if (g[j] != r[j]) {
|
||||
throw new RuntimeException("verify " + name + ": arrays must have same content:" +
|
||||
" gold[" + i + "][" + j + "] = " + g[j] +
|
||||
" result[" + i + "][" + j + "] = " + r[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void verifyS(String name, int i, short[] g, short[] r) {
|
||||
for (int j = 0; j < g.length; j++) {
|
||||
if (g[j] != r[j]) {
|
||||
throw new RuntimeException("verify " + name + ": arrays must have same content:" +
|
||||
" gold[" + i + "][" + j + "] = " + g[j] +
|
||||
" result[" + i + "][" + j + "] = " + r[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void verifyC(String name, int i, char[] g, char[] r) {
|
||||
for (int j = 0; j < g.length; j++) {
|
||||
if (g[j] != r[j]) {
|
||||
throw new RuntimeException("verify " + name + ": arrays must have same content:" +
|
||||
" gold[" + i + "][" + j + "] = " + g[j] +
|
||||
" result[" + i + "][" + j + "] = " + r[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void verifyI(String name, int i, int[] g, int[] r) {
|
||||
for (int j = 0; j < g.length; j++) {
|
||||
if (g[j] != r[j]) {
|
||||
throw new RuntimeException("verify " + name + ": arrays must have same content:" +
|
||||
" gold[" + i + "][" + j + "] = " + g[j] +
|
||||
" result[" + i + "][" + j + "] = " + r[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void verifyL(String name, int i, long[] g, long[] r) {
|
||||
for (int j = 0; j < g.length; j++) {
|
||||
if (g[j] != r[j]) {
|
||||
throw new RuntimeException("verify " + name + ": arrays must have same content:" +
|
||||
" gold[" + i + "][" + j + "] = " + g[j] +
|
||||
" result[" + i + "][" + j + "] = " + r[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void verifyF(String name, int i, float[] g, float[] r) {
|
||||
for (int j = 0; j < g.length; j++) {
|
||||
if (Float.floatToIntBits(g[j]) != Float.floatToIntBits(r[j])) {
|
||||
throw new RuntimeException("verify " + name + ": arrays must have same content:" +
|
||||
" gold[" + i + "][" + j + "] = " + g[j] +
|
||||
" result[" + i + "][" + j + "] = " + r[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void verifyD(String name, int i, double[] g, double[] r) {
|
||||
for (int j = 0; j < g.length; j++) {
|
||||
if (Double.doubleToLongBits(g[j]) != Double.doubleToLongBits(r[j])) {
|
||||
throw new RuntimeException("verify " + name + ": arrays must have same content:" +
|
||||
" gold[" + i + "][" + j + "] = " + g[j] +
|
||||
" result[" + i + "][" + j + "] = " + r[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.STORE_VECTOR, "= 0"},
|
||||
applyIfPlatform = {"64-bit", "true"},
|
||||
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||||
// "inflate" method: 1 byte -> 2 byte.
|
||||
// Java scalar code has no explicit conversion.
|
||||
// Vector code would need a conversion. We may add this in the future.
|
||||
static Object[] test0(byte[] src, char[] dst) {
|
||||
for (int i = 0; i < src.length; i++) {
|
||||
dst[i] = (char)(src[i] & 0xff);
|
||||
}
|
||||
return new Object[]{ src, dst };
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.STORE_VECTOR, "= 0"},
|
||||
applyIfPlatform = {"64-bit", "true"},
|
||||
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||||
// "inflate" method: 1 byte -> 2 byte.
|
||||
// Java scalar code has no explicit conversion.
|
||||
// Vector code would need a conversion. We may add this in the future.
|
||||
static Object[] test1(byte[] src, char[] dst) {
|
||||
for (int i = 0; i < src.length; i++) {
|
||||
dst[i] = (char)(src[i]);
|
||||
}
|
||||
return new Object[]{ src, dst };
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.STORE_VECTOR, "= 0"},
|
||||
applyIfPlatform = {"64-bit", "true"},
|
||||
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||||
// "deflate" method: 2 byte -> 1 byte.
|
||||
// Java scalar code has no explicit conversion.
|
||||
// Vector code would need a conversion. We may add this in the future.
|
||||
static Object[] test2(byte[] src, char[] dst) {
|
||||
for (int i = 0; i < src.length; i++) {
|
||||
src[i] = (byte)(dst[i]);
|
||||
}
|
||||
return new Object[]{ src, dst };
|
||||
}
|
||||
|
||||
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
              IRNode.ADD_VI, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
// Used to not vectorize because of "alignment boundaries".
// Assume 64 byte vector width:
// a[i+0:i+15] and a[i+1:i+16], each are 4 * 16 = 64 byte.
// The alignment boundary is every 64 byte, so one of the two vectors gets cut up.
static Object[] test3(int[] a, int[] b) {
    for (int i = 0; i < a.length-1; i++) {
        a[i] = (int)(b[i] + a[i+1]);
    }
    return new Object[]{ a, b };
}

@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
|
||||
IRNode.ADD_VI, "> 0",
|
||||
IRNode.STORE_VECTOR, "> 0"},
|
||||
applyIfPlatform = {"64-bit", "true"},
|
||||
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||||
// same as test3, but hand-unrolled
|
||||
static Object[] test4(int[] a, int[] b) {
|
||||
for (int i = 0; i < a.length-2; i+=2) {
|
||||
a[i+0] = (int)(b[i+0] + a[i+1]);
|
||||
a[i+1] = (int)(b[i+1] + a[i+2]);
|
||||
}
|
||||
return new Object[]{ a, b };
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.STORE_VECTOR, "= 0"},
|
||||
applyIfPlatform = {"64-bit", "true"},
|
||||
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||||
// In theory, one would expect this to be a simple 4byte -> 4byte conversion.
|
||||
// But there is a CmpF and CMove here because we check for isNaN. Plus a MoveF2I.
|
||||
//
|
||||
// Would be nice to vectorize: Missing support for CmpF, CMove and MoveF2I.
|
||||
static Object[] test5(int[] a, float[] b) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
a[i] = Float.floatToIntBits(b[i]);
|
||||
}
|
||||
return new Object[]{ a, b };
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.STORE_VECTOR, "= 0"},
|
||||
applyIfPlatform = {"64-bit", "true"},
|
||||
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||||
// Missing support for MoveF2I
|
||||
static Object[] test6(int[] a, float[] b) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
a[i] = Float.floatToRawIntBits(b[i]);
|
||||
}
|
||||
return new Object[]{ a, b };
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.STORE_VECTOR, "= 0"},
|
||||
applyIfPlatform = {"64-bit", "true"},
|
||||
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||||
// Missing support for MoveI2F
|
||||
static Object[] test7(int[] a, float[] b) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
b[i] = Float.intBitsToFloat(a[i]);
|
||||
}
|
||||
return new Object[]{ a, b };
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.STORE_VECTOR, "= 0"},
|
||||
applyIfPlatform = {"64-bit", "true"},
|
||||
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||||
// Missing support for Needs CmpD, CMove and MoveD2L
|
||||
static Object[] test8(long[] a, double[] b) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
a[i] = Double.doubleToLongBits(b[i]);
|
||||
}
|
||||
return new Object[]{ a, b };
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.STORE_VECTOR, "= 0"},
|
||||
applyIfPlatform = {"64-bit", "true"},
|
||||
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||||
// Missing support for MoveD2L
|
||||
static Object[] test9(long[] a, double[] b) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
a[i] = Double.doubleToRawLongBits(b[i]);
|
||||
}
|
||||
return new Object[]{ a, b };
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.STORE_VECTOR, "= 0"},
|
||||
applyIfPlatform = {"64-bit", "true"},
|
||||
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||||
// Missing support for MoveL2D
|
||||
static Object[] test10(long[] a, double[] b) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
b[i] = Double.longBitsToDouble(a[i]);
|
||||
}
|
||||
return new Object[]{ a, b };
|
||||
}
|
||||
|
||||
@Test
|
||||
// MaxI reduction is with char type, but the MaxI char vector is not implemented.
|
||||
static Object[] test11(char[] a) {
|
||||
char m = 0;
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
m = (char)Math.max(m, a[i]);
|
||||
a[i] = 0;
|
||||
}
|
||||
return new Object[]{ a, new char[] { m } };
|
||||
}
|
||||
}
|
@ -390,9 +390,9 @@ public class TestSplitPacks {
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
|
||||
IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "= 0",
|
||||
IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
|
||||
IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
|
||||
IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "= 0",
|
||||
IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
|
||||
IRNode.STORE_VECTOR, "> 0"},
|
||||
applyIf = {"MaxVectorSize", ">=32"},
|
||||
applyIfPlatform = {"64-bit", "true"},
|
||||
@ -405,8 +405,6 @@ public class TestSplitPacks {
|
||||
// | | \ \ \ \
|
||||
// 0 1 - - 4 5 6 7
|
||||
//
|
||||
// The 4-pack does not vectorize. This is a technical limitation that
|
||||
// we can hopefully soon remove. Load and store offsets are different.
|
||||
static Object[] test2a(int[] a, int[] b, int mask) {
|
||||
for (int i = 0; i < RANGE; i+=8) {
|
||||
int b0 = a[i+0] & mask;
|
||||
@ -428,9 +426,9 @@ public class TestSplitPacks {
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "= 0",
|
||||
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
|
||||
IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
|
||||
IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "= 0",
|
||||
IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
|
||||
IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
|
||||
IRNode.STORE_VECTOR, "> 0"},
|
||||
applyIf = {"MaxVectorSize", ">=32"},
|
||||
@ -444,8 +442,6 @@ public class TestSplitPacks {
|
||||
// | | | | \ \
|
||||
// 0 1 2 3 -- 6 7
|
||||
//
|
||||
// The 2-pack does not vectorize. This is a technical limitation that
|
||||
// we can hopefully soon remove. Load and store offsets are different.
|
||||
static Object[] test2b(int[] a, int[] b, int mask) {
|
||||
for (int i = 0; i < RANGE; i+=8) {
|
||||
int b0 = a[i+0] & mask;
|
||||
@ -468,9 +464,9 @@ public class TestSplitPacks {
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
|
||||
IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "= 0",
|
||||
IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
|
||||
IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
|
||||
IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "= 0",
|
||||
IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
|
||||
IRNode.STORE_VECTOR, "> 0"},
|
||||
applyIf = {"MaxVectorSize", ">=32"},
|
||||
applyIfPlatform = {"64-bit", "true"},
|
||||
@ -483,8 +479,6 @@ public class TestSplitPacks {
|
||||
// | | / / / /
|
||||
// 0 1 2 3 4 5 - -
|
||||
//
|
||||
// The 4-pack does not vectorize. This is a technical limitation that
|
||||
// we can hopefully soon remove. Load and store offsets are different.
|
||||
static Object[] test2c(int[] a, int[] b, int mask) {
|
||||
for (int i = 0; i < RANGE; i+=8) {
|
||||
int b0 = a[i+0] & mask;
|
||||
@ -506,9 +500,9 @@ public class TestSplitPacks {
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "= 0",
|
||||
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
|
||||
IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
|
||||
IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "= 0",
|
||||
IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
|
||||
IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
|
||||
IRNode.STORE_VECTOR, "> 0"},
|
||||
applyIf = {"MaxVectorSize", ">=32"},
|
||||
@ -522,8 +516,6 @@ public class TestSplitPacks {
|
||||
// | | | | / /
|
||||
// 0 1 2 3 4 5 - -
|
||||
//
|
||||
// The 2-pack does not vectorize. This is a technical limitation that
|
||||
// we can hopefully soon remove. Load and store offsets are different.
|
||||
static Object[] test2d(int[] a, int[] b, int mask) {
|
||||
for (int i = 0; i < RANGE; i+=8) {
|
||||
int b0 = a[i+0] & mask;
|
||||