8325252: C2 SuperWord: refactor the packset

Reviewed-by: chagedorn, kvn
Emanuel Peter 2024-04-02 06:10:08 +00:00
parent 6b1b0e9d45
commit 5cddc2de49
6 changed files with 862 additions and 651 deletions

File diff suppressed because it is too large.


@@ -57,6 +57,333 @@
class VPointer;
// The PairSet is a set of pairs. These are later combined into packs,
// and stored in the PackSet.
class PairSet : public StackObj {
private:
const VLoop& _vloop;
const VLoopBody& _body;
// Doubly-linked pairs. If not linked: -1
GrowableArray<int> _left_to_right; // bb_idx -> bb_idx
GrowableArray<int> _right_to_left; // bb_idx -> bb_idx
// Example:
//
// Pairs: (n1, n2) and (n2, n3)
// bb_idx(n1) = 1
// bb_idx(n2) = 3
// bb_idx(n3) = 5
//
// index / bb_idx: 0 1 2 3 4 5 6
//
// left_to_right: | | 3 | | 5 | | | |
// n1----->
// n2----->
//
// right_to_left: | | | | 1 | | 3 | |
// <------n2
// <------n3
//
// Nodes with bb_idx 0, 2, 4, and 6 are in no pair; they are thus neither left nor right elements,
// and hence have no entries in the mappings.
//
// Nodes with bb_idx 1 and 3 (n1 and n2) are both a left element in some pair. Therefore, they both
// have an entry in the left_to_right mapping. This mapping indicates which right element they are
// paired with, namely the nodes with bb_idx 3 and 5 (n2 and n3), respectively.
//
// Nodes with bb_idx 3 and 5 (n2 and n3) are both a right element in some pair. Therefore, they both
// have an entry in the right_to_left mapping. This mapping indicates which left element they are
// paired with, namely the nodes with bb_idx 1 and 3 (n1 and n2), respectively.
//
// Node n1 with bb_idx 1 is not a right element in any pair, thus it has no entry in right_to_left.
//
// Node n2 with bb_idx 3 is both a left element of pair (n2, n3), and a right element of pair (n1, n2).
// Thus it has entries in both left_to_right (mapping n2->n3) and right_to_left (mapping n2->n1).
//
// Node n3 with bb_idx 5 is not a left element in any pair, thus it has no entry in left_to_right.
// List of all left elements bb_idx, in the order of pair addition.
GrowableArray<int> _lefts_in_insertion_order;
public:
// Initialize empty, i.e. all not linked (-1).
PairSet(Arena* arena, const VLoopAnalyzer& vloop_analyzer) :
_vloop(vloop_analyzer.vloop()),
_body(vloop_analyzer.body()),
_left_to_right(arena, _body.body().length(), _body.body().length(), -1),
_right_to_left(arena, _body.body().length(), _body.body().length(), -1),
_lefts_in_insertion_order(arena, 8, 0, 0) {}
const VLoopBody& body() const { return _body; }
bool is_empty() const { return _lefts_in_insertion_order.is_empty(); }
bool is_left(int i) const { return _left_to_right.at(i) != -1; }
bool is_right(int i) const { return _right_to_left.at(i) != -1; }
bool is_left(const Node* n) const { return _vloop.in_bb(n) && is_left( _body.bb_idx(n)); }
bool is_right(const Node* n) const { return _vloop.in_bb(n) && is_right(_body.bb_idx(n)); }
bool is_pair(const Node* n1, const Node* n2) const { return is_left(n1) && get_right_for(n1) == n2; }
bool is_left_in_a_left_most_pair(int i) const { return is_left(i) && !is_right(i); }
bool is_right_in_a_right_most_pair(int i) const { return !is_left(i) && is_right(i); }
bool is_left_in_a_left_most_pair(const Node* n) const { return is_left_in_a_left_most_pair( _body.bb_idx(n)); }
bool is_right_in_a_right_most_pair(const Node* n) const { return is_right_in_a_right_most_pair(_body.bb_idx(n)); }
int get_right_for(int i) const { return _left_to_right.at(i); }
Node* get_right_for(const Node* n) const { return _body.body().at(get_right_for(_body.bb_idx(n))); }
Node* get_right_or_null_for(const Node* n) const { return is_left(n) ? get_right_for(n) : nullptr; }
// To access elements in insertion order:
int length() const { return _lefts_in_insertion_order.length(); }
Node* left_at_in_insertion_order(int i) const { return _body.body().at(_lefts_in_insertion_order.at(i)); }
Node* right_at_in_insertion_order(int i) const { return _body.body().at(get_right_for(_lefts_in_insertion_order.at(i))); }
void add_pair(Node* n1, Node* n2) {
assert(n1 != nullptr && n2 != nullptr && n1 != n2, "no nullptr, and different nodes");
assert(!is_left(n1) && !is_right(n2), "cannot be left twice, or right twice");
int bb_idx_1 = _body.bb_idx(n1);
int bb_idx_2 = _body.bb_idx(n2);
_left_to_right.at_put(bb_idx_1, bb_idx_2);
_right_to_left.at_put(bb_idx_2, bb_idx_1);
_lefts_in_insertion_order.append(bb_idx_1);
assert(is_left(n1) && is_right(n2), "must be set now");
}
NOT_PRODUCT(void print() const;)
};
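
To make the two mappings concrete, here is a minimal standalone sketch of the same index scheme, with std::vector<int> standing in for GrowableArray and bare bb_idx ints standing in for nodes. SimplePairSet and everything else below is illustrative only, not HotSpot code:

#include <cassert>
#include <cstdio>
#include <vector>

// Simplified model of the PairSet arrays: indexed by bb_idx, -1 = not linked.
struct SimplePairSet {
  std::vector<int> left_to_right;            // bb_idx of left  -> bb_idx of right
  std::vector<int> right_to_left;            // bb_idx of right -> bb_idx of left
  std::vector<int> lefts_in_insertion_order;

  explicit SimplePairSet(int body_length)
    : left_to_right(body_length, -1), right_to_left(body_length, -1) {}

  bool is_left (int i) const { return left_to_right[i] != -1; }
  bool is_right(int i) const { return right_to_left[i] != -1; }

  void add_pair(int bb_idx_1, int bb_idx_2) {
    assert(!is_left(bb_idx_1) && !is_right(bb_idx_2)); // left/right at most once
    left_to_right[bb_idx_1] = bb_idx_2;
    right_to_left[bb_idx_2] = bb_idx_1;
    lefts_in_insertion_order.push_back(bb_idx_1);
  }
};

int main() {
  SimplePairSet ps(7);  // bb_idx 0..6, matching the example in the comment above
  ps.add_pair(1, 3);    // (n1, n2)
  ps.add_pair(3, 5);    // (n2, n3)
  printf("right of 1: %d, left of 5: %d\n",
         ps.left_to_right[1], ps.right_to_left[5]); // prints 3 and 3
  return 0;
}

Note how bb_idx 3 has an entry in both arrays, mirroring n2 being the right element of (n1, n2) and the left element of (n2, n3).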
// Iterate over the PairSet, pair-chain by pair-chain.
// A pair-chain starts with a "left-most" pair (n1, n2), where n1 is never a right-element
// in any pair. We walk a chain: (n2, n3), (n3, n4) ... until we hit a "right-most" pair
// where the right-element is never a left-element of any pair.
// These pair-chains will later be combined into packs by combine_pairs_to_longer_packs.
class PairSetIterator : public StackObj {
private:
const PairSet& _pairset;
const VLoopBody& _body;
int _chain_start_bb_idx; // bb_idx of left-element in the left-most pair.
int _current_bb_idx; // bb_idx of left-element of the current pair.
const int _end_bb_idx;
public:
PairSetIterator(const PairSet& pairset) :
_pairset(pairset),
_body(pairset.body()),
_chain_start_bb_idx(-1),
_current_bb_idx(-1),
_end_bb_idx(_body.body().length())
{
next_chain();
}
bool done() const {
return _chain_start_bb_idx >= _end_bb_idx;
}
Node* left() const {
return _body.body().at(_current_bb_idx);
}
Node* right() const {
int bb_idx_2 = _pairset.get_right_for(_current_bb_idx);
return _body.body().at(bb_idx_2);
}
// Try to keep walking on the current pair-chain, else find a new pair-chain.
void next() {
assert(_pairset.is_left(_current_bb_idx), "current was valid");
_current_bb_idx = _pairset.get_right_for(_current_bb_idx);
if (!_pairset.is_left(_current_bb_idx)) {
next_chain();
}
}
private:
void next_chain() {
do {
_chain_start_bb_idx++;
} while (!done() && !_pairset.is_left_in_a_left_most_pair(_chain_start_bb_idx));
_current_bb_idx = _chain_start_bb_idx;
}
};
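
On the same simplified arrays, the chain walk that PairSetIterator performs can be sketched as follows (illustrative C++, not HotSpot code). For the example above it visits (1, 3) and then (3, 5) as a single pair-chain:

#include <cstdio>
#include <vector>

int main() {
  const int n = 7;
  std::vector<int> left_to_right(n, -1), right_to_left(n, -1);
  left_to_right[1] = 3; right_to_left[3] = 1;  // pair (bb_idx 1, bb_idx 3)
  left_to_right[3] = 5; right_to_left[5] = 3;  // pair (bb_idx 3, bb_idx 5)

  for (int start = 0; start < n; start++) {
    // A chain starts at a left element that is not also a right element.
    if (left_to_right[start] == -1 || right_to_left[start] != -1) { continue; }
    for (int cur = start; left_to_right[cur] != -1; cur = left_to_right[cur]) {
      printf("pair (%d, %d)\n", cur, left_to_right[cur]);
    }
  }
  return 0;
}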
class SplitTask {
private:
enum Kind {
// The lambda method for split_packs can return one of these tasks:
Unchanged, // The pack is left in the packset, unchanged.
Rejected, // The pack is removed from the packset.
Split, // Split away split_size nodes from the end of the pack.
};
const Kind _kind;
const uint _split_size;
const char* _message;
SplitTask(const Kind kind, const uint split_size, const char* message) :
_kind(kind), _split_size(split_size), _message(message)
{
assert(message != nullptr, "must have message");
assert(_kind != Unchanged || split_size == 0, "unchanged task conditions");
assert(_kind != Rejected || split_size == 0, "reject task conditions");
assert(_kind != Split || split_size != 0, "split task conditions");
}
public:
static SplitTask make_split(const uint split_size, const char* message) {
return SplitTask(Split, split_size, message);
}
static SplitTask make_unchanged() {
return SplitTask(Unchanged, 0, "unchanged");
}
static SplitTask make_rejected(const char* message) {
return SplitTask(Rejected, 0, message);
}
bool is_unchanged() const { return _kind == Unchanged; }
bool is_rejected() const { return _kind == Rejected; }
bool is_split() const { return _kind == Split; }
const char* message() const { return _message; }
uint split_size() const {
assert(is_split(), "only split tasks have split_size");
return _split_size;
}
};
class SplitStatus {
private:
enum Kind {
// After split_pack, we have: first_pack second_pack
Unchanged, // The pack is left in the packset, unchanged. old_pack nullptr
Rejected, // The pack is removed from the packset. nullptr nullptr
Modified, // The pack had some nodes removed. old_pack nullptr
Split, // The pack was split into two packs. pack1 pack2
};
Kind _kind;
Node_List* _first_pack;
Node_List* _second_pack;
SplitStatus(Kind kind, Node_List* first_pack, Node_List* second_pack) :
_kind(kind), _first_pack(first_pack), _second_pack(second_pack)
{
assert(_kind != Unchanged || (first_pack != nullptr && second_pack == nullptr), "unchanged status conditions");
assert(_kind != Rejected || (first_pack == nullptr && second_pack == nullptr), "rejected status conditions");
assert(_kind != Modified || (first_pack != nullptr && second_pack == nullptr), "modified status conditions");
assert(_kind != Split || (first_pack != nullptr && second_pack != nullptr), "split status conditions");
}
public:
static SplitStatus make_unchanged(Node_List* old_pack) {
return SplitStatus(Unchanged, old_pack, nullptr);
}
static SplitStatus make_rejected() {
return SplitStatus(Rejected, nullptr, nullptr);
}
static SplitStatus make_modified(Node_List* first_pack) {
return SplitStatus(Modified, first_pack, nullptr);
}
static SplitStatus make_split(Node_List* first_pack, Node_List* second_pack) {
return SplitStatus(Split, first_pack, second_pack);
}
bool is_unchanged() const { return _kind == Unchanged; }
Node_List* first_pack() const { return _first_pack; }
Node_List* second_pack() const { return _second_pack; }
};
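
How SplitTask and SplitStatus interact can be illustrated with a standalone analog of the split_packs driver (plain C++ with hypothetical names, not HotSpot code): a strategy callback returns one task per pack, and the driver rejects, keeps, or splits the pack accordingly, splitting away the requested number of nodes from the end:

#include <cstdio>
#include <functional>
#include <vector>

using Pack = std::vector<int>;

struct Task { enum Kind { Unchanged, Rejected, Split } kind; size_t split_size; };

void split_packs(std::vector<Pack>& packs,
                 const std::function<Task(const Pack&)>& strategy) {
  std::vector<Pack> out;
  for (const Pack& p : packs) {
    Task t = strategy(p);
    if (t.kind == Task::Rejected)  { continue; }                  // drop pack
    if (t.kind == Task::Unchanged) { out.push_back(p); continue; }
    Pack first (p.begin(), p.end() - t.split_size);               // leading part
    Pack second(p.end() - t.split_size, p.end());                 // trailing part
    out.push_back(first);
    out.push_back(second);
  }
  packs = out;
}

int main() {
  std::vector<Pack> packs = {{1, 2, 3, 4, 5, 6, 7, 8}, {1, 2}};
  // Example strategy: cap packs at 4 nodes by splitting off the excess.
  split_packs(packs, [](const Pack& p) -> Task {
    if (p.size() > 4) { return {Task::Split, p.size() - 4}; }
    return {Task::Unchanged, 0};
  });
  for (const Pack& p : packs) { printf("pack of size %zu\n", p.size()); } // 4 4 2
  return 0;
}

The filter_packs template below follows the same callback shape, with a predicate deciding whether a pack stays in the set.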
class PackSet : public StackObj {
private:
const VLoop& _vloop;
const VLoopBody& _body;
// Set of all packs:
GrowableArray<Node_List*> _packs;
// Mapping from nodes to their pack: bb_idx -> pack
GrowableArray<Node_List*> _node_to_pack;
NOT_PRODUCT(const bool _trace_packset;)
NOT_PRODUCT(const bool _trace_rejections;)
public:
// Initialize empty, i.e. no packs, and unmapped (nullptr).
PackSet(Arena* arena, const VLoopAnalyzer& vloop_analyzer
NOT_PRODUCT(COMMA bool trace_packset COMMA bool trace_rejections)
) :
_vloop(vloop_analyzer.vloop()),
_body(vloop_analyzer.body()),
_packs(arena, 8, 0, nullptr),
_node_to_pack(arena, _body.body().length(), _body.body().length(), nullptr)
NOT_PRODUCT(COMMA _trace_packset(trace_packset))
NOT_PRODUCT(COMMA _trace_rejections(trace_rejections))
{}
// Accessors to iterate over packs.
int length() const { return _packs.length(); }
bool is_empty() const { return _packs.is_empty(); }
Node_List* at(int i) const { return _packs.at(i); }
private:
void map_node_in_pack(const Node* n, Node_List* new_pack) {
assert(get_pack(n) == nullptr, "was previously unmapped");
_node_to_pack.at_put(_body.bb_idx(n), new_pack);
}
void remap_node_in_pack(const Node* n, Node_List* new_pack) {
assert(get_pack(n) != nullptr && new_pack != nullptr && get_pack(n) != new_pack, "was previously mapped");
_node_to_pack.at_put(_body.bb_idx(n), new_pack);
}
void unmap_node_in_pack(const Node* n) {
assert(get_pack(n) != nullptr, "was previously mapped");
_node_to_pack.at_put(_body.bb_idx(n), nullptr);
}
void unmap_all_nodes_in_pack(Node_List* old_pack) {
for (uint i = 0; i < old_pack->size(); i++) {
unmap_node_in_pack(old_pack->at(i));
}
}
public:
Node_List* get_pack(const Node* n) const { return !_vloop.in_bb(n) ? nullptr : _node_to_pack.at(_body.bb_idx(n)); }
void add_pack(Node_List* pack) {
_packs.append(pack);
for (uint i = 0; i < pack->size(); i++) {
Node* n = pack->at(i);
map_node_in_pack(n, pack);
}
}
private:
SplitStatus split_pack(const char* split_name, Node_List* pack, SplitTask task);
public:
template <typename SplitStrategy>
void split_packs(const char* split_name, SplitStrategy strategy);
template <typename FilterPredicate>
void filter_packs(const char* filter_name,
const char* rejection_message,
FilterPredicate filter);
void clear() { _packs.clear(); }
private:
NOT_PRODUCT(bool is_trace_superword_packset() const { return _trace_packset; })
NOT_PRODUCT(bool is_trace_superword_rejections() const { return _trace_rejections; })
public:
DEBUG_ONLY(void verify() const;)
NOT_PRODUCT(void print() const;)
NOT_PRODUCT(static void print_pack(Node_List* pack);)
};
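
Finally, a sketch of how pair-chains become packs together with the bb_idx-to-pack mapping, in the spirit of combine_pairs_to_longer_packs and PackSet::add_pack (same simplified model as above, not HotSpot code). The chain (1, 3), (3, 5) collapses into the single pack {1, 3, 5}:

#include <cstdio>
#include <vector>

int main() {
  const int n = 7;
  std::vector<int> left_to_right(n, -1), right_to_left(n, -1);
  left_to_right[1] = 3; right_to_left[3] = 1;
  left_to_right[3] = 5; right_to_left[5] = 3;

  std::vector<std::vector<int>> packs;
  std::vector<int> node_to_pack(n, -1);  // bb_idx -> index into packs, or -1
  for (int start = 0; start < n; start++) {
    if (left_to_right[start] == -1 || right_to_left[start] != -1) { continue; }
    std::vector<int> pack;
    int cur = start;
    for (; left_to_right[cur] != -1; cur = left_to_right[cur]) { pack.push_back(cur); }
    pack.push_back(cur);  // right element of the right-most pair ends the pack
    for (int bb_idx : pack) { node_to_pack[bb_idx] = (int)packs.size(); }
    packs.push_back(pack);
  }
  printf("pack 0 has %zu nodes\n", packs[0].size());  // 3
  return 0;
}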
// ========================= SuperWord =====================
// -----------------------------SWNodeInfo---------------------------------
@@ -64,9 +391,8 @@ class VPointer;
class SWNodeInfo {
public:
int _alignment; // memory alignment for a node
Node_List* _my_pack; // pack containing this node
SWNodeInfo() : _alignment(-1), _my_pack(nullptr) {}
SWNodeInfo() : _alignment(-1) {}
static const SWNodeInfo initial;
};
@@ -83,12 +409,13 @@ class SuperWord : public ResourceObj {
enum consts { top_align = -1, bottom_align = -666 };
GrowableArray<Node_List*> _packset; // Packs for the current block
GrowableArray<SWNodeInfo> _node_info; // Info needed per node
CloneMap& _clone_map; // map of nodes created in cloning
MemNode const* _align_to_ref; // Memory reference that pre-loop will align to
PairSet _pairset;
PackSet _packset;
public:
SuperWord(const VLoopAnalyzer &vloop_analyzer);
@@ -112,7 +439,7 @@ class SuperWord : public ResourceObj {
return _vloop_analyzer.reductions().is_marked_reduction(n);
}
bool reduction(Node* n1, Node* n2) const {
bool reduction(const Node* n1, const Node* n2) const {
return _vloop_analyzer.reductions().is_marked_reduction_pair(n1, n2);
}
@@ -219,9 +546,10 @@ class SuperWord : public ResourceObj {
bool do_vector_loop() { return _do_vector_loop; }
const GrowableArray<Node_List*>& packset() const { return _packset; }
const PackSet& packset() const { return _packset; }
Node_List* get_pack(const Node* n) const { return _packset.get_pack(n); }
private:
bool _race_possible; // In cases where SDMU is true
bool _do_vector_loop; // whether to do vectorization/simd style
int _num_work_vecs; // Number of non memory vector operations
int _num_reductions; // Number of reduction expressions applied
@@ -240,18 +568,13 @@ class SuperWord : public ResourceObj {
bool vectors_should_be_aligned() { return !Matcher::misaligned_vectors_ok() || AlignVector; }
// memory alignment for a node
int alignment(Node* n) { return _node_info.adr_at(bb_idx(n))->_alignment; }
int alignment(Node* n) const { return _node_info.adr_at(bb_idx(n))->_alignment; }
void set_alignment(Node* n, int a) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_alignment = a; }
// my_pack
public:
Node_List* my_pack(const Node* n) const { return !in_bb(n) ? nullptr : _node_info.adr_at(bb_idx(n))->_my_pack; }
private:
void set_my_pack(Node* n, Node_List* p) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_my_pack = p; }
// is pack good for converting into one vector node replacing bunches of Cmp, Bool, CMov nodes.
static bool requires_long_to_int_conversion(int opc);
// For pack p, are all idx operands the same?
bool same_inputs(const Node_List* p, int idx);
bool same_inputs(const Node_List* p, int idx) const;
// CloneMap utilities
bool same_origin_idx(Node* a, Node* b) const;
bool same_generation(Node* a, Node* b) const;
@@ -267,10 +590,8 @@ private:
// Can s1 and s2 be in a pack with s1 immediately preceding s2 and s1 aligned at "align"
bool stmts_can_pack(Node* s1, Node* s2, int align);
// Does s exist in a pack at position pos?
bool exists_at(Node* s, uint pos);
// Is s1 immediately before s2 in memory?
bool are_adjacent_refs(Node* s1, Node* s2);
bool are_adjacent_refs(Node* s1, Node* s2) const;
// Are s1 and s2 similar?
bool isomorphic(Node* s1, Node* s2);
// Do we have pattern n1 = (iv + c) and n2 = (iv + c + 1)?
@@ -279,143 +600,31 @@ private:
// do s1 and s2 have similar input edges?
bool have_similar_inputs(Node* s1, Node* s2);
void set_alignment(Node* s1, Node* s2, int align);
// Extend packset by following use->def and def->use links from pack members.
void extend_packset_with_more_pairs_by_following_use_and_def();
int adjust_alignment_for_type_conversion(Node* s, Node* t, int align);
// Extend the packset by visiting operand definitions of nodes in pack p
bool follow_use_defs(Node_List* p);
// Extend the packset by visiting uses of nodes in pack p
bool follow_def_uses(Node_List* p);
// For extended packsets, ordinally arrange uses packset by major component
void order_def_uses(Node_List* p);
// Estimate the savings from executing s1 and s2 as a pack
int est_savings(Node* s1, Node* s2);
int adjacent_profit(Node* s1, Node* s2);
int pack_cost(int ct);
int unpack_cost(int ct);
// Combine packs A and B with A.last == B.first into A.first..,A.last,B.second,..B.last
void extend_pairset_with_more_pairs_by_following_use_and_def();
bool extend_pairset_with_more_pairs_by_following_def(Node* s1, Node* s2);
bool extend_pairset_with_more_pairs_by_following_use(Node* s1, Node* s2);
void order_inputs_of_all_use_pairs_to_match_def_pair(Node* def1, Node* def2);
enum PairOrderStatus { Ordered, Unordered, Unknown };
PairOrderStatus order_inputs_of_uses_to_match_def_pair(Node* def1, Node* def2, Node* use1, Node* use2);
int estimate_cost_savings_when_packing_as_pair(const Node* s1, const Node* s2) const;
void combine_pairs_to_longer_packs();
class SplitTask {
private:
enum Kind {
// The lambda method for split_packs can return one of these tasks:
Unchanged, // The pack is left in the packset, unchanged.
Rejected, // The pack is removed from the packset.
Split, // Split away split_size nodes from the end of the pack.
};
const Kind _kind;
const uint _split_size;
const char* _message;
SplitTask(const Kind kind, const uint split_size, const char* message) :
_kind(kind), _split_size(split_size), _message(message)
{
assert(message != nullptr, "must have message");
assert(_kind != Unchanged || split_size == 0, "unchanged task conditions");
assert(_kind != Rejected || split_size == 0, "reject task conditions");
assert(_kind != Split || split_size != 0, "split task conditions");
}
public:
static SplitTask make_split(const uint split_size, const char* message) {
return SplitTask(Split, split_size, message);
}
static SplitTask make_unchanged() {
return SplitTask(Unchanged, 0, "unchanged");
}
static SplitTask make_rejected(const char* message) {
return SplitTask(Rejected, 0, message);
}
bool is_unchanged() const { return _kind == Unchanged; }
bool is_rejected() const { return _kind == Rejected; }
bool is_split() const { return _kind == Split; }
const char* message() const { return _message; }
uint split_size() const {
assert(is_split(), "only split tasks have split_size");
return _split_size;
}
};
class SplitStatus {
private:
enum Kind {
// After split_pack, we have: first_pack second_pack
Unchanged, // The pack is left in the packset, unchanged. old_pack nullptr
Rejected, // The pack is removed from the packset. nullptr nullptr
Modified, // The pack had some nodes removed. old_pack nullptr
Split, // The pack was split into two packs. pack1 pack2
};
Kind _kind;
Node_List* _first_pack;
Node_List* _second_pack;
SplitStatus(Kind kind, Node_List* first_pack, Node_List* second_pack) :
_kind(kind), _first_pack(first_pack), _second_pack(second_pack)
{
assert(_kind != Unchanged || (first_pack != nullptr && second_pack == nullptr), "unchanged status conditions");
assert(_kind != Rejected || (first_pack == nullptr && second_pack == nullptr), "rejected status conditions");
assert(_kind != Modified || (first_pack != nullptr && second_pack == nullptr), "modified status conditions");
assert(_kind != Split || (first_pack != nullptr && second_pack != nullptr), "split status conditions");
}
public:
static SplitStatus make_unchanged(Node_List* old_pack) {
return SplitStatus(Unchanged, old_pack, nullptr);
}
static SplitStatus make_rejected() {
return SplitStatus(Rejected, nullptr, nullptr);
}
static SplitStatus make_modified(Node_List* first_pack) {
return SplitStatus(Modified, first_pack, nullptr);
}
static SplitStatus make_split(Node_List* first_pack, Node_List* second_pack) {
return SplitStatus(Split, first_pack, second_pack);
}
bool is_unchanged() const { return _kind == Unchanged; }
Node_List* first_pack() const { return _first_pack; }
Node_List* second_pack() const { return _second_pack; }
};
SplitStatus split_pack(const char* split_name, Node_List* pack, SplitTask task);
template <typename SplitStrategy>
void split_packs(const char* split_name, SplitStrategy strategy);
void split_packs_at_use_def_boundaries();
void split_packs_only_implemented_with_smaller_size();
void split_packs_to_break_mutual_dependence();
// Filter out packs with various filter predicates
template <typename FilterPredicate>
void filter_packs(const char* filter_name,
const char* error_message,
FilterPredicate filter);
void filter_packs_for_power_of_2_size();
void filter_packs_for_mutual_independence();
// Ensure all packs are aligned, if AlignVector is on.
void filter_packs_for_alignment();
// Find the set of alignment solutions for load/store pack.
const AlignmentSolution* pack_alignment_solution(const Node_List* pack);
// Compress packset, such that it has no nullptr entries.
void compress_packset();
// Construct the map from nodes to packs.
void construct_my_pack_map();
// Remove packs that are not implemented.
void filter_packs_for_implemented();
// Remove packs that are not profitable.
void filter_packs_for_profitable();
// Verify that for every pack, all nodes are mutually independent.
// Also verify that packset and my_pack are consistent.
DEBUG_ONLY(void verify_packs();)
DEBUG_ONLY(void verify_packs() const;)
// Adjust the memory graph for the packed operations
void schedule();
// Helper function for schedule, that reorders all memops, slice by slice, according to the schedule
@@ -427,12 +636,13 @@ private:
Node* vector_opd(Node_List* p, int opd_idx);
// Can code be generated for the pack, restricted to size nodes?
bool implemented(const Node_List* pack, uint size);
bool implemented(const Node_List* pack, const uint size) const;
// Find the maximal implemented size smaller or equal to the packs size
uint max_implemented_size(const Node_List* pack);
// For pack p, are all operands and all uses (within the block) vector?
bool profitable(const Node_List* p);
bool profitable(const Node_List* p) const;
// Verify that all uses of packs are also packs, i.e. we do not need extract operations.
DEBUG_ONLY(void verify_no_extract();)
@@ -440,35 +650,22 @@ private:
bool has_use_pack_superset(const Node* n1, const Node* n2) const;
// Find a boundary in the pack, where left and right have different pack uses and defs.
uint find_use_def_boundary(const Node_List* pack) const;
// Is use->in(u_idx) a vector use?
bool is_vector_use(Node* use, int u_idx);
bool is_vector_use(Node* use, int u_idx) const;
// Initialize per node info
void initialize_node_info();
// Compute max depth for expressions from beginning of block
void compute_max_depth();
// Return the longer type for vectorizable type-conversion node or illegal type for other nodes.
BasicType longer_type_for_conversion(Node* n);
BasicType longer_type_for_conversion(Node* n) const;
// Find the longest type in def-use chain for packed nodes, and then compute the max vector size.
int max_vector_size_in_def_use_chain(Node* n);
// Are s1 and s2 in a pack pair and ordered as s1,s2?
bool in_packset(Node* s1, Node* s2);
// Remove the pack at position pos in the packset
void remove_pack_at(int pos);
static LoadNode::ControlDependency control_dependency(Node_List* p);
// Alignment within a vector memory reference
int memory_alignment(MemNode* s, int iv_adjust);
// Ensure that the main loop vectors are aligned by adjusting the pre loop limit.
void adjust_pre_loop_limit_to_align_main_loop_vectors();
// Is the use of d1 in u1 at the same operand position as d2 in u2?
bool opnd_positions_match(Node* d1, Node* u1, Node* d2, Node* u2);
// print methods
void print_packset();
void print_pack(Node_List* p);
void print_stmt(Node* s);
void packset_sort(int n);
};
#endif // SHARE_OPTO_SUPERWORD_HPP


@@ -275,7 +275,7 @@ public:
bool is_marked_reduction_loop() const { return !_loop_reductions.is_empty(); }
// Are s1 and s2 reductions with a data path between them?
bool is_marked_reduction_pair(Node* s1, Node* s2) const;
bool is_marked_reduction_pair(const Node* s1, const Node* s2) const;
private:
// Whether n is a standard reduction operator.


@@ -426,7 +426,7 @@ bool VectorNode::is_type_transition_to_int(Node* n) {
return is_type_transition_short_to_int(n);
}
bool VectorNode::is_muladds2i(Node* n) {
bool VectorNode::is_muladds2i(const Node* n) {
if (n->Opcode() == Op_MulAddS2I) {
return true;
}


@@ -102,7 +102,7 @@ class VectorNode : public TypeNode {
static bool is_vshift_cnt(Node* n);
static bool is_type_transition_short_to_int(Node* n);
static bool is_type_transition_to_int(Node* n);
static bool is_muladds2i(Node* n);
static bool is_muladds2i(const Node* n);
static bool is_roundopD(Node* n);
static bool is_scalar_rotate(Node* n);
static bool is_vector_rotate_supported(int opc, uint vlen, BasicType bt);


@@ -23,7 +23,7 @@
/**
* @test
* @bug 8310886
* @bug 8310886 8325252
* @summary Test MulAddS2I vectorization.
* @library /test/lib /
* @run driver compiler.loopopts.superword.TestMulAddS2I
@@ -36,14 +36,20 @@ import jdk.test.lib.Asserts;
import jdk.test.lib.Platform;
public class TestMulAddS2I {
static final int RANGE = 1024;
static final int RANGE = 1024*16;
static final int ITER = RANGE/2 - 1;
static short[] sArr1 = new short[RANGE];
static short[] sArr2 = new short[RANGE];
static int[] ioutArr = new int[RANGE];
static final int[] GOLDEN_A;
static final int[] GOLDEN_B;
static final int[] GOLDEN_C;
static final int[] GOLDEN_D;
static final int[] GOLDEN_E;
static final int[] GOLDEN_F;
static final int[] GOLDEN_G;
static final int[] GOLDEN_H;
static {
for (int i = 0; i < RANGE; i++) {
@@ -53,6 +59,11 @@ public class TestMulAddS2I {
GOLDEN_A = testa();
GOLDEN_B = testb();
GOLDEN_C = testc();
GOLDEN_D = testd();
GOLDEN_E = teste();
GOLDEN_F = testf();
GOLDEN_G = testg();
GOLDEN_H = testh();
}
@@ -65,12 +76,17 @@
}
}
@Run(test = {"testa", "testb", "testc"})
@Run(test = {"testa", "testb", "testc", "testd", "teste", "testf", "testg", "testh"})
@Warmup(0)
public static void run() {
compare(testa(), GOLDEN_A, "testa");
compare(testb(), GOLDEN_B, "testb");
compare(testb(), GOLDEN_C, "testc");
compare(testc(), GOLDEN_C, "testc");
compare(testd(), GOLDEN_D, "testd");
compare(teste(), GOLDEN_E, "teste");
compare(testf(), GOLDEN_F, "testf");
compare(testg(), GOLDEN_G, "testg");
compare(testh(), GOLDEN_H, "testh");
}
public static void compare(int[] out, int[] golden, String name) {
@@ -133,4 +149,99 @@
}
return out;
}
@Test
@IR(applyIfCPUFeature = {"sse2", "true"},
applyIfPlatform = {"64-bit", "true"},
counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
@IR(applyIfCPUFeature = {"asimd", "true"},
applyIf = {"MaxVectorSize", "16"}, // AD file requires vector_length = 16
counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
@IR(applyIfCPUFeature = {"avx512_vnni", "true"},
counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
public static int[] testd() {
int[] out = ioutArr;
for (int i = 0; i < ITER-2; i+=2) {
// Unrolled, with the same structure.
out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1]));
out[i+1] += ((sArr1[2*i+2] * sArr2[2*i+2]) + (sArr1[2*i+3] * sArr2[2*i+3]));
}
return out;
}
@Test
@IR(applyIfCPUFeature = {"sse2", "true"},
applyIfPlatform = {"64-bit", "true"},
counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
@IR(applyIfCPUFeature = {"asimd", "true"},
applyIf = {"MaxVectorSize", "16"}, // AD file requires vector_length = 16
counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
@IR(applyIfCPUFeature = {"avx512_vnni", "true"},
counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
public static int[] teste() {
int[] out = ioutArr;
for (int i = 0; i < ITER-2; i+=2) {
// Unrolled, with some swaps.
out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1]));
out[i+1] += ((sArr2[2*i+2] * sArr1[2*i+2]) + (sArr1[2*i+3] * sArr2[2*i+3])); // swap(1 2)
}
return out;
}
@Test
@IR(applyIfCPUFeature = {"sse2", "true"},
applyIfPlatform = {"64-bit", "true"},
counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
@IR(applyIfCPUFeature = {"asimd", "true"},
applyIf = {"MaxVectorSize", "16"}, // AD file requires vector_length = 16
counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
@IR(applyIfCPUFeature = {"avx512_vnni", "true"},
counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
public static int[] testf() {
int[] out = ioutArr;
for (int i = 0; i < ITER-2; i+=2) {
// Unrolled, with some swaps.
out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1]));
out[i+1] += ((sArr2[2*i+2] * sArr1[2*i+2]) + (sArr2[2*i+3] * sArr1[2*i+3])); // swap(1 2), swap(3 4)
}
return out;
}
@Test
@IR(applyIfCPUFeature = {"sse2", "true"},
applyIfPlatform = {"64-bit", "true"},
counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
@IR(applyIfCPUFeature = {"asimd", "true"},
applyIf = {"MaxVectorSize", "16"}, // AD file requires vector_length = 16
counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
@IR(applyIfCPUFeature = {"avx512_vnni", "true"},
counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
public static int[] testg() {
int[] out = ioutArr;
for (int i = 0; i < ITER-2; i+=2) {
// Unrolled, with some swaps.
out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1]));
out[i+1] += ((sArr1[2*i+3] * sArr2[2*i+3]) + (sArr1[2*i+2] * sArr2[2*i+2])); // swap(1 3), swap(2 4)
}
return out;
}
@Test
@IR(applyIfCPUFeature = {"sse2", "true"},
applyIfPlatform = {"64-bit", "true"},
counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
@IR(applyIfCPUFeature = {"asimd", "true"},
applyIf = {"MaxVectorSize", "16"}, // AD file requires vector_length = 16
counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
@IR(applyIfCPUFeature = {"avx512_vnni", "true"},
counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
public static int[] testh() {
int[] out = ioutArr;
for (int i = 0; i < ITER-2; i+=2) {
// Unrolled, with some swaps.
out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1]));
out[i+1] += ((sArr2[2*i+3] * sArr1[2*i+3]) + (sArr2[2*i+2] * sArr1[2*i+2])); // swap(1 4), swap(2 3)
}
return out;
}
}