diff --git a/src/hotspot/share/opto/loopnode.cpp b/src/hotspot/share/opto/loopnode.cpp index 7bd92f33af7..23251efd9d1 100644 --- a/src/hotspot/share/opto/loopnode.cpp +++ b/src/hotspot/share/opto/loopnode.cpp @@ -620,7 +620,7 @@ static bool no_side_effect_since_safepoint(Compile* C, Node* x, Node* mem, Merge SafePointNode* safepoint = nullptr; for (DUIterator_Fast imax, i = x->fast_outs(imax); i < imax; i++) { Node* u = x->fast_out(i); - if (u->is_Phi() && u->bottom_type() == Type::MEMORY) { + if (u->is_memory_phi()) { Node* m = u->in(LoopNode::LoopBackControl); if (u->adr_type() == TypePtr::BOTTOM) { if (m->is_MergeMem() && mem->is_MergeMem()) { @@ -2639,7 +2639,7 @@ void OuterStripMinedLoopNode::fix_sunk_stores(CountedLoopEndNode* inner_cle, Loo #ifdef ASSERT for (DUIterator_Fast jmax, j = inner_cl->fast_outs(jmax); j < jmax; j++) { Node* uu = inner_cl->fast_out(j); - if (uu->is_Phi() && uu->bottom_type() == Type::MEMORY) { + if (uu->is_memory_phi()) { if (uu->adr_type() == igvn->C->get_adr_type(igvn->C->get_alias_index(u->adr_type()))) { assert(phi == uu, "what's that phi?"); } else if (uu->adr_type() == TypePtr::BOTTOM) { @@ -5715,6 +5715,51 @@ Node* CountedLoopNode::is_canonical_loop_entry() { return res ? cmpzm->in(input) : nullptr; } +// Find pre loop end from main loop. Returns nullptr if none. +CountedLoopEndNode* CountedLoopNode::find_pre_loop_end() { + assert(is_main_loop(), "Can only find pre-loop from main-loop"); + // The loop cannot be optimized if the graph shape at the loop entry is + // inappropriate. 
+ if (is_canonical_loop_entry() == nullptr) { + return nullptr; + } + + Node* p_f = skip_assertion_predicates_with_halt()->in(0)->in(0); + if (!p_f->is_IfFalse() || !p_f->in(0)->is_CountedLoopEnd()) { + return nullptr; + } + CountedLoopEndNode* pre_end = p_f->in(0)->as_CountedLoopEnd(); + CountedLoopNode* loop_node = pre_end->loopnode(); + if (loop_node == nullptr || !loop_node->is_pre_loop()) { + return nullptr; + } + return pre_end; +} + + CountedLoopNode* CountedLoopNode::pre_loop_head() const { + assert(is_main_loop(), "Only main loop has pre loop"); + assert(_pre_loop_end != nullptr && _pre_loop_end->loopnode() != nullptr, + "should find head from pre loop end"); + return _pre_loop_end->loopnode(); + } + + CountedLoopEndNode* CountedLoopNode::pre_loop_end() { +#ifdef ASSERT + assert(is_main_loop(), "Only main loop has pre loop"); + assert(_pre_loop_end != nullptr, "should be set when fetched"); + Node* found_pre_end = find_pre_loop_end(); + assert(_pre_loop_end == found_pre_end && _pre_loop_end == pre_loop_head()->loopexit(), + "should find the pre loop end and must be the same result"); +#endif + return _pre_loop_end; + } + + void CountedLoopNode::set_pre_loop_end(CountedLoopEndNode* pre_loop_end) { + assert(is_main_loop(), "Only main loop has pre loop"); + assert(pre_loop_end, "must be valid"); + _pre_loop_end = pre_loop_end; + } + //------------------------------get_late_ctrl---------------------------------- // Compute latest legal control. 
Node *PhaseIdealLoop::get_late_ctrl( Node *n, Node *early ) { diff --git a/src/hotspot/share/opto/loopnode.hpp b/src/hotspot/share/opto/loopnode.hpp index 7544ef98da7..1ec00a8642f 100644 --- a/src/hotspot/share/opto/loopnode.hpp +++ b/src/hotspot/share/opto/loopnode.hpp @@ -232,14 +232,14 @@ class CountedLoopNode : public BaseCountedLoopNode { // vector mapped unroll factor here int _slp_maximum_unroll_factor; - // The eventual count of vectorizable packs in slp - int _slp_vector_pack_count; + // Cached CountedLoopEndNode of pre loop for main loops + CountedLoopEndNode* _pre_loop_end; public: CountedLoopNode(Node *entry, Node *backedge) : BaseCountedLoopNode(entry, backedge), _main_idx(0), _trip_count(max_juint), _unrolled_count_log2(0), _node_count_before_unroll(0), - _slp_maximum_unroll_factor(0), _slp_vector_pack_count(0) { + _slp_maximum_unroll_factor(0), _pre_loop_end(nullptr) { init_class_id(Class_CountedLoop); // Initialize _trip_count to the largest possible value. // Will be reset (lower) if the loop's trip count is known. @@ -330,6 +330,10 @@ public: } Node* is_canonical_loop_entry(); + CountedLoopEndNode* find_pre_loop_end(); + CountedLoopNode* pre_loop_head() const; + CountedLoopEndNode* pre_loop_end(); + void set_pre_loop_end(CountedLoopEndNode* pre_loop_end); #ifndef PRODUCT virtual void dump_spec(outputStream *st) const; diff --git a/src/hotspot/share/opto/node.hpp b/src/hotspot/share/opto/node.hpp index 50134fe0e02..c94c439430b 100644 --- a/src/hotspot/share/opto/node.hpp +++ b/src/hotspot/share/opto/node.hpp @@ -1219,6 +1219,9 @@ public: // Whether this is a memory-writing machine node. 
bool is_memory_writer() const { return is_Mach() && bottom_type()->has_memory(); } + // Whether this is a memory phi node + bool is_memory_phi() const { return is_Phi() && bottom_type() == Type::MEMORY; } + //----------------- Printing, etc #ifndef PRODUCT public: diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp index 8cd3e46dcf5..c6ada90cbcc 100644 --- a/src/hotspot/share/opto/superword.cpp +++ b/src/hotspot/share/opto/superword.cpp @@ -22,21 +22,16 @@ */ #include "precompiled.hpp" -#include "compiler/compileLog.hpp" #include "libadt/vectset.hpp" #include "memory/allocation.inline.hpp" #include "memory/resourceArea.hpp" #include "opto/addnode.hpp" -#include "opto/callnode.hpp" #include "opto/castnode.hpp" #include "opto/convertnode.hpp" -#include "opto/divnode.hpp" #include "opto/matcher.hpp" #include "opto/memnode.hpp" -#include "opto/mulnode.hpp" #include "opto/opcodes.hpp" #include "opto/opaquenode.hpp" -#include "opto/rootnode.hpp" #include "opto/superword.hpp" #include "opto/vectornode.hpp" #include "opto/movenode.hpp" @@ -69,7 +64,6 @@ SuperWord::SuperWord(PhaseIdealLoop* phase) : _stk(arena(), 8, 0, nullptr), // scratch stack of nodes _lpt(nullptr), // loop tree node _lp(nullptr), // CountedLoopNode - _pre_loop_end(nullptr), // Pre loop CountedLoopEndNode _loop_reductions(arena()), // reduction nodes in the current loop _bb(nullptr), // basic block _iv(nullptr), // induction var @@ -153,7 +147,7 @@ bool SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) { if (cl->is_main_loop()) { // Check for pre-loop ending with CountedLoopEnd(Bool(Cmp(x,Opaque1(limit)))) - CountedLoopEndNode* pre_end = find_pre_loop_end(cl); + CountedLoopEndNode* pre_end = cl->find_pre_loop_end(); if (pre_end == nullptr) { return false; } @@ -161,7 +155,7 @@ bool SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) { if (pre_opaq1->Opcode() != Op_Opaque1) { return false; } - set_pre_loop_end(pre_end); + 
cl->set_pre_loop_end(pre_end); } init(); // initialize data structures @@ -216,7 +210,7 @@ void SuperWord::unrolling_analysis(int &local_loop_unroll_factor) { } } - if (n->is_Phi() && (n->bottom_type() == Type::MEMORY)) { + if (n->is_memory_phi()) { Node* n_tail = n->in(LoopNode::LoopBackControl); if (n_tail != n->in(LoopNode::EntryControl)) { if (!n_tail->is_Mem()) { @@ -264,7 +258,7 @@ void SuperWord::unrolling_analysis(int &local_loop_unroll_factor) { nstack.push(adr, stack_idx++); } else { // Mark the components of the memory operation in nstack - SWPointer p1(current, this, &nstack, true); + VPointer p1(current, phase(), lpt(), &nstack, true); have_side_effects = p1.node_stack()->is_nonempty(); } @@ -625,13 +619,13 @@ void SuperWord::find_adjacent_refs() { NOT_PRODUCT(find_adjacent_refs_trace_1(best_align_to_mem_ref, best_iv_adjustment);) } - SWPointer align_to_ref_p(mem_ref, this, nullptr, false); + VPointer align_to_ref_p(mem_ref, phase(), lpt(), nullptr, false); // Set alignment relative to "align_to_ref" for all related memory operations. for (int i = memops.size() - 1; i >= 0; i--) { MemNode* s = memops.at(i)->as_Mem(); if (isomorphic(s, mem_ref) && (!_do_vector_loop || same_origin_idx(s, mem_ref))) { - SWPointer p2(s, this, nullptr, false); + VPointer p2(s, phase(), lpt(), nullptr, false); if (p2.comparable(align_to_ref_p)) { int align = memory_alignment(s, iv_adjustment); set_alignment(s, align); @@ -753,7 +747,7 @@ void SuperWord::find_adjacent_refs_trace_1(Node* best_align_to_mem_ref, int best // If strict memory alignment is required (vectors_should_be_aligned), then check if // mem_ref is aligned with best_align_to_mem_ref. 
-bool SuperWord::mem_ref_has_no_alignment_violation(MemNode* mem_ref, int iv_adjustment, SWPointer &align_to_ref_p, +bool SuperWord::mem_ref_has_no_alignment_violation(MemNode* mem_ref, int iv_adjustment, VPointer& align_to_ref_p, MemNode* best_align_to_mem_ref, int best_iv_adjustment, Node_List &align_to_refs) { if (!vectors_should_be_aligned()) { @@ -784,7 +778,7 @@ bool SuperWord::mem_ref_has_no_alignment_violation(MemNode* mem_ref, int iv_adju // (3) Ensure that all vectors have the same invariant. We model memory accesses like this // address = base + k*iv + constant [+ invar] // memory_alignment ignores the invariant. - SWPointer p2(best_align_to_mem_ref, this, nullptr, false); + VPointer p2(best_align_to_mem_ref, phase(), lpt(), nullptr, false); if (!align_to_ref_p.invar_equals(p2)) { // Do not vectorize memory accesses with different invariants // if unaligned memory accesses are not allowed. @@ -803,7 +797,7 @@ MemNode* SuperWord::find_align_to_ref(Node_List &memops, int &idx) { // Count number of comparable memory ops for (uint i = 0; i < memops.size(); i++) { MemNode* s1 = memops.at(i)->as_Mem(); - SWPointer p1(s1, this, nullptr, false); + VPointer p1(s1, phase(), lpt(), nullptr, false); // Only discard unalignable memory references if vector memory references // should be aligned on this platform. 
if (vectors_should_be_aligned() && !ref_is_alignable(p1)) { @@ -813,7 +807,7 @@ MemNode* SuperWord::find_align_to_ref(Node_List &memops, int &idx) { for (uint j = i+1; j < memops.size(); j++) { MemNode* s2 = memops.at(j)->as_Mem(); if (isomorphic(s1, s2)) { - SWPointer p2(s2, this, nullptr, false); + VPointer p2(s2, phase(), lpt(), nullptr, false); if (p1.comparable(p2)) { (*cmp_ct.adr_at(i))++; (*cmp_ct.adr_at(j))++; @@ -834,7 +828,7 @@ MemNode* SuperWord::find_align_to_ref(Node_List &memops, int &idx) { if (s->is_Store()) { int vw = vector_width_in_bytes(s); assert(vw > 1, "sanity"); - SWPointer p(s, this, nullptr, false); + VPointer p(s, phase(), lpt(), nullptr, false); if ( cmp_ct.at(j) > max_ct || (cmp_ct.at(j) == max_ct && ( vw > max_vw || @@ -857,7 +851,7 @@ MemNode* SuperWord::find_align_to_ref(Node_List &memops, int &idx) { if (s->is_Load()) { int vw = vector_width_in_bytes(s); assert(vw > 1, "sanity"); - SWPointer p(s, this, nullptr, false); + VPointer p(s, phase(), lpt(), nullptr, false); if ( cmp_ct.at(j) > max_ct || (cmp_ct.at(j) == max_ct && ( vw > max_vw || @@ -921,11 +915,11 @@ static bool span_works_for_memory_size(MemNode* mem, int span, int mem_size, int //------------------------------ref_is_alignable--------------------------- // Can the preloop align the reference to position zero in the vector? -bool SuperWord::ref_is_alignable(SWPointer& p) { +bool SuperWord::ref_is_alignable(VPointer& p) { if (!p.has_iv()) { return true; // no induction variable } - CountedLoopEndNode* pre_end = pre_loop_end(); + CountedLoopEndNode* pre_end = lp()->pre_loop_end(); assert(pre_end->stride_is_con(), "pre loop stride is constant"); int preloop_stride = pre_end->stride_con(); @@ -1019,7 +1013,7 @@ int SuperWord::get_vw_bytes_special(MemNode* s) { //---------------------------get_iv_adjustment--------------------------- // Calculate loop's iv adjustment for this memory ops. 
int SuperWord::get_iv_adjustment(MemNode* mem_ref) { - SWPointer align_to_ref_p(mem_ref, this, nullptr, false); + VPointer align_to_ref_p(mem_ref, phase(), lpt(), nullptr, false); int offset = align_to_ref_p.offset_in_bytes(); int scale = align_to_ref_p.scale_in_bytes(); int elt_size = align_to_ref_p.memory_size(); @@ -1063,7 +1057,7 @@ void SuperWord::dependence_graph() { // First, assign a dependence node to each memory node for (int i = 0; i < _block.length(); i++ ) { Node *n = _block.at(i); - if (n->is_Mem() || (n->is_Phi() && n->bottom_type() == Type::MEMORY)) { + if (n->is_Mem() || n->is_memory_phi()) { _dg.make_node(n); } } @@ -1100,13 +1094,13 @@ void SuperWord::dependence_graph() { if (_dg.dep(s1)->in_cnt() == 0) { _dg.make_edge(slice, s1); } - SWPointer p1(s1->as_Mem(), this, nullptr, false); + VPointer p1(s1->as_Mem(), phase(), lpt(), nullptr, false); bool sink_dependent = true; for (int k = j - 1; k >= 0; k--) { Node* s2 = _nlist.at(k); if (s1->is_Load() && s2->is_Load()) continue; - SWPointer p2(s2->as_Mem(), this, nullptr, false); + VPointer p2(s2->as_Mem(), phase(), lpt(), nullptr, false); int cmp = p1.cmp(p2); if (SuperWordRTDepCheck && @@ -1115,7 +1109,7 @@ void SuperWord::dependence_graph() { OrderedPair pp(p1.base(), p2.base()); _disjoint_ptrs.append_if_missing(pp); } - if (!SWPointer::not_equal(cmp)) { + if (!VPointer::not_equal(cmp)) { // Possibly same address _dg.make_edge(s1, s2); sink_dependent = false; @@ -1171,7 +1165,7 @@ void SuperWord::mem_slice_preds(Node* start, Node* stop, GrowableArray &p if (out->is_MergeMem() && !in_bb(out)) { // Either unrolling is causing a memory edge not to disappear, // or need to run igvn.optimize() again before SLP - } else if (out->is_Phi() && out->bottom_type() == Type::MEMORY && !in_bb(out)) { + } else if (out->is_memory_phi() && !in_bb(out)) { // Ditto. Not sure what else to check further. 
} else if (out->Opcode() == Op_StoreCM && out->in(MemNode::OopStore) == n) { // StoreCM has an input edge used as a precedence edge. @@ -1257,8 +1251,8 @@ bool SuperWord::are_adjacent_refs(Node* s1, Node* s2) { // Adjacent memory references must have the same base, be comparable // and have the correct distance between them. - SWPointer p1(s1->as_Mem(), this, nullptr, false); - SWPointer p2(s2->as_Mem(), this, nullptr, false); + VPointer p1(s1->as_Mem(), phase(), lpt(), nullptr, false); + VPointer p2(s2->as_Mem(), phase(), lpt(), nullptr, false); if (p1.base() != p2.base() || !p1.comparable(p2)) return false; int diff = p2.offset_in_bytes() - p1.offset_in_bytes(); return diff == data_size(s1); @@ -2544,14 +2538,14 @@ bool SuperWord::output() { uint max_vlen_in_bytes = 0; uint max_vlen = 0; - NOT_PRODUCT(if(is_trace_loop_reverse()) {tty->print_cr("SWPointer::output: print loop before create_reserve_version_of_loop"); print_loop(true);}) + NOT_PRODUCT(if(is_trace_loop_reverse()) {tty->print_cr("VPointer::output: print loop before create_reserve_version_of_loop"); print_loop(true);}) CountedLoopReserveKit make_reversable(_phase, _lpt, do_reserve_copy()); - NOT_PRODUCT(if(is_trace_loop_reverse()) {tty->print_cr("SWPointer::output: print loop after create_reserve_version_of_loop"); print_loop(true);}) + NOT_PRODUCT(if(is_trace_loop_reverse()) {tty->print_cr("VPointer::output: print loop after create_reserve_version_of_loop"); print_loop(true);}) if (do_reserve_copy() && !make_reversable.has_reserved()) { - NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: loop was not reserved correctly, exiting SuperWord");}) + NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("VPointer::output: loop was not reserved correctly, exiting SuperWord");}) return false; } @@ -2566,20 +2560,20 @@ bool SuperWord::output() { uint vlen = p->size(); uint vlen_in_bytes = 0; Node* vn = nullptr; - NOT_PRODUCT(if(is_trace_cmov()) 
{tty->print_cr("SWPointer::output: %d executed first, %d executed last in pack", first->_idx, n->_idx); print_pack(p);}) + NOT_PRODUCT(if(is_trace_cmov()) {tty->print_cr("VPointer::output: %d executed first, %d executed last in pack", first->_idx, n->_idx); print_pack(p);}) int opc = n->Opcode(); if (n->is_Load()) { Node* ctl = n->in(MemNode::Control); Node* mem = first->in(MemNode::Memory); - SWPointer p1(n->as_Mem(), this, nullptr, false); + VPointer p1(n->as_Mem(), phase(), lpt(), nullptr, false); // Identify the memory dependency for the new loadVector node by // walking up through memory chain. // This is done to give flexibility to the new loadVector node so that // it can move above independent storeVector nodes. while (mem->is_StoreVector()) { - SWPointer p2(mem->as_Mem(), this, nullptr, false); + VPointer p2(mem->as_Mem(), phase(), lpt(), nullptr, false); int cmp = p1.cmp(p2); - if (SWPointer::not_equal(cmp) || !SWPointer::comparable(cmp)) { + if (VPointer::not_equal(cmp) || !VPointer::comparable(cmp)) { mem = mem->in(MemNode::Memory); } else { break; // dependent memory @@ -2594,7 +2588,7 @@ bool SuperWord::output() { Node* val = vector_opd(p, MemNode::ValueIn); if (val == nullptr) { if (do_reserve_copy()) { - NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: val should not be null, exiting SuperWord");}) + NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("VPointer::output: val should not be null, exiting SuperWord");}) assert(false, "input to vector store was not created"); return false; //and reverse to backup IG } @@ -2736,7 +2730,7 @@ bool SuperWord::output() { in1 = vector_opd(p, 1); if (in1 == nullptr) { if (do_reserve_copy()) { - NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: in1 should not be null, exiting SuperWord");}) + NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("VPointer::output: in1 should not be null, 
exiting SuperWord");}) assert(false, "input in1 to vector operand was not created"); return false; //and reverse to backup IG } @@ -2746,7 +2740,7 @@ bool SuperWord::output() { Node* in2 = vector_opd(p, 2); if (in2 == nullptr) { if (do_reserve_copy()) { - NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: in2 should not be null, exiting SuperWord");}) + NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("VPointer::output: in2 should not be null, exiting SuperWord");}) assert(false, "input in2 to vector operand was not created"); return false; //and reverse to backup IG } @@ -2820,7 +2814,7 @@ bool SuperWord::output() { vlen_in_bytes = vn->as_Vector()->length_in_bytes(); } else { if (do_reserve_copy()) { - NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: Unhandled scalar opcode (%s), ShouldNotReachHere, exiting SuperWord", NodeClassNames[opc]);}) + NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("VPointer::output: Unhandled scalar opcode (%s), ShouldNotReachHere, exiting SuperWord", NodeClassNames[opc]);}) assert(false, "Unhandled scalar opcode (%s)", NodeClassNames[opc]); return false; //and reverse to backup IG } @@ -2830,7 +2824,7 @@ bool SuperWord::output() { assert(vn != nullptr, "sanity"); if (vn == nullptr) { if (do_reserve_copy()){ - NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: got null node, cannot proceed, exiting SuperWord");}) + NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("VPointer::output: got null node, cannot proceed, exiting SuperWord");}) return false; //and reverse to backup IG } ShouldNotReachHere(); @@ -3190,7 +3184,7 @@ bool SuperWord::construct_bb() { // Find memory slices (head and tail) for (DUIterator_Fast imax, i = lp()->fast_outs(imax); i < imax; i++) { Node *n = lp()->fast_out(i); - if (in_bb(n) && (n->is_Phi() && n->bottom_type() == 
Type::MEMORY)) { + if (in_bb(n) && n->is_memory_phi()) { Node* n_tail = n->in(LoopNode::LoopBackControl); if (n_tail != n->in(LoopNode::EntryControl)) { if (!n_tail->is_Mem()) { @@ -3501,15 +3495,14 @@ int SuperWord::memory_alignment(MemNode* s, int iv_adjust) { tty->print("SuperWord::memory_alignment within a vector memory reference for %d: ", s->_idx); s->dump(); } #endif - NOT_PRODUCT(SWPointer::Tracer::Depth ddd(0);) - SWPointer p(s, this, nullptr, false); + VPointer p(s, phase(), lpt(), nullptr, false); if (!p.valid()) { - NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SWPointer::memory_alignment: SWPointer p invalid, return bottom_align");) + NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("VPointer::memory_alignment: VPointer p invalid, return bottom_align");) return bottom_align; } int vw = get_vw_bytes_special(s); if (vw < 2) { - NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SWPointer::memory_alignment: vector_width_in_bytes < 2, return bottom_align");) + NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("VPointer::memory_alignment: vector_width_in_bytes < 2, return bottom_align");) return bottom_align; // No vectors for this type } int offset = p.offset_in_bytes(); @@ -3518,7 +3511,7 @@ int SuperWord::memory_alignment(MemNode* s, int iv_adjust) { int off_mod = off_rem >= 0 ? 
off_rem : off_rem + vw; #ifndef PRODUCT if ((TraceSuperWord && Verbose) || is_trace_alignment()) { - tty->print_cr("SWPointer::memory_alignment: off_rem = %d, off_mod = %d (offset = %d)", off_rem, off_mod, offset); + tty->print_cr("VPointer::memory_alignment: off_rem = %d, off_mod = %d (offset = %d)", off_rem, off_mod, offset); } #endif return off_mod; @@ -3637,21 +3630,21 @@ LoadNode::ControlDependency SuperWord::control_dependency(Node_List* p) { // (iv + k) mod vector_align == 0 void SuperWord::align_initial_loop_index(MemNode* align_to_ref) { assert(lp()->is_main_loop(), ""); - CountedLoopEndNode* pre_end = pre_loop_end(); + CountedLoopEndNode* pre_end = lp()->pre_loop_end(); Node* pre_opaq1 = pre_end->limit(); assert(pre_opaq1->Opcode() == Op_Opaque1, ""); Opaque1Node* pre_opaq = (Opaque1Node*)pre_opaq1; Node* lim0 = pre_opaq->in(1); // Where we put new limit calculations - Node* pre_ctrl = pre_loop_head()->in(LoopNode::EntryControl); + Node* pre_ctrl = lp()->pre_loop_head()->in(LoopNode::EntryControl); // Ensure the original loop limit is available from the // pre-loop Opaque1 node. Node* orig_limit = pre_opaq->original_loop_limit(); assert(orig_limit != nullptr && _igvn.type(orig_limit) != Type::TOP, ""); - SWPointer align_to_ref_p(align_to_ref, this, nullptr, false); + VPointer align_to_ref_p(align_to_ref, phase(), lpt(), nullptr, false); assert(align_to_ref_p.valid(), "sanity"); // Given: @@ -3793,24 +3786,6 @@ void SuperWord::align_initial_loop_index(MemNode* align_to_ref) { _igvn.replace_input_of(pre_opaq, 1, constrained); } -//----------------------------get_pre_loop_end--------------------------- -// Find pre loop end from main loop. Returns null if none. -CountedLoopEndNode* SuperWord::find_pre_loop_end(CountedLoopNode* cl) const { - // The loop cannot be optimized if the graph shape at - // the loop entry is inappropriate. 
- if (cl->is_canonical_loop_entry() == nullptr) { - return nullptr; - } - - Node* p_f = cl->skip_assertion_predicates_with_halt()->in(0)->in(0); - if (!p_f->is_IfFalse()) return nullptr; - if (!p_f->in(0)->is_CountedLoopEnd()) return nullptr; - CountedLoopEndNode* pre_end = p_f->in(0)->as_CountedLoopEnd(); - CountedLoopNode* loop_node = pre_end->loopnode(); - if (loop_node == nullptr || !loop_node->is_pre_loop()) return nullptr; - return pre_end; -} - //------------------------------init--------------------------- void SuperWord::init() { _dg.init(); @@ -3873,673 +3848,6 @@ void SuperWord::print_stmt(Node* s) { #endif } -//==============================SWPointer=========================== -#ifndef PRODUCT -int SWPointer::Tracer::_depth = 0; -#endif -//----------------------------SWPointer------------------------ -SWPointer::SWPointer(MemNode* mem, SuperWord* slp, Node_Stack *nstack, bool analyze_only) : - _mem(mem), _slp(slp), _base(nullptr), _adr(nullptr), - _scale(0), _offset(0), _invar(nullptr), -#ifdef ASSERT - _debug_invar(nullptr), _debug_negate_invar(false), _debug_invar_scale(nullptr), -#endif - _nstack(nstack), _analyze_only(analyze_only), - _stack_idx(0) -#ifndef PRODUCT - , _tracer(slp) -#endif -{ - NOT_PRODUCT(_tracer.ctor_1(mem);) - - Node* adr = mem->in(MemNode::Address); - if (!adr->is_AddP()) { - assert(!valid(), "too complex"); - return; - } - // Match AddP(base, AddP(ptr, k*iv [+ invariant]), constant) - Node* base = adr->in(AddPNode::Base); - // The base address should be loop invariant - if (is_loop_member(base)) { - assert(!valid(), "base address is loop variant"); - return; - } - // unsafe references require misaligned vector access support - if (base->is_top() && !Matcher::misaligned_vectors_ok()) { - assert(!valid(), "unsafe access"); - return; - } - - NOT_PRODUCT(if(_slp->is_trace_alignment()) _tracer.store_depth();) - NOT_PRODUCT(_tracer.ctor_2(adr);) - - int i; - for (i = 0; ; i++) { - NOT_PRODUCT(_tracer.ctor_3(adr, i);) - - if 
(!scaled_iv_plus_offset(adr->in(AddPNode::Offset))) { - assert(!valid(), "too complex"); - return; - } - adr = adr->in(AddPNode::Address); - NOT_PRODUCT(_tracer.ctor_4(adr, i);) - - if (base == adr || !adr->is_AddP()) { - NOT_PRODUCT(_tracer.ctor_5(adr, base, i);) - break; // stop looking at addp's - } - } - if (is_loop_member(adr)) { - assert(!valid(), "adr is loop variant"); - return; - } - - if (!base->is_top() && adr != base) { - assert(!valid(), "adr and base differ"); - return; - } - - NOT_PRODUCT(if(_slp->is_trace_alignment()) _tracer.restore_depth();) - NOT_PRODUCT(_tracer.ctor_6(mem);) - - _base = base; - _adr = adr; - assert(valid(), "Usable"); -} - -// Following is used to create a temporary object during -// the pattern match of an address expression. -SWPointer::SWPointer(SWPointer* p) : - _mem(p->_mem), _slp(p->_slp), _base(nullptr), _adr(nullptr), - _scale(0), _offset(0), _invar(nullptr), -#ifdef ASSERT - _debug_invar(nullptr), _debug_negate_invar(false), _debug_invar_scale(nullptr), -#endif - _nstack(p->_nstack), _analyze_only(p->_analyze_only), - _stack_idx(p->_stack_idx) - #ifndef PRODUCT - , _tracer(p->_slp) - #endif -{} - -bool SWPointer::is_loop_member(Node* n) const { - Node* n_c = phase()->get_ctrl(n); - return lpt()->is_member(phase()->get_loop(n_c)); -} - -bool SWPointer::invariant(Node* n) const { - NOT_PRODUCT(Tracer::Depth dd;) - Node* n_c = phase()->get_ctrl(n); - NOT_PRODUCT(_tracer.invariant_1(n, n_c);) - bool is_not_member = !is_loop_member(n); - if (is_not_member && _slp->lp()->is_main_loop()) { - // Check that n_c dominates the pre loop head node. If it does not, then we cannot use n as invariant for the pre loop - // CountedLoopEndNode check because n_c is either part of the pre loop or between the pre and the main loop (illegal - // invariant: Happens, for example, when n_c is a CastII node that prevents data nodes to flow above the main loop). 
- return phase()->is_dominator(n_c, _slp->pre_loop_head()); - } - return is_not_member; -} - -//------------------------scaled_iv_plus_offset-------------------- -// Match: k*iv + offset -// where: k is a constant that maybe zero, and -// offset is (k2 [+/- invariant]) where k2 maybe zero and invariant is optional -bool SWPointer::scaled_iv_plus_offset(Node* n) { - NOT_PRODUCT(Tracer::Depth ddd;) - NOT_PRODUCT(_tracer.scaled_iv_plus_offset_1(n);) - - if (scaled_iv(n)) { - NOT_PRODUCT(_tracer.scaled_iv_plus_offset_2(n);) - return true; - } - - if (offset_plus_k(n)) { - NOT_PRODUCT(_tracer.scaled_iv_plus_offset_3(n);) - return true; - } - - int opc = n->Opcode(); - if (opc == Op_AddI) { - if (offset_plus_k(n->in(2)) && scaled_iv_plus_offset(n->in(1))) { - NOT_PRODUCT(_tracer.scaled_iv_plus_offset_4(n);) - return true; - } - if (offset_plus_k(n->in(1)) && scaled_iv_plus_offset(n->in(2))) { - NOT_PRODUCT(_tracer.scaled_iv_plus_offset_5(n);) - return true; - } - } else if (opc == Op_SubI || opc == Op_SubL) { - if (offset_plus_k(n->in(2), true) && scaled_iv_plus_offset(n->in(1))) { - NOT_PRODUCT(_tracer.scaled_iv_plus_offset_6(n);) - return true; - } - if (offset_plus_k(n->in(1)) && scaled_iv_plus_offset(n->in(2))) { - _scale *= -1; - NOT_PRODUCT(_tracer.scaled_iv_plus_offset_7(n);) - return true; - } - } - - NOT_PRODUCT(_tracer.scaled_iv_plus_offset_8(n);) - return false; -} - -//----------------------------scaled_iv------------------------ -// Match: k*iv where k is a constant that's not zero -bool SWPointer::scaled_iv(Node* n) { - NOT_PRODUCT(Tracer::Depth ddd;) - NOT_PRODUCT(_tracer.scaled_iv_1(n);) - - if (_scale != 0) { // already found a scale - NOT_PRODUCT(_tracer.scaled_iv_2(n, _scale);) - return false; - } - - if (n == iv()) { - _scale = 1; - NOT_PRODUCT(_tracer.scaled_iv_3(n, _scale);) - return true; - } - if (_analyze_only && (is_loop_member(n))) { - _nstack->push(n, _stack_idx++); - } - - int opc = n->Opcode(); - if (opc == Op_MulI) { - if (n->in(1) == iv() 
&& n->in(2)->is_Con()) { - _scale = n->in(2)->get_int(); - NOT_PRODUCT(_tracer.scaled_iv_4(n, _scale);) - return true; - } else if (n->in(2) == iv() && n->in(1)->is_Con()) { - _scale = n->in(1)->get_int(); - NOT_PRODUCT(_tracer.scaled_iv_5(n, _scale);) - return true; - } - } else if (opc == Op_LShiftI) { - if (n->in(1) == iv() && n->in(2)->is_Con()) { - _scale = 1 << n->in(2)->get_int(); - NOT_PRODUCT(_tracer.scaled_iv_6(n, _scale);) - return true; - } - } else if (opc == Op_ConvI2L || opc == Op_CastII) { - if (scaled_iv_plus_offset(n->in(1))) { - NOT_PRODUCT(_tracer.scaled_iv_7(n);) - return true; - } - } else if (opc == Op_LShiftL && n->in(2)->is_Con()) { - if (!has_iv()) { - // Need to preserve the current _offset value, so - // create a temporary object for this expression subtree. - // Hacky, so should re-engineer the address pattern match. - NOT_PRODUCT(Tracer::Depth dddd;) - SWPointer tmp(this); - NOT_PRODUCT(_tracer.scaled_iv_8(n, &tmp);) - - if (tmp.scaled_iv_plus_offset(n->in(1))) { - int scale = n->in(2)->get_int(); - _scale = tmp._scale << scale; - _offset += tmp._offset << scale; - if (tmp._invar != nullptr) { - BasicType bt = tmp._invar->bottom_type()->basic_type(); - assert(bt == T_INT || bt == T_LONG, ""); - maybe_add_to_invar(register_if_new(LShiftNode::make(tmp._invar, n->in(2), bt)), false); -#ifdef ASSERT - _debug_invar_scale = n->in(2); -#endif - } - NOT_PRODUCT(_tracer.scaled_iv_9(n, _scale, _offset, _invar);) - return true; - } - } - } - NOT_PRODUCT(_tracer.scaled_iv_10(n);) - return false; -} - -//----------------------------offset_plus_k------------------------ -// Match: offset is (k [+/- invariant]) -// where k maybe zero and invariant is optional, but not both. -bool SWPointer::offset_plus_k(Node* n, bool negate) { - NOT_PRODUCT(Tracer::Depth ddd;) - NOT_PRODUCT(_tracer.offset_plus_k_1(n);) - - int opc = n->Opcode(); - if (opc == Op_ConI) { - _offset += negate ? 
-(n->get_int()) : n->get_int(); - NOT_PRODUCT(_tracer.offset_plus_k_2(n, _offset);) - return true; - } else if (opc == Op_ConL) { - // Okay if value fits into an int - const TypeLong* t = n->find_long_type(); - if (t->higher_equal(TypeLong::INT)) { - jlong loff = n->get_long(); - jint off = (jint)loff; - _offset += negate ? -off : loff; - NOT_PRODUCT(_tracer.offset_plus_k_3(n, _offset);) - return true; - } - NOT_PRODUCT(_tracer.offset_plus_k_4(n);) - return false; - } - assert((_debug_invar == nullptr) == (_invar == nullptr), ""); - - if (_analyze_only && is_loop_member(n)) { - _nstack->push(n, _stack_idx++); - } - if (opc == Op_AddI) { - if (n->in(2)->is_Con() && invariant(n->in(1))) { - maybe_add_to_invar(n->in(1), negate); - _offset += negate ? -(n->in(2)->get_int()) : n->in(2)->get_int(); - NOT_PRODUCT(_tracer.offset_plus_k_6(n, _invar, negate, _offset);) - return true; - } else if (n->in(1)->is_Con() && invariant(n->in(2))) { - _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int(); - maybe_add_to_invar(n->in(2), negate); - NOT_PRODUCT(_tracer.offset_plus_k_7(n, _invar, negate, _offset);) - return true; - } - } - if (opc == Op_SubI) { - if (n->in(2)->is_Con() && invariant(n->in(1))) { - maybe_add_to_invar(n->in(1), negate); - _offset += !negate ? -(n->in(2)->get_int()) : n->in(2)->get_int(); - NOT_PRODUCT(_tracer.offset_plus_k_8(n, _invar, negate, _offset);) - return true; - } else if (n->in(1)->is_Con() && invariant(n->in(2))) { - _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int(); - maybe_add_to_invar(n->in(2), !negate); - NOT_PRODUCT(_tracer.offset_plus_k_9(n, _invar, !negate, _offset);) - return true; - } - } - - if (!is_loop_member(n)) { - // 'n' is loop invariant. Skip ConvI2L and CastII nodes before checking if 'n' is dominating the pre loop. 
- if (opc == Op_ConvI2L) { - n = n->in(1); - } - if (n->Opcode() == Op_CastII) { - // Skip CastII nodes - assert(!is_loop_member(n), "sanity"); - n = n->in(1); - } - // Check if 'n' can really be used as invariant (not in main loop and dominating the pre loop). - if (invariant(n)) { - maybe_add_to_invar(n, negate); - NOT_PRODUCT(_tracer.offset_plus_k_10(n, _invar, negate, _offset);) - return true; - } - } - - NOT_PRODUCT(_tracer.offset_plus_k_11(n);) - return false; -} - -Node* SWPointer::maybe_negate_invar(bool negate, Node* invar) { -#ifdef ASSERT - _debug_negate_invar = negate; -#endif - if (negate) { - BasicType bt = invar->bottom_type()->basic_type(); - assert(bt == T_INT || bt == T_LONG, ""); - PhaseIterGVN& igvn = phase()->igvn(); - Node* zero = igvn.zerocon(bt); - phase()->set_ctrl(zero, phase()->C->root()); - Node* sub = SubNode::make(zero, invar, bt); - invar = register_if_new(sub); - } - return invar; -} - -Node* SWPointer::register_if_new(Node* n) const { - PhaseIterGVN& igvn = phase()->igvn(); - Node* prev = igvn.hash_find_insert(n); - if (prev != nullptr) { - n->destruct(&igvn); - n = prev; - } else { - Node* c = phase()->get_early_ctrl(n); - phase()->register_new_node(n, c); - } - return n; -} - -void SWPointer::maybe_add_to_invar(Node* new_invar, bool negate) { - new_invar = maybe_negate_invar(negate, new_invar); - if (_invar == nullptr) { - _invar = new_invar; -#ifdef ASSERT - _debug_invar = new_invar; -#endif - return; - } -#ifdef ASSERT - _debug_invar = NodeSentinel; -#endif - BasicType new_invar_bt = new_invar->bottom_type()->basic_type(); - assert(new_invar_bt == T_INT || new_invar_bt == T_LONG, ""); - BasicType invar_bt = _invar->bottom_type()->basic_type(); - assert(invar_bt == T_INT || invar_bt == T_LONG, ""); - - BasicType bt = (new_invar_bt == T_LONG || invar_bt == T_LONG) ? 
T_LONG : T_INT; - Node* current_invar = _invar; - if (invar_bt != bt) { - assert(bt == T_LONG && invar_bt == T_INT, ""); - assert(new_invar_bt == bt, ""); - current_invar = register_if_new(new ConvI2LNode(current_invar)); - } else if (new_invar_bt != bt) { - assert(bt == T_LONG && new_invar_bt == T_INT, ""); - assert(invar_bt == bt, ""); - new_invar = register_if_new(new ConvI2LNode(new_invar)); - } - Node* add = AddNode::make(current_invar, new_invar, bt); - _invar = register_if_new(add); -} - -//----------------------------print------------------------ -void SWPointer::print() { -#ifndef PRODUCT - tty->print("base: [%d] adr: [%d] scale: %d offset: %d", - _base != nullptr ? _base->_idx : 0, - _adr != nullptr ? _adr->_idx : 0, - _scale, _offset); - if (_invar != nullptr) { - tty->print(" invar: [%d]", _invar->_idx); - } - tty->cr(); -#endif -} - -//----------------------------tracing------------------------ -#ifndef PRODUCT -void SWPointer::Tracer::print_depth() const { - for (int ii = 0; ii < _depth; ++ii) { - tty->print(" "); - } -} - -void SWPointer::Tracer::ctor_1 (Node* mem) { - if(_slp->is_trace_alignment()) { - print_depth(); tty->print(" %d SWPointer::SWPointer: start alignment analysis", mem->_idx); mem->dump(); - } -} - -void SWPointer::Tracer::ctor_2(Node* adr) { - if(_slp->is_trace_alignment()) { - //store_depth(); - inc_depth(); - print_depth(); tty->print(" %d (adr) SWPointer::SWPointer: ", adr->_idx); adr->dump(); - inc_depth(); - print_depth(); tty->print(" %d (base) SWPointer::SWPointer: ", adr->in(AddPNode::Base)->_idx); adr->in(AddPNode::Base)->dump(); - } -} - -void SWPointer::Tracer::ctor_3(Node* adr, int i) { - if(_slp->is_trace_alignment()) { - inc_depth(); - Node* offset = adr->in(AddPNode::Offset); - print_depth(); tty->print(" %d (offset) SWPointer::SWPointer: i = %d: ", offset->_idx, i); offset->dump(); - } -} - -void SWPointer::Tracer::ctor_4(Node* adr, int i) { - if(_slp->is_trace_alignment()) { - inc_depth(); - print_depth(); 
tty->print(" %d (adr) SWPointer::SWPointer: i = %d: ", adr->_idx, i); adr->dump(); - } -} - -void SWPointer::Tracer::ctor_5(Node* adr, Node* base, int i) { - if(_slp->is_trace_alignment()) { - inc_depth(); - if (base == adr) { - print_depth(); tty->print_cr(" \\ %d (adr) == %d (base) SWPointer::SWPointer: breaking analysis at i = %d", adr->_idx, base->_idx, i); - } else if (!adr->is_AddP()) { - print_depth(); tty->print_cr(" \\ %d (adr) is NOT Addp SWPointer::SWPointer: breaking analysis at i = %d", adr->_idx, i); - } - } -} - -void SWPointer::Tracer::ctor_6(Node* mem) { - if(_slp->is_trace_alignment()) { - //restore_depth(); - print_depth(); tty->print_cr(" %d (adr) SWPointer::SWPointer: stop analysis", mem->_idx); - } -} - -void SWPointer::Tracer::invariant_1(Node *n, Node *n_c) const { - if (_slp->do_vector_loop() && _slp->is_debug() && _slp->_lpt->is_member(_slp->_phase->get_loop(n_c)) != (int)_slp->in_bb(n)) { - int is_member = _slp->_lpt->is_member(_slp->_phase->get_loop(n_c)); - int in_bb = _slp->in_bb(n); - print_depth(); tty->print(" \\ "); tty->print_cr(" %d SWPointer::invariant conditions differ: n_c %d", n->_idx, n_c->_idx); - print_depth(); tty->print(" \\ "); tty->print_cr("is_member %d, in_bb %d", is_member, in_bb); - print_depth(); tty->print(" \\ "); n->dump(); - print_depth(); tty->print(" \\ "); n_c->dump(); - } -} - -void SWPointer::Tracer::scaled_iv_plus_offset_1(Node* n) { - if(_slp->is_trace_alignment()) { - print_depth(); tty->print(" %d SWPointer::scaled_iv_plus_offset testing node: ", n->_idx); - n->dump(); - } -} - -void SWPointer::Tracer::scaled_iv_plus_offset_2(Node* n) { - if(_slp->is_trace_alignment()) { - print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: PASSED", n->_idx); - } -} - -void SWPointer::Tracer::scaled_iv_plus_offset_3(Node* n) { - if(_slp->is_trace_alignment()) { - print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: PASSED", n->_idx); - } -} - -void 
SWPointer::Tracer::scaled_iv_plus_offset_4(Node* n) { - if(_slp->is_trace_alignment()) { - print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: Op_AddI PASSED", n->_idx); - print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(1) is scaled_iv: ", n->in(1)->_idx); n->in(1)->dump(); - print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(2) is offset_plus_k: ", n->in(2)->_idx); n->in(2)->dump(); - } -} - -void SWPointer::Tracer::scaled_iv_plus_offset_5(Node* n) { - if(_slp->is_trace_alignment()) { - print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: Op_AddI PASSED", n->_idx); - print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(2) is scaled_iv: ", n->in(2)->_idx); n->in(2)->dump(); - print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(1) is offset_plus_k: ", n->in(1)->_idx); n->in(1)->dump(); - } -} - -void SWPointer::Tracer::scaled_iv_plus_offset_6(Node* n) { - if(_slp->is_trace_alignment()) { - print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: Op_%s PASSED", n->_idx, n->Name()); - print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(1) is scaled_iv: ", n->in(1)->_idx); n->in(1)->dump(); - print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(2) is offset_plus_k: ", n->in(2)->_idx); n->in(2)->dump(); - } -} - -void SWPointer::Tracer::scaled_iv_plus_offset_7(Node* n) { - if(_slp->is_trace_alignment()) { - print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: Op_%s PASSED", n->_idx, n->Name()); - print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(2) is scaled_iv: ", n->in(2)->_idx); n->in(2)->dump(); - print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(1) is offset_plus_k: ", n->in(1)->_idx); n->in(1)->dump(); - } -} - -void SWPointer::Tracer::scaled_iv_plus_offset_8(Node* n) { - if(_slp->is_trace_alignment()) { - print_depth(); tty->print_cr(" %d 
SWPointer::scaled_iv_plus_offset: FAILED", n->_idx); - } -} - -void SWPointer::Tracer::scaled_iv_1(Node* n) { - if(_slp->is_trace_alignment()) { - print_depth(); tty->print(" %d SWPointer::scaled_iv: testing node: ", n->_idx); n->dump(); - } -} - -void SWPointer::Tracer::scaled_iv_2(Node* n, int scale) { - if(_slp->is_trace_alignment()) { - print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: FAILED since another _scale has been detected before", n->_idx); - print_depth(); tty->print_cr(" \\ SWPointer::scaled_iv: _scale (%d) != 0", scale); - } -} - -void SWPointer::Tracer::scaled_iv_3(Node* n, int scale) { - if(_slp->is_trace_alignment()) { - print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: is iv, setting _scale = %d", n->_idx, scale); - } -} - -void SWPointer::Tracer::scaled_iv_4(Node* n, int scale) { - if(_slp->is_trace_alignment()) { - print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_MulI PASSED, setting _scale = %d", n->_idx, scale); - print_depth(); tty->print(" \\ %d SWPointer::scaled_iv: in(1) is iv: ", n->in(1)->_idx); n->in(1)->dump(); - print_depth(); tty->print(" \\ %d SWPointer::scaled_iv: in(2) is Con: ", n->in(2)->_idx); n->in(2)->dump(); - } -} - -void SWPointer::Tracer::scaled_iv_5(Node* n, int scale) { - if(_slp->is_trace_alignment()) { - print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_MulI PASSED, setting _scale = %d", n->_idx, scale); - print_depth(); tty->print(" \\ %d SWPointer::scaled_iv: in(2) is iv: ", n->in(2)->_idx); n->in(2)->dump(); - print_depth(); tty->print(" \\ %d SWPointer::scaled_iv: in(1) is Con: ", n->in(1)->_idx); n->in(1)->dump(); - } -} - -void SWPointer::Tracer::scaled_iv_6(Node* n, int scale) { - if(_slp->is_trace_alignment()) { - print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_LShiftI PASSED, setting _scale = %d", n->_idx, scale); - print_depth(); tty->print(" \\ %d SWPointer::scaled_iv: in(1) is iv: ", n->in(1)->_idx); n->in(1)->dump(); - print_depth(); tty->print(" \\ %d 
SWPointer::scaled_iv: in(2) is Con: ", n->in(2)->_idx); n->in(2)->dump(); - } -} - -void SWPointer::Tracer::scaled_iv_7(Node* n) { - if(_slp->is_trace_alignment()) { - print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_ConvI2L PASSED", n->_idx); - print_depth(); tty->print_cr(" \\ SWPointer::scaled_iv: in(1) %d is scaled_iv_plus_offset: ", n->in(1)->_idx); - inc_depth(); inc_depth(); - print_depth(); n->in(1)->dump(); - dec_depth(); dec_depth(); - } -} - -void SWPointer::Tracer::scaled_iv_8(Node* n, SWPointer* tmp) { - if(_slp->is_trace_alignment()) { - print_depth(); tty->print(" %d SWPointer::scaled_iv: Op_LShiftL, creating tmp SWPointer: ", n->_idx); tmp->print(); - } -} - -void SWPointer::Tracer::scaled_iv_9(Node* n, int scale, int offset, Node* invar) { - if(_slp->is_trace_alignment()) { - print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_LShiftL PASSED, setting _scale = %d, _offset = %d", n->_idx, scale, offset); - print_depth(); tty->print_cr(" \\ SWPointer::scaled_iv: in(1) [%d] is scaled_iv_plus_offset, in(2) [%d] used to scale: _scale = %d, _offset = %d", - n->in(1)->_idx, n->in(2)->_idx, scale, offset); - if (invar != nullptr) { - print_depth(); tty->print_cr(" \\ SWPointer::scaled_iv: scaled invariant: [%d]", invar->_idx); - } - inc_depth(); inc_depth(); - print_depth(); n->in(1)->dump(); - print_depth(); n->in(2)->dump(); - if (invar != nullptr) { - print_depth(); invar->dump(); - } - dec_depth(); dec_depth(); - } -} - -void SWPointer::Tracer::scaled_iv_10(Node* n) { - if(_slp->is_trace_alignment()) { - print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: FAILED", n->_idx); - } -} - -void SWPointer::Tracer::offset_plus_k_1(Node* n) { - if(_slp->is_trace_alignment()) { - print_depth(); tty->print(" %d SWPointer::offset_plus_k: testing node: ", n->_idx); n->dump(); - } -} - -void SWPointer::Tracer::offset_plus_k_2(Node* n, int _offset) { - if(_slp->is_trace_alignment()) { - print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: 
Op_ConI PASSED, setting _offset = %d", n->_idx, _offset); - } -} - -void SWPointer::Tracer::offset_plus_k_3(Node* n, int _offset) { - if(_slp->is_trace_alignment()) { - print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_ConL PASSED, setting _offset = %d", n->_idx, _offset); - } -} - -void SWPointer::Tracer::offset_plus_k_4(Node* n) { - if(_slp->is_trace_alignment()) { - print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: FAILED", n->_idx); - print_depth(); tty->print_cr(" \\ " JLONG_FORMAT " SWPointer::offset_plus_k: Op_ConL FAILED, k is too big", n->get_long()); - } -} - -void SWPointer::Tracer::offset_plus_k_5(Node* n, Node* _invar) { - if(_slp->is_trace_alignment()) { - print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: FAILED since another invariant has been detected before", n->_idx); - print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: _invar is not null: ", _invar->_idx); _invar->dump(); - } -} - -void SWPointer::Tracer::offset_plus_k_6(Node* n, Node* _invar, bool _negate_invar, int _offset) { - if(_slp->is_trace_alignment()) { - print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_AddI PASSED, setting _debug_negate_invar = %d, _invar = %d, _offset = %d", - n->_idx, _negate_invar, _invar->_idx, _offset); - print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(2) is Con: ", n->in(2)->_idx); n->in(2)->dump(); - print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(1) is invariant: ", _invar->_idx); _invar->dump(); - } -} - -void SWPointer::Tracer::offset_plus_k_7(Node* n, Node* _invar, bool _negate_invar, int _offset) { - if(_slp->is_trace_alignment()) { - print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_AddI PASSED, setting _debug_negate_invar = %d, _invar = %d, _offset = %d", - n->_idx, _negate_invar, _invar->_idx, _offset); - print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(1) is Con: ", n->in(1)->_idx); n->in(1)->dump(); - print_depth(); tty->print(" \\ %d 
SWPointer::offset_plus_k: in(2) is invariant: ", _invar->_idx); _invar->dump(); - } -} - -void SWPointer::Tracer::offset_plus_k_8(Node* n, Node* _invar, bool _negate_invar, int _offset) { - if(_slp->is_trace_alignment()) { - print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_SubI is PASSED, setting _debug_negate_invar = %d, _invar = %d, _offset = %d", - n->_idx, _negate_invar, _invar->_idx, _offset); - print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(2) is Con: ", n->in(2)->_idx); n->in(2)->dump(); - print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(1) is invariant: ", _invar->_idx); _invar->dump(); - } -} - -void SWPointer::Tracer::offset_plus_k_9(Node* n, Node* _invar, bool _negate_invar, int _offset) { - if(_slp->is_trace_alignment()) { - print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_SubI PASSED, setting _debug_negate_invar = %d, _invar = %d, _offset = %d", n->_idx, _negate_invar, _invar->_idx, _offset); - print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(1) is Con: ", n->in(1)->_idx); n->in(1)->dump(); - print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(2) is invariant: ", _invar->_idx); _invar->dump(); - } -} - -void SWPointer::Tracer::offset_plus_k_10(Node* n, Node* _invar, bool _negate_invar, int _offset) { - if(_slp->is_trace_alignment()) { - print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: PASSED, setting _debug_negate_invar = %d, _invar = %d, _offset = %d", n->_idx, _negate_invar, _invar->_idx, _offset); - print_depth(); tty->print_cr(" \\ %d SWPointer::offset_plus_k: is invariant", n->_idx); - } -} - -void SWPointer::Tracer::offset_plus_k_11(Node* n) { - if(_slp->is_trace_alignment()) { - print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: FAILED", n->_idx); - } -} - -#endif // ========================= OrderedPair ===================== const OrderedPair OrderedPair::initial; @@ -4659,7 +3967,7 @@ DepSuccs::DepSuccs(Node* n, DepGraph& dg) { _next_idx = 0; 
_end_idx = _n->outcnt(); _dep_next = dg.dep(_n)->out_head(); - } else if (_n->is_Mem() || (_n->is_Phi() && _n->bottom_type() == Type::MEMORY)) { + } else if (_n->is_Mem() || _n->is_memory_phi()) { _next_idx = 0; _end_idx = 0; _dep_next = dg.dep(_n)->out_head(); @@ -4692,4 +4000,3 @@ bool SuperWord::same_origin_idx(Node* a, Node* b) const { bool SuperWord::same_generation(Node* a, Node* b) const { return a != nullptr && b != nullptr && _clone_map.same_gen(a->_idx, b->_idx); } - diff --git a/src/hotspot/share/opto/superword.hpp b/src/hotspot/share/opto/superword.hpp index 01a99589ec8..46cce4a9663 100644 --- a/src/hotspot/share/opto/superword.hpp +++ b/src/hotspot/share/opto/superword.hpp @@ -24,13 +24,9 @@ #ifndef SHARE_OPTO_SUPERWORD_HPP #define SHARE_OPTO_SUPERWORD_HPP -#include "opto/loopnode.hpp" -#include "opto/node.hpp" -#include "opto/phaseX.hpp" -#include "opto/vectornode.hpp" +#include "opto/vectorization.hpp" #include "utilities/growableArray.hpp" #include "utilities/pair.hpp" -#include "libadt/dict.hpp" // // S U P E R W O R D T R A N S F O R M @@ -60,7 +56,7 @@ // first statement is considered the left element, and the // second statement is considered the right element. 
-class SWPointer; +class VPointer; class OrderedPair; // ========================= Dependence Graph ===================== @@ -229,49 +225,10 @@ class OrderedPair { static const OrderedPair initial; }; -// -----------------------VectorElementSizeStats----------------------- -// Vector lane size statistics for loop vectorization with vector masks -class VectorElementSizeStats { - private: - static const int NO_SIZE = -1; - static const int MIXED_SIZE = -2; - int* _stats; - - public: - VectorElementSizeStats(Arena* a) : _stats(NEW_ARENA_ARRAY(a, int, 4)) { - memset(_stats, 0, sizeof(int) * 4); - } - - void record_size(int size) { - assert(1 <= size && size <= 8 && is_power_of_2(size), "Illegal size"); - _stats[exact_log2(size)]++; - } - - int smallest_size() { - for (int i = 0; i <= 3; i++) { - if (_stats[i] > 0) return (1 << i); - } - return NO_SIZE; - } - - int largest_size() { - for (int i = 3; i >= 0; i--) { - if (_stats[i] > 0) return (1 << i); - } - return NO_SIZE; - } - - int unique_size() { - int small = smallest_size(); - int large = largest_size(); - return (small == large) ? small : MIXED_SIZE; - } -}; - // -----------------------------SuperWord--------------------------------- // Transforms scalar operations into packed (superword) operations. 
class SuperWord : public ResourceObj { - friend class SWPointer; + friend class VPointer; friend class CMoveKit; private: PhaseIdealLoop* _phase; @@ -310,7 +267,7 @@ class SuperWord : public ResourceObj { void unrolling_analysis(int &local_loop_unroll_factor); - // Accessors for SWPointer + // Accessors for VPointer PhaseIdealLoop* phase() const { return _phase; } IdealLoopTree* lpt() const { return _lpt; } PhiNode* iv() const { return _iv; } @@ -335,7 +292,6 @@ class SuperWord : public ResourceObj { private: IdealLoopTree* _lpt; // Current loop tree node CountedLoopNode* _lp; // Current CountedLoopNode - CountedLoopEndNode* _pre_loop_end; // Current CountedLoopEndNode of pre loop VectorSet _loop_reductions; // Reduction nodes in the current loop Node* _bb; // Current basic block PhiNode* _iv; // Induction var @@ -362,25 +318,6 @@ class SuperWord : public ResourceObj { } int iv_stride() const { return lp()->stride_con(); } - CountedLoopNode* pre_loop_head() const { - assert(_pre_loop_end != nullptr && _pre_loop_end->loopnode() != nullptr, "should find head from pre loop end"); - return _pre_loop_end->loopnode(); - } - void set_pre_loop_end(CountedLoopEndNode* pre_loop_end) { - assert(pre_loop_end, "must be valid"); - _pre_loop_end = pre_loop_end; - } - CountedLoopEndNode* pre_loop_end() const { -#ifdef ASSERT - assert(_lp != nullptr, "sanity"); - assert(_pre_loop_end != nullptr, "should be set when fetched"); - Node* found_pre_end = find_pre_loop_end(_lp); - assert(_pre_loop_end == found_pre_end && _pre_loop_end == pre_loop_head()->loopexit(), - "should find the pre loop end and must be the same result"); -#endif - return _pre_loop_end; - } - int vector_width(Node* n) { BasicType bt = velt_basic_type(n); return MIN2(ABS(iv_stride()), Matcher::max_vector_size(bt)); @@ -514,7 +451,7 @@ private: #endif // If strict memory alignment is required (vectors_should_be_aligned), then check if // mem_ref is aligned with best_align_to_mem_ref. 
- bool mem_ref_has_no_alignment_violation(MemNode* mem_ref, int iv_adjustment, SWPointer &align_to_ref_p, + bool mem_ref_has_no_alignment_violation(MemNode* mem_ref, int iv_adjustment, VPointer& align_to_ref_p, MemNode* best_align_to_mem_ref, int best_iv_adjustment, Node_List &align_to_refs); // Find a memory reference to align the loop induction variable to. @@ -522,7 +459,7 @@ private: // Calculate loop's iv adjustment for this memory ops. int get_iv_adjustment(MemNode* mem); // Can the preloop align the reference to position zero in the vector? - bool ref_is_alignable(SWPointer& p); + bool ref_is_alignable(VPointer& p); // Construct dependency graph. void dependence_graph(); // Return a memory slice (node list) in predecessor order starting at "start" @@ -614,8 +551,6 @@ private: // Adjust pre-loop limit so that in main loop, a load/store reference // to align_to_ref will be a position zero in the vector. void align_initial_loop_index(MemNode* align_to_ref); - // Find pre loop end from main loop. Returns null if none. - CountedLoopEndNode* find_pre_loop_end(CountedLoopNode *cl) const; // Is the use of d1 in u1 at the same operand position as d2 in u2? 
bool opnd_positions_match(Node* d1, Node* u1, Node* d2, Node* u2); void init(); @@ -629,176 +564,4 @@ private: void packset_sort(int n); }; - - -//------------------------------SWPointer--------------------------- -// Information about an address for dependence checking and vector alignment -class SWPointer : public ArenaObj { - protected: - MemNode* _mem; // My memory reference node - SuperWord* _slp; // SuperWord class - - Node* _base; // null if unsafe nonheap reference - Node* _adr; // address pointer - int _scale; // multiplier for iv (in bytes), 0 if no loop iv - int _offset; // constant offset (in bytes) - - Node* _invar; // invariant offset (in bytes), null if none -#ifdef ASSERT - Node* _debug_invar; - bool _debug_negate_invar; // if true then use: (0 - _invar) - Node* _debug_invar_scale; // multiplier for invariant -#endif - - Node_Stack* _nstack; // stack used to record a swpointer trace of variants - bool _analyze_only; // Used in loop unrolling only for swpointer trace - uint _stack_idx; // Used in loop unrolling only for swpointer trace - - PhaseIdealLoop* phase() const { return _slp->phase(); } - IdealLoopTree* lpt() const { return _slp->lpt(); } - PhiNode* iv() const { return _slp->iv(); } // Induction var - - bool is_loop_member(Node* n) const; - bool invariant(Node* n) const; - - // Match: k*iv + offset - bool scaled_iv_plus_offset(Node* n); - // Match: k*iv where k is a constant that's not zero - bool scaled_iv(Node* n); - // Match: offset is (k [+/- invariant]) - bool offset_plus_k(Node* n, bool negate = false); - - public: - enum CMP { - Less = 1, - Greater = 2, - Equal = 4, - NotEqual = (Less | Greater), - NotComparable = (Less | Greater | Equal) - }; - - SWPointer(MemNode* mem, SuperWord* slp, Node_Stack *nstack, bool analyze_only); - // Following is used to create a temporary object during - // the pattern match of an address expression. 
- SWPointer(SWPointer* p); - - bool valid() { return _adr != nullptr; } - bool has_iv() { return _scale != 0; } - - Node* base() { return _base; } - Node* adr() { return _adr; } - MemNode* mem() { return _mem; } - int scale_in_bytes() { return _scale; } - Node* invar() { return _invar; } - int offset_in_bytes() { return _offset; } - int memory_size() { return _mem->memory_size(); } - Node_Stack* node_stack() { return _nstack; } - - // Comparable? - bool invar_equals(SWPointer& q) { - assert(_debug_invar == NodeSentinel || q._debug_invar == NodeSentinel || - (_invar == q._invar) == (_debug_invar == q._debug_invar && - _debug_invar_scale == q._debug_invar_scale && - _debug_negate_invar == q._debug_negate_invar), ""); - return _invar == q._invar; - } - - int cmp(SWPointer& q) { - if (valid() && q.valid() && - (_adr == q._adr || (_base == _adr && q._base == q._adr)) && - _scale == q._scale && invar_equals(q)) { - bool overlap = q._offset < _offset + memory_size() && - _offset < q._offset + q.memory_size(); - return overlap ? Equal : (_offset < q._offset ? 
Less : Greater); - } else { - return NotComparable; - } - } - - bool not_equal(SWPointer& q) { return not_equal(cmp(q)); } - bool equal(SWPointer& q) { return equal(cmp(q)); } - bool comparable(SWPointer& q) { return comparable(cmp(q)); } - static bool not_equal(int cmp) { return cmp <= NotEqual; } - static bool equal(int cmp) { return cmp == Equal; } - static bool comparable(int cmp) { return cmp < NotComparable; } - - void print(); - -#ifndef PRODUCT - class Tracer { - friend class SuperWord; - friend class SWPointer; - SuperWord* _slp; - static int _depth; - int _depth_save; - void print_depth() const; - int depth() const { return _depth; } - void set_depth(int d) { _depth = d; } - void inc_depth() { _depth++;} - void dec_depth() { if (_depth > 0) _depth--;} - void store_depth() {_depth_save = _depth;} - void restore_depth() {_depth = _depth_save;} - - class Depth { - friend class Tracer; - friend class SWPointer; - friend class SuperWord; - Depth() { ++_depth; } - Depth(int x) { _depth = 0; } - ~Depth() { if (_depth > 0) --_depth;} - }; - Tracer (SuperWord* slp) : _slp(slp) {} - - // tracing functions - void ctor_1(Node* mem); - void ctor_2(Node* adr); - void ctor_3(Node* adr, int i); - void ctor_4(Node* adr, int i); - void ctor_5(Node* adr, Node* base, int i); - void ctor_6(Node* mem); - - void invariant_1(Node *n, Node *n_c) const; - - void scaled_iv_plus_offset_1(Node* n); - void scaled_iv_plus_offset_2(Node* n); - void scaled_iv_plus_offset_3(Node* n); - void scaled_iv_plus_offset_4(Node* n); - void scaled_iv_plus_offset_5(Node* n); - void scaled_iv_plus_offset_6(Node* n); - void scaled_iv_plus_offset_7(Node* n); - void scaled_iv_plus_offset_8(Node* n); - - void scaled_iv_1(Node* n); - void scaled_iv_2(Node* n, int scale); - void scaled_iv_3(Node* n, int scale); - void scaled_iv_4(Node* n, int scale); - void scaled_iv_5(Node* n, int scale); - void scaled_iv_6(Node* n, int scale); - void scaled_iv_7(Node* n); - void scaled_iv_8(Node* n, SWPointer* tmp); - 
void scaled_iv_9(Node* n, int _scale, int _offset, Node* _invar); - void scaled_iv_10(Node* n); - - void offset_plus_k_1(Node* n); - void offset_plus_k_2(Node* n, int _offset); - void offset_plus_k_3(Node* n, int _offset); - void offset_plus_k_4(Node* n); - void offset_plus_k_5(Node* n, Node* _invar); - void offset_plus_k_6(Node* n, Node* _invar, bool _negate_invar, int _offset); - void offset_plus_k_7(Node* n, Node* _invar, bool _negate_invar, int _offset); - void offset_plus_k_8(Node* n, Node* _invar, bool _negate_invar, int _offset); - void offset_plus_k_9(Node* n, Node* _invar, bool _negate_invar, int _offset); - void offset_plus_k_10(Node* n, Node* _invar, bool _negate_invar, int _offset); - void offset_plus_k_11(Node* n); - - } _tracer;//TRacer; -#endif - - Node* maybe_negate_invar(bool negate, Node* invar); - - void maybe_add_to_invar(Node* new_invar, bool negate); - - Node* register_if_new(Node* n) const; -}; - #endif // SHARE_OPTO_SUPERWORD_HPP diff --git a/src/hotspot/share/opto/vectorization.cpp b/src/hotspot/share/opto/vectorization.cpp new file mode 100644 index 00000000000..7158f400929 --- /dev/null +++ b/src/hotspot/share/opto/vectorization.cpp @@ -0,0 +1,689 @@ +/* + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2023, Arm Limited. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#include "precompiled.hpp" +#include "opto/addnode.hpp" +#include "opto/connode.hpp" +#include "opto/convertnode.hpp" +#include "opto/matcher.hpp" +#include "opto/mulnode.hpp" +#include "opto/rootnode.hpp" +#include "opto/vectorization.hpp" + +#ifndef PRODUCT +int VPointer::Tracer::_depth = 0; +#endif + +VPointer::VPointer(MemNode* mem, PhaseIdealLoop* phase, IdealLoopTree* lpt, + Node_Stack* nstack, bool analyze_only) : + _mem(mem), _phase(phase), _lpt(lpt), + _iv(lpt->_head->as_CountedLoop()->phi()->as_Phi()), + _base(nullptr), _adr(nullptr), _scale(0), _offset(0), _invar(nullptr), +#ifdef ASSERT + _debug_invar(nullptr), _debug_negate_invar(false), _debug_invar_scale(nullptr), +#endif + _nstack(nstack), _analyze_only(analyze_only), _stack_idx(0) +#ifndef PRODUCT + , _tracer((phase->C->directive()->VectorizeDebugOption & 2) > 0) +#endif +{ + NOT_PRODUCT(_tracer.ctor_1(mem);) + + Node* adr = mem->in(MemNode::Address); + if (!adr->is_AddP()) { + assert(!valid(), "too complex"); + return; + } + // Match AddP(base, AddP(ptr, k*iv [+ invariant]), constant) + Node* base = adr->in(AddPNode::Base); + // The base address should be loop invariant + if (is_loop_member(base)) { + assert(!valid(), "base address is loop variant"); + return; + } + // unsafe references require misaligned vector access support + if (base->is_top() && !Matcher::misaligned_vectors_ok()) { + assert(!valid(), "unsafe access"); + return; + } + + NOT_PRODUCT(if(_tracer._is_trace_alignment) _tracer.store_depth();) + NOT_PRODUCT(_tracer.ctor_2(adr);) + + int i; + for (i = 0; ; i++) { + 
NOT_PRODUCT(_tracer.ctor_3(adr, i);) + + if (!scaled_iv_plus_offset(adr->in(AddPNode::Offset))) { + assert(!valid(), "too complex"); + return; + } + adr = adr->in(AddPNode::Address); + NOT_PRODUCT(_tracer.ctor_4(adr, i);) + + if (base == adr || !adr->is_AddP()) { + NOT_PRODUCT(_tracer.ctor_5(adr, base, i);) + break; // stop looking at addp's + } + } + if (is_loop_member(adr)) { + assert(!valid(), "adr is loop variant"); + return; + } + + if (!base->is_top() && adr != base) { + assert(!valid(), "adr and base differ"); + return; + } + + NOT_PRODUCT(if(_tracer._is_trace_alignment) _tracer.restore_depth();) + NOT_PRODUCT(_tracer.ctor_6(mem);) + + _base = base; + _adr = adr; + assert(valid(), "Usable"); +} + +// Following is used to create a temporary object during +// the pattern match of an address expression. +VPointer::VPointer(VPointer* p) : + _mem(p->_mem), _phase(p->_phase), _lpt(p->_lpt), _iv(p->_iv), + _base(nullptr), _adr(nullptr), _scale(0), _offset(0), _invar(nullptr), +#ifdef ASSERT + _debug_invar(nullptr), _debug_negate_invar(false), _debug_invar_scale(nullptr), +#endif + _nstack(p->_nstack), _analyze_only(p->_analyze_only), _stack_idx(p->_stack_idx) +#ifndef PRODUCT + , _tracer(p->_tracer._is_trace_alignment) +#endif +{} + +bool VPointer::is_loop_member(Node* n) const { + Node* n_c = phase()->get_ctrl(n); + return lpt()->is_member(phase()->get_loop(n_c)); +} + +bool VPointer::invariant(Node* n) const { + NOT_PRODUCT(Tracer::Depth dd;) + bool is_not_member = !is_loop_member(n); + if (is_not_member) { + CountedLoopNode* cl = lpt()->_head->as_CountedLoop(); + if (cl->is_main_loop()) { + // Check that n_c dominates the pre loop head node. If it does not, then + // we cannot use n as invariant for the pre loop CountedLoopEndNode check + // because n_c is either part of the pre loop or between the pre and the + // main loop (Illegal invariant happens when n_c is a CastII node that + // prevents data nodes to flow above the main loop). 
+ Node* n_c = phase()->get_ctrl(n); + return phase()->is_dominator(n_c, cl->pre_loop_head()); + } + } + return is_not_member; +} + +// Match: k*iv + offset +// where: k is a constant that maybe zero, and +// offset is (k2 [+/- invariant]) where k2 maybe zero and invariant is optional +bool VPointer::scaled_iv_plus_offset(Node* n) { + NOT_PRODUCT(Tracer::Depth ddd;) + NOT_PRODUCT(_tracer.scaled_iv_plus_offset_1(n);) + + if (scaled_iv(n)) { + NOT_PRODUCT(_tracer.scaled_iv_plus_offset_2(n);) + return true; + } + + if (offset_plus_k(n)) { + NOT_PRODUCT(_tracer.scaled_iv_plus_offset_3(n);) + return true; + } + + int opc = n->Opcode(); + if (opc == Op_AddI) { + if (offset_plus_k(n->in(2)) && scaled_iv_plus_offset(n->in(1))) { + NOT_PRODUCT(_tracer.scaled_iv_plus_offset_4(n);) + return true; + } + if (offset_plus_k(n->in(1)) && scaled_iv_plus_offset(n->in(2))) { + NOT_PRODUCT(_tracer.scaled_iv_plus_offset_5(n);) + return true; + } + } else if (opc == Op_SubI || opc == Op_SubL) { + if (offset_plus_k(n->in(2), true) && scaled_iv_plus_offset(n->in(1))) { + NOT_PRODUCT(_tracer.scaled_iv_plus_offset_6(n);) + return true; + } + if (offset_plus_k(n->in(1)) && scaled_iv_plus_offset(n->in(2))) { + _scale *= -1; + NOT_PRODUCT(_tracer.scaled_iv_plus_offset_7(n);) + return true; + } + } + + NOT_PRODUCT(_tracer.scaled_iv_plus_offset_8(n);) + return false; +} + +// Match: k*iv where k is a constant that's not zero +bool VPointer::scaled_iv(Node* n) { + NOT_PRODUCT(Tracer::Depth ddd;) + NOT_PRODUCT(_tracer.scaled_iv_1(n);) + + if (_scale != 0) { // already found a scale + NOT_PRODUCT(_tracer.scaled_iv_2(n, _scale);) + return false; + } + + if (n == iv()) { + _scale = 1; + NOT_PRODUCT(_tracer.scaled_iv_3(n, _scale);) + return true; + } + if (_analyze_only && (is_loop_member(n))) { + _nstack->push(n, _stack_idx++); + } + + int opc = n->Opcode(); + if (opc == Op_MulI) { + if (n->in(1) == iv() && n->in(2)->is_Con()) { + _scale = n->in(2)->get_int(); + NOT_PRODUCT(_tracer.scaled_iv_4(n, 
_scale);) + return true; + } else if (n->in(2) == iv() && n->in(1)->is_Con()) { + _scale = n->in(1)->get_int(); + NOT_PRODUCT(_tracer.scaled_iv_5(n, _scale);) + return true; + } + } else if (opc == Op_LShiftI) { + if (n->in(1) == iv() && n->in(2)->is_Con()) { + _scale = 1 << n->in(2)->get_int(); + NOT_PRODUCT(_tracer.scaled_iv_6(n, _scale);) + return true; + } + } else if (opc == Op_ConvI2L || opc == Op_CastII) { + if (scaled_iv_plus_offset(n->in(1))) { + NOT_PRODUCT(_tracer.scaled_iv_7(n);) + return true; + } + } else if (opc == Op_LShiftL && n->in(2)->is_Con()) { + if (!has_iv()) { + // Need to preserve the current _offset value, so + // create a temporary object for this expression subtree. + // Hacky, so should re-engineer the address pattern match. + NOT_PRODUCT(Tracer::Depth dddd;) + VPointer tmp(this); + NOT_PRODUCT(_tracer.scaled_iv_8(n, &tmp);) + + if (tmp.scaled_iv_plus_offset(n->in(1))) { + int scale = n->in(2)->get_int(); + _scale = tmp._scale << scale; + _offset += tmp._offset << scale; + if (tmp._invar != nullptr) { + BasicType bt = tmp._invar->bottom_type()->basic_type(); + assert(bt == T_INT || bt == T_LONG, ""); + maybe_add_to_invar(register_if_new(LShiftNode::make(tmp._invar, n->in(2), bt)), false); +#ifdef ASSERT + _debug_invar_scale = n->in(2); +#endif + } + NOT_PRODUCT(_tracer.scaled_iv_9(n, _scale, _offset, _invar);) + return true; + } + } + } + NOT_PRODUCT(_tracer.scaled_iv_10(n);) + return false; +} + +// Match: offset is (k [+/- invariant]) +// where k maybe zero and invariant is optional, but not both. +bool VPointer::offset_plus_k(Node* n, bool negate) { + NOT_PRODUCT(Tracer::Depth ddd;) + NOT_PRODUCT(_tracer.offset_plus_k_1(n);) + + int opc = n->Opcode(); + if (opc == Op_ConI) { + _offset += negate ? 
-(n->get_int()) : n->get_int(); + NOT_PRODUCT(_tracer.offset_plus_k_2(n, _offset);) + return true; + } else if (opc == Op_ConL) { + // Okay if value fits into an int + const TypeLong* t = n->find_long_type(); + if (t->higher_equal(TypeLong::INT)) { + jlong loff = n->get_long(); + jint off = (jint)loff; + _offset += negate ? -off : loff; + NOT_PRODUCT(_tracer.offset_plus_k_3(n, _offset);) + return true; + } + NOT_PRODUCT(_tracer.offset_plus_k_4(n);) + return false; + } + assert((_debug_invar == nullptr) == (_invar == nullptr), ""); + + if (_analyze_only && is_loop_member(n)) { + _nstack->push(n, _stack_idx++); + } + if (opc == Op_AddI) { + if (n->in(2)->is_Con() && invariant(n->in(1))) { + maybe_add_to_invar(n->in(1), negate); + _offset += negate ? -(n->in(2)->get_int()) : n->in(2)->get_int(); + NOT_PRODUCT(_tracer.offset_plus_k_6(n, _invar, negate, _offset);) + return true; + } else if (n->in(1)->is_Con() && invariant(n->in(2))) { + _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int(); + maybe_add_to_invar(n->in(2), negate); + NOT_PRODUCT(_tracer.offset_plus_k_7(n, _invar, negate, _offset);) + return true; + } + } + if (opc == Op_SubI) { + if (n->in(2)->is_Con() && invariant(n->in(1))) { + maybe_add_to_invar(n->in(1), negate); + _offset += !negate ? -(n->in(2)->get_int()) : n->in(2)->get_int(); + NOT_PRODUCT(_tracer.offset_plus_k_8(n, _invar, negate, _offset);) + return true; + } else if (n->in(1)->is_Con() && invariant(n->in(2))) { + _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int(); + maybe_add_to_invar(n->in(2), !negate); + NOT_PRODUCT(_tracer.offset_plus_k_9(n, _invar, !negate, _offset);) + return true; + } + } + + if (!is_loop_member(n)) { + // 'n' is loop invariant. Skip ConvI2L and CastII nodes before checking if 'n' is dominating the pre loop. 
+ if (opc == Op_ConvI2L) { + n = n->in(1); + } + if (n->Opcode() == Op_CastII) { + // Skip CastII nodes + assert(!is_loop_member(n), "sanity"); + n = n->in(1); + } + // Check if 'n' can really be used as invariant (not in main loop and dominating the pre loop). + if (invariant(n)) { + maybe_add_to_invar(n, negate); + NOT_PRODUCT(_tracer.offset_plus_k_10(n, _invar, negate, _offset);) + return true; + } + } + + NOT_PRODUCT(_tracer.offset_plus_k_11(n);) + return false; +} + +Node* VPointer::maybe_negate_invar(bool negate, Node* invar) { +#ifdef ASSERT + _debug_negate_invar = negate; +#endif + if (negate) { + BasicType bt = invar->bottom_type()->basic_type(); + assert(bt == T_INT || bt == T_LONG, ""); + PhaseIterGVN& igvn = phase()->igvn(); + Node* zero = igvn.zerocon(bt); + phase()->set_ctrl(zero, phase()->C->root()); + Node* sub = SubNode::make(zero, invar, bt); + invar = register_if_new(sub); + } + return invar; +} + +Node* VPointer::register_if_new(Node* n) const { + PhaseIterGVN& igvn = phase()->igvn(); + Node* prev = igvn.hash_find_insert(n); + if (prev != nullptr) { + n->destruct(&igvn); + n = prev; + } else { + Node* c = phase()->get_early_ctrl(n); + phase()->register_new_node(n, c); + } + return n; +} + +void VPointer::maybe_add_to_invar(Node* new_invar, bool negate) { + new_invar = maybe_negate_invar(negate, new_invar); + if (_invar == nullptr) { + _invar = new_invar; +#ifdef ASSERT + _debug_invar = new_invar; +#endif + return; + } +#ifdef ASSERT + _debug_invar = NodeSentinel; +#endif + BasicType new_invar_bt = new_invar->bottom_type()->basic_type(); + assert(new_invar_bt == T_INT || new_invar_bt == T_LONG, ""); + BasicType invar_bt = _invar->bottom_type()->basic_type(); + assert(invar_bt == T_INT || invar_bt == T_LONG, ""); + + BasicType bt = (new_invar_bt == T_LONG || invar_bt == T_LONG) ? 
T_LONG : T_INT; + Node* current_invar = _invar; + if (invar_bt != bt) { + assert(bt == T_LONG && invar_bt == T_INT, ""); + assert(new_invar_bt == bt, ""); + current_invar = register_if_new(new ConvI2LNode(current_invar)); + } else if (new_invar_bt != bt) { + assert(bt == T_LONG && new_invar_bt == T_INT, ""); + assert(invar_bt == bt, ""); + new_invar = register_if_new(new ConvI2LNode(new_invar)); + } + Node* add = AddNode::make(current_invar, new_invar, bt); + _invar = register_if_new(add); +} + +// Function for printing the fields of a VPointer +void VPointer::print() { +#ifndef PRODUCT + tty->print("base: [%d] adr: [%d] scale: %d offset: %d", + _base != nullptr ? _base->_idx : 0, + _adr != nullptr ? _adr->_idx : 0, + _scale, _offset); + if (_invar != nullptr) { + tty->print(" invar: [%d]", _invar->_idx); + } + tty->cr(); +#endif +} + +// Following are functions for tracing VPointer match +#ifndef PRODUCT +void VPointer::Tracer::print_depth() const { + for (int ii = 0; ii < _depth; ++ii) { + tty->print(" "); + } +} + +void VPointer::Tracer::ctor_1(Node* mem) { + if (_is_trace_alignment) { + print_depth(); tty->print(" %d VPointer::VPointer: start alignment analysis", mem->_idx); mem->dump(); + } +} + +void VPointer::Tracer::ctor_2(Node* adr) { + if (_is_trace_alignment) { + //store_depth(); + inc_depth(); + print_depth(); tty->print(" %d (adr) VPointer::VPointer: ", adr->_idx); adr->dump(); + inc_depth(); + print_depth(); tty->print(" %d (base) VPointer::VPointer: ", adr->in(AddPNode::Base)->_idx); adr->in(AddPNode::Base)->dump(); + } +} + +void VPointer::Tracer::ctor_3(Node* adr, int i) { + if (_is_trace_alignment) { + inc_depth(); + Node* offset = adr->in(AddPNode::Offset); + print_depth(); tty->print(" %d (offset) VPointer::VPointer: i = %d: ", offset->_idx, i); offset->dump(); + } +} + +void VPointer::Tracer::ctor_4(Node* adr, int i) { + if (_is_trace_alignment) { + inc_depth(); + print_depth(); tty->print(" %d (adr) VPointer::VPointer: i = %d: ", adr->_idx, 
i); adr->dump(); + } +} + +void VPointer::Tracer::ctor_5(Node* adr, Node* base, int i) { + if (_is_trace_alignment) { + inc_depth(); + if (base == adr) { + print_depth(); tty->print_cr(" \\ %d (adr) == %d (base) VPointer::VPointer: breaking analysis at i = %d", adr->_idx, base->_idx, i); + } else if (!adr->is_AddP()) { + print_depth(); tty->print_cr(" \\ %d (adr) is NOT Addp VPointer::VPointer: breaking analysis at i = %d", adr->_idx, i); + } + } +} + +void VPointer::Tracer::ctor_6(Node* mem) { + if (_is_trace_alignment) { + //restore_depth(); + print_depth(); tty->print_cr(" %d (adr) VPointer::VPointer: stop analysis", mem->_idx); + } +} + +void VPointer::Tracer::scaled_iv_plus_offset_1(Node* n) { + if (_is_trace_alignment) { + print_depth(); tty->print(" %d VPointer::scaled_iv_plus_offset testing node: ", n->_idx); + n->dump(); + } +} + +void VPointer::Tracer::scaled_iv_plus_offset_2(Node* n) { + if (_is_trace_alignment) { + print_depth(); tty->print_cr(" %d VPointer::scaled_iv_plus_offset: PASSED", n->_idx); + } +} + +void VPointer::Tracer::scaled_iv_plus_offset_3(Node* n) { + if (_is_trace_alignment) { + print_depth(); tty->print_cr(" %d VPointer::scaled_iv_plus_offset: PASSED", n->_idx); + } +} + +void VPointer::Tracer::scaled_iv_plus_offset_4(Node* n) { + if (_is_trace_alignment) { + print_depth(); tty->print_cr(" %d VPointer::scaled_iv_plus_offset: Op_AddI PASSED", n->_idx); + print_depth(); tty->print(" \\ %d VPointer::scaled_iv_plus_offset: in(1) is scaled_iv: ", n->in(1)->_idx); n->in(1)->dump(); + print_depth(); tty->print(" \\ %d VPointer::scaled_iv_plus_offset: in(2) is offset_plus_k: ", n->in(2)->_idx); n->in(2)->dump(); + } +} + +void VPointer::Tracer::scaled_iv_plus_offset_5(Node* n) { + if (_is_trace_alignment) { + print_depth(); tty->print_cr(" %d VPointer::scaled_iv_plus_offset: Op_AddI PASSED", n->_idx); + print_depth(); tty->print(" \\ %d VPointer::scaled_iv_plus_offset: in(2) is scaled_iv: ", n->in(2)->_idx); n->in(2)->dump(); + print_depth(); 
tty->print(" \\ %d VPointer::scaled_iv_plus_offset: in(1) is offset_plus_k: ", n->in(1)->_idx); n->in(1)->dump(); + } +} + +void VPointer::Tracer::scaled_iv_plus_offset_6(Node* n) { + if (_is_trace_alignment) { + print_depth(); tty->print_cr(" %d VPointer::scaled_iv_plus_offset: Op_%s PASSED", n->_idx, n->Name()); + print_depth(); tty->print(" \\ %d VPointer::scaled_iv_plus_offset: in(1) is scaled_iv: ", n->in(1)->_idx); n->in(1)->dump(); + print_depth(); tty->print(" \\ %d VPointer::scaled_iv_plus_offset: in(2) is offset_plus_k: ", n->in(2)->_idx); n->in(2)->dump(); + } +} + +void VPointer::Tracer::scaled_iv_plus_offset_7(Node* n) { + if (_is_trace_alignment) { + print_depth(); tty->print_cr(" %d VPointer::scaled_iv_plus_offset: Op_%s PASSED", n->_idx, n->Name()); + print_depth(); tty->print(" \\ %d VPointer::scaled_iv_plus_offset: in(2) is scaled_iv: ", n->in(2)->_idx); n->in(2)->dump(); + print_depth(); tty->print(" \\ %d VPointer::scaled_iv_plus_offset: in(1) is offset_plus_k: ", n->in(1)->_idx); n->in(1)->dump(); + } +} + +void VPointer::Tracer::scaled_iv_plus_offset_8(Node* n) { + if (_is_trace_alignment) { + print_depth(); tty->print_cr(" %d VPointer::scaled_iv_plus_offset: FAILED", n->_idx); + } +} + +void VPointer::Tracer::scaled_iv_1(Node* n) { + if (_is_trace_alignment) { + print_depth(); tty->print(" %d VPointer::scaled_iv: testing node: ", n->_idx); n->dump(); + } +} + +void VPointer::Tracer::scaled_iv_2(Node* n, int scale) { + if (_is_trace_alignment) { + print_depth(); tty->print_cr(" %d VPointer::scaled_iv: FAILED since another _scale has been detected before", n->_idx); + print_depth(); tty->print_cr(" \\ VPointer::scaled_iv: _scale (%d) != 0", scale); + } +} + +void VPointer::Tracer::scaled_iv_3(Node* n, int scale) { + if (_is_trace_alignment) { + print_depth(); tty->print_cr(" %d VPointer::scaled_iv: is iv, setting _scale = %d", n->_idx, scale); + } +} + +void VPointer::Tracer::scaled_iv_4(Node* n, int scale) { + if (_is_trace_alignment) { + 
print_depth(); tty->print_cr(" %d VPointer::scaled_iv: Op_MulI PASSED, setting _scale = %d", n->_idx, scale); + print_depth(); tty->print(" \\ %d VPointer::scaled_iv: in(1) is iv: ", n->in(1)->_idx); n->in(1)->dump(); + print_depth(); tty->print(" \\ %d VPointer::scaled_iv: in(2) is Con: ", n->in(2)->_idx); n->in(2)->dump(); + } +} + +void VPointer::Tracer::scaled_iv_5(Node* n, int scale) { + if (_is_trace_alignment) { + print_depth(); tty->print_cr(" %d VPointer::scaled_iv: Op_MulI PASSED, setting _scale = %d", n->_idx, scale); + print_depth(); tty->print(" \\ %d VPointer::scaled_iv: in(2) is iv: ", n->in(2)->_idx); n->in(2)->dump(); + print_depth(); tty->print(" \\ %d VPointer::scaled_iv: in(1) is Con: ", n->in(1)->_idx); n->in(1)->dump(); + } +} + +void VPointer::Tracer::scaled_iv_6(Node* n, int scale) { + if (_is_trace_alignment) { + print_depth(); tty->print_cr(" %d VPointer::scaled_iv: Op_LShiftI PASSED, setting _scale = %d", n->_idx, scale); + print_depth(); tty->print(" \\ %d VPointer::scaled_iv: in(1) is iv: ", n->in(1)->_idx); n->in(1)->dump(); + print_depth(); tty->print(" \\ %d VPointer::scaled_iv: in(2) is Con: ", n->in(2)->_idx); n->in(2)->dump(); + } +} + +void VPointer::Tracer::scaled_iv_7(Node* n) { + if (_is_trace_alignment) { + print_depth(); tty->print_cr(" %d VPointer::scaled_iv: Op_ConvI2L PASSED", n->_idx); + print_depth(); tty->print_cr(" \\ VPointer::scaled_iv: in(1) %d is scaled_iv_plus_offset: ", n->in(1)->_idx); + inc_depth(); inc_depth(); + print_depth(); n->in(1)->dump(); + dec_depth(); dec_depth(); + } +} + +void VPointer::Tracer::scaled_iv_8(Node* n, VPointer* tmp) { + if (_is_trace_alignment) { + print_depth(); tty->print(" %d VPointer::scaled_iv: Op_LShiftL, creating tmp VPointer: ", n->_idx); tmp->print(); + } +} + +void VPointer::Tracer::scaled_iv_9(Node* n, int scale, int offset, Node* invar) { + if (_is_trace_alignment) { + print_depth(); tty->print_cr(" %d VPointer::scaled_iv: Op_LShiftL PASSED, setting _scale = %d, _offset = 
%d", n->_idx, scale, offset); + print_depth(); tty->print_cr(" \\ VPointer::scaled_iv: in(1) [%d] is scaled_iv_plus_offset, in(2) [%d] used to scale: _scale = %d, _offset = %d", + n->in(1)->_idx, n->in(2)->_idx, scale, offset); + if (invar != nullptr) { + print_depth(); tty->print_cr(" \\ VPointer::scaled_iv: scaled invariant: [%d]", invar->_idx); + } + inc_depth(); inc_depth(); + print_depth(); n->in(1)->dump(); + print_depth(); n->in(2)->dump(); + if (invar != nullptr) { + print_depth(); invar->dump(); + } + dec_depth(); dec_depth(); + } +} + +void VPointer::Tracer::scaled_iv_10(Node* n) { + if (_is_trace_alignment) { + print_depth(); tty->print_cr(" %d VPointer::scaled_iv: FAILED", n->_idx); + } +} + +void VPointer::Tracer::offset_plus_k_1(Node* n) { + if (_is_trace_alignment) { + print_depth(); tty->print(" %d VPointer::offset_plus_k: testing node: ", n->_idx); n->dump(); + } +} + +void VPointer::Tracer::offset_plus_k_2(Node* n, int _offset) { + if (_is_trace_alignment) { + print_depth(); tty->print_cr(" %d VPointer::offset_plus_k: Op_ConI PASSED, setting _offset = %d", n->_idx, _offset); + } +} + +void VPointer::Tracer::offset_plus_k_3(Node* n, int _offset) { + if (_is_trace_alignment) { + print_depth(); tty->print_cr(" %d VPointer::offset_plus_k: Op_ConL PASSED, setting _offset = %d", n->_idx, _offset); + } +} + +void VPointer::Tracer::offset_plus_k_4(Node* n) { + if (_is_trace_alignment) { + print_depth(); tty->print_cr(" %d VPointer::offset_plus_k: FAILED", n->_idx); + print_depth(); tty->print_cr(" \\ " JLONG_FORMAT " VPointer::offset_plus_k: Op_ConL FAILED, k is too big", n->get_long()); + } +} + +void VPointer::Tracer::offset_plus_k_5(Node* n, Node* _invar) { + if (_is_trace_alignment) { + print_depth(); tty->print_cr(" %d VPointer::offset_plus_k: FAILED since another invariant has been detected before", n->_idx); + print_depth(); tty->print(" \\ %d VPointer::offset_plus_k: _invar is not null: ", _invar->_idx); _invar->dump(); + } +} + +void 
VPointer::Tracer::offset_plus_k_6(Node* n, Node* _invar, bool _negate_invar, int _offset) { + if (_is_trace_alignment) { + print_depth(); tty->print_cr(" %d VPointer::offset_plus_k: Op_AddI PASSED, setting _debug_negate_invar = %d, _invar = %d, _offset = %d", + n->_idx, _negate_invar, _invar->_idx, _offset); + print_depth(); tty->print(" \\ %d VPointer::offset_plus_k: in(2) is Con: ", n->in(2)->_idx); n->in(2)->dump(); + print_depth(); tty->print(" \\ %d VPointer::offset_plus_k: in(1) is invariant: ", _invar->_idx); _invar->dump(); + } +} + +void VPointer::Tracer::offset_plus_k_7(Node* n, Node* _invar, bool _negate_invar, int _offset) { + if (_is_trace_alignment) { + print_depth(); tty->print_cr(" %d VPointer::offset_plus_k: Op_AddI PASSED, setting _debug_negate_invar = %d, _invar = %d, _offset = %d", + n->_idx, _negate_invar, _invar->_idx, _offset); + print_depth(); tty->print(" \\ %d VPointer::offset_plus_k: in(1) is Con: ", n->in(1)->_idx); n->in(1)->dump(); + print_depth(); tty->print(" \\ %d VPointer::offset_plus_k: in(2) is invariant: ", _invar->_idx); _invar->dump(); + } +} + +void VPointer::Tracer::offset_plus_k_8(Node* n, Node* _invar, bool _negate_invar, int _offset) { + if (_is_trace_alignment) { + print_depth(); tty->print_cr(" %d VPointer::offset_plus_k: Op_SubI is PASSED, setting _debug_negate_invar = %d, _invar = %d, _offset = %d", + n->_idx, _negate_invar, _invar->_idx, _offset); + print_depth(); tty->print(" \\ %d VPointer::offset_plus_k: in(2) is Con: ", n->in(2)->_idx); n->in(2)->dump(); + print_depth(); tty->print(" \\ %d VPointer::offset_plus_k: in(1) is invariant: ", _invar->_idx); _invar->dump(); + } +} + +void VPointer::Tracer::offset_plus_k_9(Node* n, Node* _invar, bool _negate_invar, int _offset) { + if (_is_trace_alignment) { + print_depth(); tty->print_cr(" %d VPointer::offset_plus_k: Op_SubI PASSED, setting _debug_negate_invar = %d, _invar = %d, _offset = %d", n->_idx, _negate_invar, _invar->_idx, _offset); + print_depth(); tty->print(" 
\\ %d VPointer::offset_plus_k: in(1) is Con: ", n->in(1)->_idx); n->in(1)->dump(); + print_depth(); tty->print(" \\ %d VPointer::offset_plus_k: in(2) is invariant: ", _invar->_idx); _invar->dump(); + } +} + +void VPointer::Tracer::offset_plus_k_10(Node* n, Node* _invar, bool _negate_invar, int _offset) { + if (_is_trace_alignment) { + print_depth(); tty->print_cr(" %d VPointer::offset_plus_k: PASSED, setting _debug_negate_invar = %d, _invar = %d, _offset = %d", n->_idx, _negate_invar, _invar->_idx, _offset); + print_depth(); tty->print_cr(" \\ %d VPointer::offset_plus_k: is invariant", n->_idx); + } +} + +void VPointer::Tracer::offset_plus_k_11(Node* n) { + if (_is_trace_alignment) { + print_depth(); tty->print_cr(" %d VPointer::offset_plus_k: FAILED", n->_idx); + } +} + +#endif diff --git a/src/hotspot/share/opto/vectorization.hpp b/src/hotspot/share/opto/vectorization.hpp new file mode 100644 index 00000000000..f53b8299986 --- /dev/null +++ b/src/hotspot/share/opto/vectorization.hpp @@ -0,0 +1,248 @@ +/* + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2023, Arm Limited. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#ifndef SHARE_OPTO_VECTORIZATION_HPP
+#define SHARE_OPTO_VECTORIZATION_HPP
+
+#include "opto/node.hpp"
+#include "opto/loopnode.hpp"
+
+// This file and vectorization.cpp contain shared logic and
+// utilities for C2's loop auto-vectorization.
+
+// A vectorization pointer (VPointer) has information about an address for
+// dependence checking and vector alignment. It is usually bound to a memory
+// operation in a counted loop during vectorization analysis. The address is
+// modeled as the sum of an optional loop-invariant part (_invar), a multiple
+// of the induction variable (_scale * iv) and a constant offset (_offset),
+// all in bytes; valid() reports whether the decomposition succeeded.
+class VPointer : public ArenaObj {
+ protected:
+  MemNode*        _mem;      // My memory reference node
+  PhaseIdealLoop* _phase;    // PhaseIdealLoop handle
+  IdealLoopTree*  _lpt;      // Current IdealLoopTree
+  PhiNode*        _iv;       // The loop induction variable
+
+  Node* _base;               // null if unsafe nonheap reference
+  Node* _adr;                // address pointer
+  int   _scale;              // multiplier for iv (in bytes), 0 if no loop iv
+  int   _offset;             // constant offset (in bytes)
+
+  Node* _invar;              // invariant offset (in bytes), null if none
+#ifdef ASSERT
+  // Debug-only mirror of the invariant decomposition, cross-checked in invar_equals().
+  Node* _debug_invar;
+  bool  _debug_negate_invar; // if true then use: (0 - _invar)
+  Node* _debug_invar_scale;  // multiplier for invariant
+#endif
+
+  Node_Stack* _nstack;       // stack used to record a vpointer trace of variants
+  bool        _analyze_only; // Used in loop unrolling only for vpointer trace
+  uint        _stack_idx;    // Used in loop unrolling only for vpointer trace
+
+  PhaseIdealLoop* phase() const { return _phase; }
+  IdealLoopTree*  lpt() const   { return _lpt; }
+  PhiNode*        iv() const    { return _iv; }
+
+  bool is_loop_member(Node* n) const;
+  bool invariant(Node* n) const;
+
+  // Match: k*iv + offset
+  bool scaled_iv_plus_offset(Node* n);
+  // Match: k*iv where k is a constant that's not zero
+  bool scaled_iv(Node* n);
+  // Match: offset is (k [+/- invariant])
+  bool offset_plus_k(Node* n, bool negate = false);
+
+ public:
+  // Result lattice for cmp(): Less/Greater mean non-overlapping accesses with
+  // a known offset order, Equal means the two accesses overlap, and
+  // NotComparable means nothing could be proven about the pair.
+  enum CMP {
+    Less          = 1,
+    Greater       = 2,
+    Equal         = 4,
+    NotEqual      = (Less | Greater),
+    NotComparable = (Less | Greater | Equal)
+  };
+
+  VPointer(MemNode* mem, PhaseIdealLoop* phase, IdealLoopTree* lpt,
+           Node_Stack* nstack, bool analyze_only);
+  // Following is used to create a temporary object during
+  // the pattern match of an address expression.
+  VPointer(VPointer* p);
+
+  bool valid()  { return _adr != nullptr; }  // address decomposition succeeded
+  bool has_iv() { return _scale != 0; }      // address depends on the loop iv
+
+  Node* base()             { return _base; }
+  Node* adr()              { return _adr; }
+  MemNode* mem()           { return _mem; }
+  int   scale_in_bytes()   { return _scale; }
+  Node* invar()            { return _invar; }
+  int   offset_in_bytes()  { return _offset; }
+  int   memory_size()      { return _mem->memory_size(); }
+  Node_Stack* node_stack() { return _nstack; }
+
+  // Comparable? In debug builds the assert verifies that _invar equality
+  // agrees with the full _debug_* decomposition; a NodeSentinel on either
+  // side disables the cross-check.
+  bool invar_equals(VPointer& q) {
+    assert(_debug_invar == NodeSentinel || q._debug_invar == NodeSentinel ||
+           (_invar == q._invar) == (_debug_invar == q._debug_invar &&
+                                    _debug_invar_scale == q._debug_invar_scale &&
+                                    _debug_negate_invar == q._debug_negate_invar), "");
+    return _invar == q._invar;
+  }
+
+  // Compare two pointers: if their base, scale and invariant agree, classify
+  // the pair by constant offsets and access sizes (byte-range overlap =>
+  // Equal, otherwise ordered Less/Greater); else they are NotComparable.
+  int cmp(VPointer& q) {
+    if (valid() && q.valid() &&
+        (_adr == q._adr || (_base == _adr && q._base == q._adr)) &&
+        _scale == q._scale && invar_equals(q)) {
+      bool overlap = q._offset <   _offset + memory_size() &&
+                       _offset < q._offset + q.memory_size();
+      return overlap ? Equal : (_offset < q._offset ? Less : Greater);
+    } else {
+      return NotComparable;
+    }
+  }
+
+  bool not_equal(VPointer& q)  { return not_equal(cmp(q)); }
+  bool equal(VPointer& q)      { return equal(cmp(q)); }
+  bool comparable(VPointer& q) { return comparable(cmp(q)); }
+  static bool not_equal(int cmp)  { return cmp <= NotEqual; }
+  static bool equal(int cmp)      { return cmp == Equal; }
+  static bool comparable(int cmp) { return cmp < NotComparable; }
+
+  void print();
+
+#ifndef PRODUCT
+  // Debug-build tracing of the address pattern match: each ctor_*/
+  // scaled_iv_*/offset_plus_k_* hook logs one step of the corresponding
+  // matcher when alignment tracing is enabled.
+  class Tracer {
+    friend class VPointer;
+    bool _is_trace_alignment;
+    static int _depth;       // shared indentation level for trace output
+    int _depth_save;
+    void print_depth() const;
+    int  depth() const    { return _depth; }
+    void set_depth(int d) { _depth = d; }
+    void inc_depth()      { _depth++; }
+    void dec_depth()      { if (_depth > 0) _depth--; }
+    void store_depth()    { _depth_save = _depth; }
+    void restore_depth()  { _depth = _depth_save; }
+
+    // Scoped helper: bumps the shared trace depth for the lifetime of a scope.
+    class Depth {
+      friend class VPointer;
+      Depth()      { ++_depth; }
+      Depth(int x) { _depth = 0; }  // argument ignored; resets depth to 0
+      ~Depth()     { if (_depth > 0) --_depth; }
+    };
+    Tracer(bool is_trace_alignment) : _is_trace_alignment(is_trace_alignment) {}
+
+    // tracing functions
+    void ctor_1(Node* mem);
+    void ctor_2(Node* adr);
+    void ctor_3(Node* adr, int i);
+    void ctor_4(Node* adr, int i);
+    void ctor_5(Node* adr, Node* base, int i);
+    void ctor_6(Node* mem);
+
+    void scaled_iv_plus_offset_1(Node* n);
+    void scaled_iv_plus_offset_2(Node* n);
+    void scaled_iv_plus_offset_3(Node* n);
+    void scaled_iv_plus_offset_4(Node* n);
+    void scaled_iv_plus_offset_5(Node* n);
+    void scaled_iv_plus_offset_6(Node* n);
+    void scaled_iv_plus_offset_7(Node* n);
+    void scaled_iv_plus_offset_8(Node* n);
+
+    void scaled_iv_1(Node* n);
+    void scaled_iv_2(Node* n, int scale);
+    void scaled_iv_3(Node* n, int scale);
+    void scaled_iv_4(Node* n, int scale);
+    void scaled_iv_5(Node* n, int scale);
+    void scaled_iv_6(Node* n, int scale);
+    void scaled_iv_7(Node* n);
+    void scaled_iv_8(Node* n, VPointer* tmp);
+    void scaled_iv_9(Node* n, int _scale, int _offset, Node* _invar);
+    void scaled_iv_10(Node* n);
+
+    void offset_plus_k_1(Node* n);
+    void offset_plus_k_2(Node* n, int _offset);
+    void offset_plus_k_3(Node* n, int _offset);
+    void offset_plus_k_4(Node* n);
+    void offset_plus_k_5(Node* n, Node* _invar);
+    void offset_plus_k_6(Node* n, Node* _invar, bool _negate_invar, int _offset);
+    void offset_plus_k_7(Node* n, Node* _invar, bool _negate_invar, int _offset);
+    void offset_plus_k_8(Node* n, Node* _invar, bool _negate_invar, int _offset);
+    void offset_plus_k_9(Node* n, Node* _invar, bool _negate_invar, int _offset);
+    void offset_plus_k_10(Node* n, Node* _invar, bool _negate_invar, int _offset);
+    void offset_plus_k_11(Node* n);
+  } _tracer; // Tracer
+#endif
+
+  // Out-of-line helpers used while composing the invariant part of the
+  // address (presumably defined in vectorization.cpp — confirm there).
+  Node* maybe_negate_invar(bool negate, Node* invar);
+
+  void maybe_add_to_invar(Node* new_invar, bool negate);
+
+  Node* register_if_new(Node* n) const;
+};
+
+
+// Vector element size statistics for loop vectorization with vector masks.
+// Tracks, per power-of-two element size (1, 2, 4 or 8 bytes), how many
+// elements of that size were recorded; counters are indexed by log2(size).
+class VectorElementSizeStats {
+ private:
+  static const int NO_SIZE = -1;     // no size recorded yet
+  static const int MIXED_SIZE = -2;  // more than one distinct size recorded
+  int* _stats;                       // arena-allocated counters, one per log2(size) in [0, 3]
+
+ public:
+  VectorElementSizeStats(Arena* a) : _stats(NEW_ARENA_ARRAY(a, int, 4)) {
+    clear();
+  }
+
+  void clear() { memset(_stats, 0, sizeof(int) * 4); }
+
+  // Count one occurrence of an element of 'size' bytes (must be 1, 2, 4 or 8).
+  void record_size(int size) {
+    assert(1 <= size && size <= 8 && is_power_of_2(size), "Illegal size");
+    _stats[exact_log2(size)]++;
+  }
+
+  int count_size(int size) {
+    assert(1 <= size && size <= 8 && is_power_of_2(size), "Illegal size");
+    return _stats[exact_log2(size)];
+  }
+
+  // Smallest recorded size in bytes, or NO_SIZE if nothing was recorded.
+  int smallest_size() {
+    for (int i = 0; i <= 3; i++) {
+      if (_stats[i] > 0) return (1 << i);
+    }
+    return NO_SIZE;
+  }
+
+  // Largest recorded size in bytes, or NO_SIZE if nothing was recorded.
+  int largest_size() {
+    for (int i = 3; i >= 0; i--) {
+      if (_stats[i] > 0) return (1 << i);
+    }
+    return NO_SIZE;
+  }
+
+  // The single recorded size, NO_SIZE if none (smallest == largest == NO_SIZE),
+  // or MIXED_SIZE if several distinct sizes were recorded.
+  int unique_size() {
+    int small = smallest_size();
+    int large = largest_size();
+    return (small == large) ? small : MIXED_SIZE;
+  }
+};
+
+#endif // SHARE_OPTO_VECTORIZATION_HPP