From 2a4f18ccbde63c766ee95f957083f52471acecc8 Mon Sep 17 00:00:00 2001 From: Jan Civlin Date: Tue, 14 Jul 2015 09:55:22 -0700 Subject: [PATCH] 8085932: Fixing bugs in detecting memory alignments in SuperWord Reviewed-by: kvn --- hotspot/src/share/vm/opto/compile.hpp | 3 + hotspot/src/share/vm/opto/loopnode.cpp | 3 - hotspot/src/share/vm/opto/superword.cpp | 516 +++++++++++++++++++++--- hotspot/src/share/vm/opto/superword.hpp | 101 ++++- 4 files changed, 565 insertions(+), 58 deletions(-) diff --git a/hotspot/src/share/vm/opto/compile.hpp b/hotspot/src/share/vm/opto/compile.hpp index 44153850603..a22695fcdce 100644 --- a/hotspot/src/share/vm/opto/compile.hpp +++ b/hotspot/src/share/vm/opto/compile.hpp @@ -140,6 +140,9 @@ class CloneMap { bool is_debug() const { return _debug; } void set_debug(bool debug) { _debug = debug; } static const char* debug_option_name; + + bool same_idx(node_idx_t k1, node_idx_t k2) const { return idx(k1) == idx(k2); } + bool same_gen(node_idx_t k1, node_idx_t k2) const { return gen(k1) == gen(k2); } }; //------------------------------Compile---------------------------------------- diff --git a/hotspot/src/share/vm/opto/loopnode.cpp b/hotspot/src/share/vm/opto/loopnode.cpp index 6d6024718e2..a50afb59316 100644 --- a/hotspot/src/share/vm/opto/loopnode.cpp +++ b/hotspot/src/share/vm/opto/loopnode.cpp @@ -3682,7 +3682,6 @@ void PhaseIdealLoop::dump( ) const { } void PhaseIdealLoop::dump( IdealLoopTree *loop, uint idx, Node_List &rpo_list ) const { - CloneMap& cm = C->clone_map(); loop->dump_head(); // Now scan for CFG nodes in the same loop @@ -3714,7 +3713,6 @@ void PhaseIdealLoop::dump( IdealLoopTree *loop, uint idx, Node_List &rpo_list ) cached_idom = find_non_split_ctrl(cached_idom); } tty->print(" ID:%d",computed_idom->_idx); - cm.dump(n->_idx); n->dump(); if( cached_idom != computed_idom ) { tty->print_cr("*** BROKEN IDOM! Computed as: %d, cached as: %d", @@ -3734,7 +3732,6 @@ void PhaseIdealLoop::dump( IdealLoopTree *loop, uint idx, Node_List &rpo_list ) for( uint j = 0; j < loop->_nest; j++ ) tty->print(" "); tty->print(" "); - cm.dump(m->_idx); m->dump(); } } diff --git a/hotspot/src/share/vm/opto/superword.cpp b/hotspot/src/share/vm/opto/superword.cpp index a5b5ffc2196..65a42e4759b 100644 --- a/hotspot/src/share/vm/opto/superword.cpp +++ b/hotspot/src/share/vm/opto/superword.cpp @@ -74,9 +74,15 @@ SuperWord::SuperWord(PhaseIdealLoop* phase) : _do_vector_loop(phase->C->do_vector_loop()), // whether to do vectorization/simd style _ii_first(-1), // first loop generation index - only if do_vector_loop() _ii_last(-1), // last loop generation index - only if do_vector_loop() - _ii_order(arena(), 8, 0, 0), - _vector_loop_debug(phase->C->has_method() && phase->C->method_has_option("VectorizeDebug")) -{} + _ii_order(arena(), 8, 0, 0) +{ +#ifndef PRODUCT + _vector_loop_debug = 0; + if (_phase->C->method() != NULL) { + _phase->C->method()->has_option_value("VectorizeDebug", _vector_loop_debug); + } +#endif +} //------------------------------transform_loop--------------------------- void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) { @@ -90,7 +96,6 @@ void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) { if (!cl->is_valid_counted_loop()) return; // skip malformed counted loop if (!cl->is_main_loop() ) return; // skip normal, pre, and post loops - // Check for no control flow in body (other than exit) Node *cl_exit = cl->loopexit(); if (cl_exit->in(0) != lpt->_head) return; @@ -425,13 +430,15 @@ void SuperWord::find_adjacent_refs() { // this reference to a vector-aligned address. best_align_to_mem_ref = mem_ref; best_iv_adjustment = iv_adjustment; + NOT_PRODUCT(find_adjacent_refs_trace_1(best_align_to_mem_ref, best_iv_adjustment);) } SWPointer align_to_ref_p(mem_ref, this, NULL, false); // Set alignment relative to "align_to_ref" for all related memory operations. for (int i = memops.size() - 1; i >= 0; i--) { MemNode* s = memops.at(i)->as_Mem(); - if (isomorphic(s, mem_ref)) { + if (isomorphic(s, mem_ref) && + (!_do_vector_loop || same_origin_idx(s, mem_ref))) { SWPointer p2(s, this, NULL, false); if (p2.comparable(align_to_ref_p)) { int align = memory_alignment(s, iv_adjustment); @@ -496,7 +503,7 @@ void SuperWord::find_adjacent_refs() { Node_List* pair = new Node_List(); pair->push(s1); pair->push(s2); - if (!_do_vector_loop || _clone_map.idx(s1->_idx) == _clone_map.idx(s2->_idx)) { + if (!_do_vector_loop || same_origin_idx(s1, s2)) { _packset.append(pair); } } @@ -533,8 +540,12 @@ void SuperWord::find_adjacent_refs() { memops.push(s); } MemNode* best_align_to_mem_ref = find_align_to_ref(memops); - if (best_align_to_mem_ref == NULL) break; + if (best_align_to_mem_ref == NULL) { + NOT_PRODUCT(if (TraceSuperWord) tty->print_cr("SuperWord::find_adjacent_refs(): best_align_to_mem_ref == NULL");) + break; + } best_iv_adjustment = get_iv_adjustment(best_align_to_mem_ref); + NOT_PRODUCT(find_adjacent_refs_trace_1(best_align_to_mem_ref, best_iv_adjustment);) // Restore list. while (memops.size() > orig_msize) (void)memops.pop(); @@ -560,6 +571,16 @@ void SuperWord::find_adjacent_refs() { #endif } +#ifndef PRODUCT +void SuperWord::find_adjacent_refs_trace_1(Node* best_align_to_mem_ref, int best_iv_adjustment) { + if (is_trace_adjacent()) { + tty->print("SuperWord::find_adjacent_refs best_align_to_mem_ref = %d, best_iv_adjustment = %d", + best_align_to_mem_ref->_idx, best_iv_adjustment); + best_align_to_mem_ref->dump(); + } +} +#endif + //------------------------------find_align_to_ref--------------------------- // Find a memory reference to align the loop induction variable to. // Looks first at stores then at loads, looking for a memory reference @@ -756,9 +777,11 @@ int SuperWord::get_iv_adjustment(MemNode* mem_ref) { } #ifndef PRODUCT - if (TraceSuperWord) - tty->print_cr("\noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d", - offset, iv_adjustment, elt_size, scale, iv_stride(), vw); + if (TraceSuperWord) { + tty->print("SuperWord::get_iv_adjustment: n = %d, noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d: ", + mem_ref->_idx, offset, iv_adjustment, elt_size, scale, iv_stride(), vw); + mem_ref->dump(); + } #endif return iv_adjustment; } @@ -863,12 +886,14 @@ void SuperWord::mem_slice_preds(Node* start, Node* stop, GrowableArray &p Node* n = start; Node* prev = NULL; while (true) { + NOT_PRODUCT( if(is_trace_mem_slice()) tty->print_cr("SuperWord::mem_slice_preds: n %d", n->_idx);) assert(in_bb(n), "must be in block"); for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { Node* out = n->fast_out(i); if (out->is_Load()) { if (in_bb(out)) { preds.push(out); + NOT_PRODUCT(if (TraceSuperWord && Verbose) tty->print_cr("SuperWord::mem_slice_preds: added pred(%d)", out->_idx);) } } else { // FIXME @@ -883,10 +908,11 @@ void SuperWord::mem_slice_preds(Node* start, Node* stop, GrowableArray &p } else { assert(out == prev || prev == NULL, "no branches off of store slice"); } - } - } + }//else + }//for if (n == stop) break; preds.push(n); + NOT_PRODUCT(if (TraceSuperWord && Verbose) tty->print_cr("SuperWord::mem_slice_preds: added pred(%d)", n->_idx);) prev = n; assert(n->is_Mem(), err_msg_res("unexpected node %s", n->Name())); n = n->in(MemNode::Memory); @@ -2140,18 +2166,38 @@ bool SuperWord::construct_bb() { } else { _stk.pop(); // Remove post-visited node from stack } - } + }//while + int ii_current = -1; + unsigned int load_idx = -1; + _ii_order.clear(); // Create real map of block indices for nodes for (int j = 0; j < _block.length(); j++) { Node* n = _block.at(j); set_bb_idx(n, j); - } + if (_do_vector_loop && n->is_Load()) { + if (ii_current == -1) { + ii_current = _clone_map.gen(n->_idx); + _ii_order.push(ii_current); + load_idx = _clone_map.idx(n->_idx); + } else if (_clone_map.idx(n->_idx) == load_idx && _clone_map.gen(n->_idx) != ii_current) { + ii_current = _clone_map.gen(n->_idx); + _ii_order.push(ii_current); + } + } + }//for // Ensure extra info is allocated. initialize_bb(); #ifndef PRODUCT + if (_vector_loop_debug && _ii_order.length() > 0) { + tty->print("SuperWord::construct_bb: List of generations: "); + for (int jj = 0; jj < _ii_order.length(); ++jj) { + tty->print(" %d:%d", jj, _ii_order.at(jj)); + } + tty->print_cr(" "); + } if (TraceSuperWord) { print_bb(); tty->print_cr("\ndata entry nodes: %s", _data_entry.length() > 0 ? "" : "NONE"); @@ -2312,18 +2358,27 @@ void SuperWord::compute_vector_element_type() { //------------------------------memory_alignment--------------------------- // Alignment within a vector memory reference int SuperWord::memory_alignment(MemNode* s, int iv_adjust) { + #ifndef PRODUCT + if(TraceSuperWord && Verbose) { + tty->print("SuperWord::memory_alignment within a vector memory reference for %d: ", s->_idx); s->dump(); + } + #endif + NOT_PRODUCT(SWPointer::Tracer::Depth ddd(0);) SWPointer p(s, this, NULL, false); if (!p.valid()) { + NOT_PRODUCT(if(is_trace_alignment()) tty->print("SWPointer::memory_alignment: SWPointer p invalid, return bottom_align");) return bottom_align; } int vw = vector_width_in_bytes(s); if (vw < 2) { + NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SWPointer::memory_alignment: vector_width_in_bytes < 2, return bottom_align");) return bottom_align; // No vectors for this type } int offset = p.offset_in_bytes(); offset += iv_adjust*p.memory_size(); int off_rem = offset % vw; int off_mod = off_rem >= 0 ? off_rem : off_rem + vw; + NOT_PRODUCT(if(TraceSuperWord && Verbose) tty->print_cr("SWPointer::memory_alignment: off_rem = %d, off_mod = %d", off_rem, off_mod);) return off_mod; } @@ -2732,13 +2787,20 @@ char* SuperWord::blank(uint depth) { //==============================SWPointer=========================== - +#ifndef PRODUCT +int SWPointer::Tracer::_depth = 0; +#endif //----------------------------SWPointer------------------------ SWPointer::SWPointer(MemNode* mem, SuperWord* slp, Node_Stack *nstack, bool analyze_only) : _mem(mem), _slp(slp), _base(NULL), _adr(NULL), _scale(0), _offset(0), _invar(NULL), _negate_invar(false), _nstack(nstack), _analyze_only(analyze_only), - _stack_idx(0) { + _stack_idx(0) +#ifndef PRODUCT + , _tracer(slp) +#endif +{ + NOT_PRODUCT(_tracer.ctor_1(mem);) Node* adr = mem->in(MemNode::Address); if (!adr->is_AddP()) { @@ -2757,16 +2819,29 @@ SWPointer::SWPointer(MemNode* mem, SuperWord* slp, Node_Stack *nstack, bool anal assert(!valid(), "unsafe access"); return; } - for (int i = 0; i < 3; i++) { + + NOT_PRODUCT(if(_slp->is_trace_alignment()) _tracer.store_depth();) + NOT_PRODUCT(_tracer.ctor_2(adr);) + + int i; + for (i = 0; i < 3; i++) { + NOT_PRODUCT(_tracer.ctor_3(adr, i);) + if (!scaled_iv_plus_offset(adr->in(AddPNode::Offset))) { assert(!valid(), "too complex"); return; } adr = adr->in(AddPNode::Address); + NOT_PRODUCT(_tracer.ctor_4(adr, i);) + if (base == adr || !adr->is_AddP()) { + NOT_PRODUCT(_tracer.ctor_5(adr, base, i);) break; // stop looking at addp's } } + NOT_PRODUCT(if(_slp->is_trace_alignment()) _tracer.restore_depth();) + NOT_PRODUCT(_tracer.ctor_6(mem);) + _base = base; _adr = adr; assert(valid(), "Usable"); @@ -2778,68 +2853,103 @@ SWPointer::SWPointer(SWPointer* p) : _mem(p->_mem), _slp(p->_slp), _base(NULL), _adr(NULL), _scale(0), _offset(0), _invar(NULL), _negate_invar(false), _nstack(p->_nstack), _analyze_only(p->_analyze_only), - _stack_idx(p->_stack_idx) {} + _stack_idx(p->_stack_idx) + #ifndef PRODUCT + , _tracer(p->_slp) + #endif +{} + +bool SWPointer::invariant(Node* n) { + NOT_PRODUCT(Tracer::Depth dd;) + Node *n_c = phase()->get_ctrl(n); + NOT_PRODUCT(_tracer.invariant_1(n, n_c);) + return !lpt()->is_member(phase()->get_loop(n_c)); +} //------------------------scaled_iv_plus_offset-------------------- // Match: k*iv + offset // where: k is a constant that maybe zero, and // offset is (k2 [+/- invariant]) where k2 maybe zero and invariant is optional bool SWPointer::scaled_iv_plus_offset(Node* n) { + NOT_PRODUCT(Tracer::Depth ddd;) + NOT_PRODUCT(_tracer.scaled_iv_plus_offset_1(n);) + if (scaled_iv(n)) { + NOT_PRODUCT(_tracer.scaled_iv_plus_offset_2(n);) return true; } + if (offset_plus_k(n)) { + NOT_PRODUCT(_tracer.scaled_iv_plus_offset_3(n);) return true; } + int opc = n->Opcode(); if (opc == Op_AddI) { if (scaled_iv(n->in(1)) && offset_plus_k(n->in(2))) { + NOT_PRODUCT(_tracer.scaled_iv_plus_offset_4(n);) return true; } if (scaled_iv(n->in(2)) && offset_plus_k(n->in(1))) { + NOT_PRODUCT(_tracer.scaled_iv_plus_offset_5(n);) return true; } } else if (opc == Op_SubI) { if (scaled_iv(n->in(1)) && offset_plus_k(n->in(2), true)) { + NOT_PRODUCT(_tracer.scaled_iv_plus_offset_6(n);) return true; } if (scaled_iv(n->in(2)) && offset_plus_k(n->in(1))) { _scale *= -1; + NOT_PRODUCT(_tracer.scaled_iv_plus_offset_7(n);) return true; } } + + NOT_PRODUCT(_tracer.scaled_iv_plus_offset_8(n);) return false; } //----------------------------scaled_iv------------------------ // Match: k*iv where k is a constant that's not zero bool SWPointer::scaled_iv(Node* n) { - if (_scale != 0) { - return false; // already found a scale + NOT_PRODUCT(Tracer::Depth ddd;) + NOT_PRODUCT(_tracer.scaled_iv_1(n);) + + if (_scale != 0) { // already found a scale + NOT_PRODUCT(_tracer.scaled_iv_2(n, _scale);) + return false; } + if (n == iv()) { _scale = 1; + NOT_PRODUCT(_tracer.scaled_iv_3(n, _scale);) return true; } if (_analyze_only && (invariant(n) == false)) { _nstack->push(n, _stack_idx++); } + int opc = n->Opcode(); if (opc == Op_MulI) { if (n->in(1) == iv() && n->in(2)->is_Con()) { _scale = n->in(2)->get_int(); + NOT_PRODUCT(_tracer.scaled_iv_4(n, _scale);) return true; } else if (n->in(2) == iv() && n->in(1)->is_Con()) { _scale = n->in(1)->get_int(); + NOT_PRODUCT(_tracer.scaled_iv_5(n, _scale);) return true; } } else if (opc == Op_LShiftI) { if (n->in(1) == iv() && n->in(2)->is_Con()) { _scale = 1 << n->in(2)->get_int(); + NOT_PRODUCT(_tracer.scaled_iv_6(n, _scale);) return true; } } else if (opc == Op_ConvI2L) { if (scaled_iv_plus_offset(n->in(1))) { + NOT_PRODUCT(_tracer.scaled_iv_7(n);) return true; } } else if (opc == Op_LShiftL) { @@ -2847,17 +2957,22 @@ bool SWPointer::scaled_iv(Node* n) { // Need to preserve the current _offset value, so // create a temporary object for this expression subtree. // Hacky, so should re-engineer the address pattern match. + NOT_PRODUCT(Tracer::Depth dddd;) SWPointer tmp(this); + NOT_PRODUCT(_tracer.scaled_iv_8(n, &tmp);) + if (tmp.scaled_iv_plus_offset(n->in(1))) { - if (tmp._invar == NULL) { + if (tmp._invar == NULL || _slp->do_vector_loop()) { int mult = 1 << n->in(2)->get_int(); _scale = tmp._scale * mult; _offset += tmp._offset * mult; + NOT_PRODUCT(_tracer.scaled_iv_9(n, _scale, _offset, mult);) return true; } } } } + NOT_PRODUCT(_tracer.scaled_iv_10(n);) return false; } @@ -2865,9 +2980,13 @@ bool SWPointer::scaled_iv(Node* n) { // Match: offset is (k [+/- invariant]) // where k maybe zero and invariant is optional, but not both. bool SWPointer::offset_plus_k(Node* n, bool negate) { + NOT_PRODUCT(Tracer::Depth ddd;) + NOT_PRODUCT(_tracer.offset_plus_k_1(n);) + int opc = n->Opcode(); if (opc == Op_ConI) { _offset += negate ? -(n->get_int()) : n->get_int(); + NOT_PRODUCT(_tracer.offset_plus_k_2(n, _offset);) return true; } else if (opc == Op_ConL) { // Okay if value fits into an int @@ -2876,11 +2995,17 @@ bool SWPointer::offset_plus_k(Node* n, bool negate) { jlong loff = n->get_long(); jint off = (jint)loff; _offset += negate ? -off : loff; + NOT_PRODUCT(_tracer.offset_plus_k_3(n, _offset);) return true; } + NOT_PRODUCT(_tracer.offset_plus_k_4(n);) return false; } - if (_invar != NULL) return false; // already have an invariant + if (_invar != NULL) { // already has an invariant + NOT_PRODUCT(_tracer.offset_plus_k_5(n, _invar);) + return false; + } + if (_analyze_only && (invariant(n) == false)) { _nstack->push(n, _stack_idx++); } @@ -2889,11 +3014,13 @@ bool SWPointer::offset_plus_k(Node* n, bool negate) { _negate_invar = negate; _invar = n->in(1); _offset += negate ? -(n->in(2)->get_int()) : n->in(2)->get_int(); + NOT_PRODUCT(_tracer.offset_plus_k_6(n, _invar, _negate_invar, _offset);) return true; } else if (n->in(1)->is_Con() && invariant(n->in(2))) { _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int(); _negate_invar = negate; _invar = n->in(2); + NOT_PRODUCT(_tracer.offset_plus_k_7(n, _invar, _negate_invar, _offset);) return true; } } @@ -2902,19 +3029,24 @@ bool SWPointer::offset_plus_k(Node* n, bool negate) { _negate_invar = negate; _invar = n->in(1); _offset += !negate ? -(n->in(2)->get_int()) : n->in(2)->get_int(); + NOT_PRODUCT(_tracer.offset_plus_k_8(n, _invar, _negate_invar, _offset);) return true; } else if (n->in(1)->is_Con() && invariant(n->in(2))) { _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int(); _negate_invar = !negate; _invar = n->in(2); + NOT_PRODUCT(_tracer.offset_plus_k_9(n, _invar, _negate_invar, _offset);) return true; } } if (invariant(n)) { _negate_invar = negate; _invar = n; + NOT_PRODUCT(_tracer.offset_plus_k_10(n, _invar, _negate_invar, _offset);) return true; } + + NOT_PRODUCT(_tracer.offset_plus_k_11(n);) return false; } @@ -2930,6 +3062,287 @@ void SWPointer::print() { #endif } +//----------------------------tracing------------------------ +#ifndef PRODUCT +void SWPointer::Tracer::print_depth() { + for (int ii = 0; ii<_depth; ++ii) tty->print(" "); +} + +void SWPointer::Tracer::ctor_1 (Node* mem) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print(" %d SWPointer::SWPointer: start alignment analysis", mem->_idx); mem->dump(); + } +} + +void SWPointer::Tracer::ctor_2(Node* adr) { + if(_slp->is_trace_alignment()) { + //store_depth(); + inc_depth(); + print_depth(); tty->print(" %d (adr) SWPointer::SWPointer: ", adr->_idx); adr->dump(); + inc_depth(); + print_depth(); tty->print(" %d (base) SWPointer::SWPointer: ", adr->in(AddPNode::Base)->_idx); adr->in(AddPNode::Base)->dump(); + } +} + +void SWPointer::Tracer::ctor_3(Node* adr, int i) { + if(_slp->is_trace_alignment()) { + inc_depth(); + Node* offset = adr->in(AddPNode::Offset); + print_depth(); tty->print(" %d (offset) SWPointer::SWPointer: i = %d: ", offset->_idx, i); offset->dump(); + } +} + +void SWPointer::Tracer::ctor_4(Node* adr, int i) { + if(_slp->is_trace_alignment()) { + inc_depth(); + print_depth(); tty->print(" %d (adr) SWPointer::SWPointer: i = %d: ", adr->_idx, i); adr->dump(); + } +} + +void SWPointer::Tracer::ctor_5(Node* adr, Node* base, int i) { + if(_slp->is_trace_alignment()) { + inc_depth(); + if (base == adr) { + print_depth(); tty->print_cr(" \\ %d (adr) == %d (base) SWPointer::SWPointer: breaking analysis at i = %d", adr->_idx, base->_idx, i); + } else if (!adr->is_AddP()) { + print_depth(); tty->print_cr(" \\ %d (adr) is NOT Addp SWPointer::SWPointer: breaking analysis at i = %d", adr->_idx, i); + } + } +} + +void SWPointer::Tracer::ctor_6(Node* mem) { + if(_slp->is_trace_alignment()) { + //restore_depth(); + print_depth(); tty->print_cr(" %d (adr) SWPointer::SWPointer: stop analysis", mem->_idx); + } +} + +void SWPointer::Tracer::invariant_1(Node *n, Node *n_c) { + if (_slp->do_vector_loop() && _slp->is_debug() && _slp->_lpt->is_member(_slp->_phase->get_loop(n_c)) != (int)_slp->in_bb(n)) { + int is_member = _slp->_lpt->is_member(_slp->_phase->get_loop(n_c)); + int in_bb = _slp->in_bb(n); + print_depth(); tty->print(" \\ "); tty->print_cr(" %d SWPointer::invariant conditions differ: n_c %d", n->_idx, n_c->_idx); + print_depth(); tty->print(" \\ "); tty->print_cr("is_member %d, in_bb %d", is_member, in_bb); + print_depth(); tty->print(" \\ "); n->dump(); + print_depth(); tty->print(" \\ "); n_c->dump(); + } +} + +void SWPointer::Tracer::scaled_iv_plus_offset_1(Node* n) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print(" %d SWPointer::scaled_iv_plus_offset testing node: ", n->_idx); + n->dump(); + } +} + +void SWPointer::Tracer::scaled_iv_plus_offset_2(Node* n) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: PASSED", n->_idx); + } +} + +void SWPointer::Tracer::scaled_iv_plus_offset_3(Node* n) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: PASSED", n->_idx); + } +} + +void SWPointer::Tracer::scaled_iv_plus_offset_4(Node* n) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: Op_AddI PASSED", n->_idx); + print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(1) is scaled_iv: ", n->in(1)->_idx); n->in(1)->dump(); + print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(2) is offset_plus_k: ", n->in(2)->_idx); n->in(2)->dump(); + } +} + +void SWPointer::Tracer::scaled_iv_plus_offset_5(Node* n) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: Op_AddI PASSED", n->_idx); + print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(2) is scaled_iv: ", n->in(2)->_idx); n->in(2)->dump(); + print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(1) is offset_plus_k: ", n->in(1)->_idx); n->in(1)->dump(); + } +} + +void SWPointer::Tracer::scaled_iv_plus_offset_6(Node* n) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: Op_SubI PASSED", n->_idx); + print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(1) is scaled_iv: ", n->in(1)->_idx); n->in(1)->dump(); + print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(2) is offset_plus_k: ", n->in(2)->_idx); n->in(2)->dump(); + } +} + +void SWPointer::Tracer::scaled_iv_plus_offset_7(Node* n) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: Op_SubI PASSED", n->_idx); + print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(2) is scaled_iv: ", n->in(2)->_idx); n->in(2)->dump(); + print_depth(); tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(1) is offset_plus_k: ", n->in(1)->_idx); n->in(1)->dump(); + } +} + +void SWPointer::Tracer::scaled_iv_plus_offset_8(Node* n) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: FAILED", n->_idx); + } +} + +void SWPointer::Tracer::scaled_iv_1(Node* n) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print(" %d SWPointer::scaled_iv: testing node: ", n->_idx); n->dump(); + } +} + +void SWPointer::Tracer::scaled_iv_2(Node* n, int scale) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: FAILED since another _scale has been detected before", n->_idx); + print_depth(); tty->print_cr(" \\ SWPointer::scaled_iv: _scale (%d) != 0", scale); + } +} + +void SWPointer::Tracer::scaled_iv_3(Node* n, int scale) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: is iv, setting _scale = %d", n->_idx, scale); + } +} + +void SWPointer::Tracer::scaled_iv_4(Node* n, int scale) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_MulI PASSED, setting _scale = %d", n->_idx, scale); + print_depth(); tty->print(" \\ %d SWPointer::scaled_iv: in(1) is iv: ", n->in(1)->_idx); n->in(1)->dump(); + print_depth(); tty->print(" \\ %d SWPointer::scaled_iv: in(2) is Con: ", n->in(2)->_idx); n->in(2)->dump(); + } +} + +void SWPointer::Tracer::scaled_iv_5(Node* n, int scale) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_MulI PASSED, setting _scale = %d", n->_idx, scale); + print_depth(); tty->print(" \\ %d SWPointer::scaled_iv: in(2) is iv: ", n->in(2)->_idx); n->in(2)->dump(); + print_depth(); tty->print(" \\ %d SWPointer::scaled_iv: in(1) is Con: ", n->in(1)->_idx); n->in(1)->dump(); + } +} + +void SWPointer::Tracer::scaled_iv_6(Node* n, int scale) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_LShiftI PASSED, setting _scale = %d", n->_idx, scale); + print_depth(); tty->print(" \\ %d SWPointer::scaled_iv: in(1) is iv: ", n->in(1)->_idx); n->in(1)->dump(); + print_depth(); tty->print(" \\ %d SWPointer::scaled_iv: in(2) is Con: ", n->in(2)->_idx); n->in(2)->dump(); + } +} + +void SWPointer::Tracer::scaled_iv_7(Node* n) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_ConvI2L PASSED", n->_idx); + print_depth(); tty->print_cr(" \\ SWPointer::scaled_iv: in(1) %d is scaled_iv_plus_offset: ", n->in(1)->_idx); + inc_depth(); inc_depth(); + print_depth(); n->in(1)->dump(); + dec_depth(); dec_depth(); + } +} + +void SWPointer::Tracer::scaled_iv_8(Node* n, SWPointer* tmp) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print(" %d SWPointer::scaled_iv: Op_LShiftL, creating tmp SWPointer: ", n->_idx); tmp->print(); + } +} + +void SWPointer::Tracer::scaled_iv_9(Node* n, int scale, int _offset, int mult) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_LShiftL PASSED, setting _scale = %d, _offset = %d", n->_idx, scale, _offset); + print_depth(); tty->print_cr(" \\ SWPointer::scaled_iv: in(1) %d is scaled_iv_plus_offset, in(2) %d used to get mult = %d: _scale = %d, _offset = %d", + n->in(1)->_idx, n->in(2)->_idx, mult, scale, _offset); + inc_depth(); inc_depth(); + print_depth(); n->in(1)->dump(); + print_depth(); n->in(2)->dump(); + dec_depth(); dec_depth(); + } +} + +void SWPointer::Tracer::scaled_iv_10(Node* n) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: FAILED", n->_idx); + } +} + +void SWPointer::Tracer::offset_plus_k_1(Node* n) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print(" %d SWPointer::offset_plus_k: testing node: ", n->_idx); n->dump(); + } +} + +void SWPointer::Tracer::offset_plus_k_2(Node* n, int _offset) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_ConI PASSED, setting _offset = %d", n->_idx, _offset); + } +} + +void SWPointer::Tracer::offset_plus_k_3(Node* n, int _offset) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_ConL PASSED, setting _offset = %d", n->_idx, _offset); + } +} + +void SWPointer::Tracer::offset_plus_k_4(Node* n) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: FAILED", n->_idx); + print_depth(); tty->print_cr(" \\ " JLONG_FORMAT " SWPointer::offset_plus_k: Op_ConL FAILED, k is too big", n->get_long()); + } +} + +void SWPointer::Tracer::offset_plus_k_5(Node* n, Node* _invar) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: FAILED since another invariant has been detected before", n->_idx); + print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: _invar != NULL: ", _invar->_idx); _invar->dump(); + } +} + +void SWPointer::Tracer::offset_plus_k_6(Node* n, Node* _invar, bool _negate_invar, int _offset) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_AddI PASSED, setting _negate_invar = %d, _invar = %d, _offset = %d", + n->_idx, _negate_invar, _invar->_idx, _offset); + print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(2) is Con: ", n->in(2)->_idx); n->in(2)->dump(); + print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(1) is invariant: ", _invar->_idx); _invar->dump(); + } +} + +void SWPointer::Tracer::offset_plus_k_7(Node* n, Node* _invar, bool _negate_invar, int _offset) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_AddI PASSED, setting _negate_invar = %d, _invar = %d, _offset = %d", + n->_idx, _negate_invar, _invar->_idx, _offset); + print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(1) is Con: ", n->in(1)->_idx); n->in(1)->dump(); + print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(2) is invariant: ", _invar->_idx); _invar->dump(); + } +} + +void SWPointer::Tracer::offset_plus_k_8(Node* n, Node* _invar, bool _negate_invar, int _offset) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_SubI is PASSED, setting _negate_invar = %d, _invar = %d, _offset = %d", + n->_idx, _negate_invar, _invar->_idx, _offset); + print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(2) is Con: ", n->in(2)->_idx); n->in(2)->dump(); + print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(1) is invariant: ", _invar->_idx); _invar->dump(); + } +} + +void SWPointer::Tracer::offset_plus_k_9(Node* n, Node* _invar, bool _negate_invar, int _offset) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: Op_SubI PASSED, setting _negate_invar = %d, _invar = %d, _offset = %d", n->_idx, _negate_invar, _invar->_idx, _offset); + print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(1) is Con: ", n->in(1)->_idx); n->in(1)->dump(); + print_depth(); tty->print(" \\ %d SWPointer::offset_plus_k: in(2) is invariant: ", _invar->_idx); _invar->dump(); + } +} + +void SWPointer::Tracer::offset_plus_k_10(Node* n, Node* _invar, bool _negate_invar, int _offset) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: PASSED, setting _negate_invar = %d, _invar = %d, _offset = %d", n->_idx, _negate_invar, _invar->_idx, _offset); + print_depth(); tty->print_cr(" \\ %d SWPointer::offset_plus_k: is invariant", n->_idx); + } +} + +void SWPointer::Tracer::offset_plus_k_11(Node* n) { + if(_slp->is_trace_alignment()) { + print_depth(); tty->print_cr(" %d SWPointer::offset_plus_k: FAILED", n->_idx); + } +} + +#endif // ========================= OrderedPair ===================== const OrderedPair OrderedPair::initial; @@ -3076,13 +3489,20 @@ void DepSuccs::next() { // // --------------------------------- vectorization/simd ----------------------------------- // +bool SuperWord::same_origin_idx(Node* a, Node* b) const { + return a != NULL && b != NULL && _clone_map.same_idx(a->_idx, b->_idx); +} +bool SuperWord::same_generation(Node* a, Node* b) const { + return a != NULL && b != NULL && _clone_map.same_gen(a->_idx, b->_idx); +} + Node* SuperWord::find_phi_for_mem_dep(LoadNode* ld) { assert(in_bb(ld), "must be in block"); if (_clone_map.gen(ld->_idx) == _ii_first) { #ifndef PRODUCT if (_vector_loop_debug) { tty->print_cr("SuperWord::find_phi_for_mem_dep _clone_map.gen(ld->_idx)=%d", - _clone_map.gen(ld->_idx)); + _clone_map.gen(ld->_idx)); } #endif return NULL; //we think that any ld in the first gen being vectorizable @@ -3094,18 +3514,18 @@ Node* SuperWord::find_phi_for_mem_dep(LoadNode* ld) { #ifndef PRODUCT if (_vector_loop_debug) { tty->print_cr("SuperWord::find_phi_for_mem_dep input node %d to load %d has no other outputs and edge mem->load cannot be removed", - mem->_idx, ld->_idx); + mem->_idx, ld->_idx); ld->dump(); mem->dump(); } #endif return NULL; } - if (!in_bb(mem) || _clone_map.gen(mem->_idx) == _clone_map.gen(ld->_idx)) { + if (!in_bb(mem) || same_generation(mem, ld)) { #ifndef PRODUCT if (_vector_loop_debug) { tty->print_cr("SuperWord::find_phi_for_mem_dep _clone_map.gen(mem->_idx)=%d", - _clone_map.gen(mem->_idx)); + _clone_map.gen(mem->_idx)); } #endif return NULL; // does not depend on loop volatile node or depends on the same generation @@ -3136,7 +3556,7 @@ Node* SuperWord::find_phi_for_mem_dep(LoadNode* ld) { #ifndef PRODUCT if (_vector_loop_debug) { tty->print_cr("SuperWord::find_phi_for_mem_dep load %d is not vectorizable node, its phi %d is not _mem_slice_head", - ld->_idx, phi->_idx); + ld->_idx, phi->_idx); ld->dump(); phi->dump(); } @@ -3151,11 +3571,11 @@ Node* SuperWord::find_phi_for_mem_dep(LoadNode* ld) { Node* SuperWord::first_node(Node* nd) { for (int ii = 0; ii < _iteration_first.length(); ii++) { Node* nnn = _iteration_first.at(ii); - if (_clone_map.idx(nnn->_idx) == _clone_map.idx(nd->_idx)) { + if (same_origin_idx(nnn, nd)) { #ifndef PRODUCT if (_vector_loop_debug) { tty->print_cr("SuperWord::first_node: %d is the first iteration node for %d (_clone_map.idx(nnn->_idx) = %d)", - nnn->_idx, nd->_idx, _clone_map.idx(nnn->_idx)); + nnn->_idx, nd->_idx, _clone_map.idx(nnn->_idx)); } #endif return nnn; @@ -3165,7 +3585,7 @@ Node* SuperWord::first_node(Node* nd) { #ifndef PRODUCT if (_vector_loop_debug) { tty->print_cr("SuperWord::first_node: did not find first iteration node for %d (_clone_map.idx(nd->_idx)=%d)", - nd->_idx, _clone_map.idx(nd->_idx)); + nd->_idx, _clone_map.idx(nd->_idx)); } #endif return 0; @@ -3174,11 +3594,11 @@ Node* SuperWord::first_node(Node* nd) { Node* SuperWord::last_node(Node* nd) { for (int ii = 0; ii < _iteration_last.length(); ii++) { Node* nnn = _iteration_last.at(ii); - if (_clone_map.idx(nnn->_idx) == _clone_map.idx(nd->_idx)) { + if (same_origin_idx(nnn, nd)) { #ifndef PRODUCT if (_vector_loop_debug) { tty->print_cr("SuperWord::last_node _clone_map.idx(nnn->_idx)=%d, _clone_map.idx(nd->_idx)=%d", - _clone_map.idx(nnn->_idx), _clone_map.idx(nd->_idx)); + _clone_map.idx(nnn->_idx), _clone_map.idx(nd->_idx)); } #endif return nnn; @@ -3219,9 +3639,11 @@ int SuperWord::mark_generations() { } else if (_ii_first != _clone_map.gen(ii->_idx)) { #ifndef PRODUCT if (TraceSuperWord && Verbose) { - tty->print_cr("SuperWord::mark_generations _ii_first error - found different generations in two nodes "); + tty->print_cr("SuperWord::mark_generations: _ii_first was found before and not equal to one in this node (%d)", _ii_first); ii->dump(); - ii_err->dump(); + if (ii_err!= 0) { + ii_err->dump(); + } } #endif return -1; // this phi has Stores from different generations of unroll and cannot be simd/vectorized @@ -3252,8 +3674,7 @@ int SuperWord::mark_generations() { } // building order of iterations - assert(_ii_order.length() == 0, "should be empty"); - if (ii_err != 0) { + if (_ii_order.length() == 0 && ii_err != 0) { assert(in_bb(ii_err) && ii_err->is_Store(), "should be Store in bb"); Node* nd = ii_err; while(_clone_map.gen(nd->_idx) != _ii_last) { @@ -3261,7 +3682,7 @@ int SuperWord::mark_generations() { bool found = false; for (DUIterator_Fast imax, i = nd->fast_outs(imax); i < imax; i++) { Node* use = nd->fast_out(i); - if (_clone_map.idx(use->_idx) == _clone_map.idx(nd->_idx) && use->as_Store()->in(MemNode::Memory) == nd) { + if (same_origin_idx(use, nd) && use->as_Store()->in(MemNode::Memory) == nd) { found = true; nd = use; break; @@ -3303,7 +3724,7 @@ int SuperWord::mark_generations() { bool SuperWord::fix_commutative_inputs(Node* gold, Node* fix) { assert(gold->is_Add() && fix->is_Add() || gold->is_Mul() && fix->is_Mul(), "should be only Add or Mul nodes"); - assert(_clone_map.idx(gold->_idx) == _clone_map.idx(fix->_idx), "should be clones of the same node"); + assert(same_origin_idx(gold, fix), "should be clones of the same node"); Node* gin1 = gold->in(1); Node* gin2 = gold->in(2); Node* fin1 = fix->in(1); @@ -3311,12 +3732,12 @@ bool SuperWord::fix_commutative_inputs(Node* gold, Node* fix) { bool swapped = false; if (in_bb(gin1) && in_bb(gin2) && in_bb(fin1) && in_bb(fin1)) { - if (_clone_map.idx(gin1->_idx) == _clone_map.idx(fin1->_idx) && - _clone_map.idx(gin2->_idx) == _clone_map.idx(fin2->_idx)) { + if (same_origin_idx(gin1, fin1) && + same_origin_idx(gin2, fin2)) { return true; // nothing to fix } - if (_clone_map.idx(gin1->_idx) == _clone_map.idx(fin2->_idx) && - _clone_map.idx(gin2->_idx) == _clone_map.idx(fin1->_idx)) { + if (same_origin_idx(gin1, fin2) && + same_origin_idx(gin2, fin1)) { fix->swap_edges(1, 2); swapped = true; } @@ -3364,7 +3785,7 @@ bool SuperWord::pack_parallel() { for (int gen = 1; gen < _ii_order.length(); ++gen) { for (int kk = 0; kk < _block.length(); kk++) { Node* clone = _block.at(kk); - if (_clone_map.idx(clone->_idx) == _clone_map.idx(nd->_idx) && + if (same_origin_idx(clone, nd) && _clone_map.gen(clone->_idx) == _ii_order.at(gen)) { if (nd->is_Add() || nd->is_Mul()) { fix_commutative_inputs(nd, clone); @@ -3429,13 +3850,12 @@ bool SuperWord::hoist_loads_in_graph() { if (ld->is_Load() && ld->as_Load()->in(MemNode::Memory) == n && in_bb(ld)) { for (int i = 0; i < _block.length(); i++) { Node* ld2 = _block.at(i); - if (ld2->is_Load() && - _clone_map.idx(ld->_idx) == _clone_map.idx(ld2->_idx) && - _clone_map.gen(ld->_idx) != _clone_map.gen(ld2->_idx)) { // <= do not collect the first generation ld + if (ld2->is_Load() && same_origin_idx(ld, ld2) && + !same_generation(ld, ld2)) { // <= do not collect the first generation ld #ifndef PRODUCT if (_vector_loop_debug) { tty->print_cr("SuperWord::hoist_loads_in_graph: will try to hoist load ld2->_idx=%d, cloned from %d (ld->_idx=%d)", - ld2->_idx, _clone_map.idx(ld->_idx), ld->_idx); + ld2->_idx, _clone_map.idx(ld->_idx), ld->_idx); } #endif // could not do on-the-fly, since iterator is immutable @@ -3453,7 +3873,7 @@ bool SuperWord::hoist_loads_in_graph() { #ifndef PRODUCT if (_vector_loop_debug) { tty->print_cr("SuperWord::hoist_loads_in_graph replacing MemNode::Memory(%d) edge in %d with one from %d", - MemNode::Memory, ld->_idx, phi->_idx); + MemNode::Memory, ld->_idx, phi->_idx); } #endif _igvn.replace_input_of(ld, MemNode::Memory, phi); diff --git a/hotspot/src/share/vm/opto/superword.hpp b/hotspot/src/share/vm/opto/superword.hpp index f5c4384375b..5e759c411b9 100644 --- a/hotspot/src/share/vm/opto/superword.hpp +++ b/hotspot/src/share/vm/opto/superword.hpp @@ -203,6 +203,7 @@ class SWNodeInfo VALUE_OBJ_CLASS_SPEC { // -----------------------------SuperWord--------------------------------- // Transforms scalar operations into packed (superword) operations. class SuperWord : public ResourceObj { + friend class SWPointer; private: PhaseIdealLoop* _phase; Arena* _arena; @@ -247,8 +248,17 @@ class SuperWord : public ResourceObj { PhaseIdealLoop* phase() { return _phase; } IdealLoopTree* lpt() { return _lpt; } PhiNode* iv() { return _iv; } + bool early_return() { return _early_return; } +#ifndef PRODUCT + bool is_debug() { return _vector_loop_debug > 0; } + bool is_trace_alignment() { return (_vector_loop_debug & 2) > 0; } + bool is_trace_mem_slice() { return (_vector_loop_debug & 4) > 0; } + bool is_trace_loop() { return (_vector_loop_debug & 8) > 0; } + bool is_trace_adjacent() { return (_vector_loop_debug & 16) > 0; } +#endif + bool do_vector_loop() { return _do_vector_loop; } private: IdealLoopTree* _lpt; // Current loop tree node LoopNode* _lp; // Current LoopNode @@ -257,12 +267,14 @@ class SuperWord : public ResourceObj { bool _race_possible; // In cases where SDMU is true bool _early_return; // True if we do not initialize bool _do_vector_loop; // whether to do vectorization/simd style - bool _vector_loop_debug; // provide more printing in debug mode int _num_work_vecs; // Number of non memory vector operations int _num_reductions; // Number of reduction expressions applied int _ii_first; // generation with direct deps from mem phi int _ii_last; // generation with direct deps to mem phi GrowableArray _ii_order; +#ifndef PRODUCT + uintx _vector_loop_debug; // provide more printing in debug mode +#endif // Accessors Arena* arena() { return _arena; } @@ -325,12 +337,20 @@ class SuperWord : public ResourceObj { Node_List* my_pack(Node* n) { return !in_bb(n) ? NULL : _node_info.adr_at(bb_idx(n))->_my_pack; } void set_my_pack(Node* n, Node_List* p) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_my_pack = p; } + // CloneMap utilities + bool same_origin_idx(Node* a, Node* b) const; + bool same_generation(Node* a, Node* b) const; + // methods // Extract the superword level parallelism void SLP_extract(); // Find the adjacent memory references and create pack pairs for them. void find_adjacent_refs(); + // Tracing support + #ifndef PRODUCT + void find_adjacent_refs_trace_1(Node* best_align_to_mem_ref, int best_iv_adjustment); + #endif // Find a memory reference to align the loop induction variable to. MemNode* find_align_to_ref(Node_List &memops); // Calculate loop's iv adjustment for this memory ops. @@ -340,13 +360,13 @@ class SuperWord : public ResourceObj { // rebuild the graph so all loads in different iterations of cloned loop become dependant on phi node (in _do_vector_loop only) bool hoist_loads_in_graph(); // Test whether MemNode::Memory dependency to the same load but in the first iteration of this loop is coming from memory phi - // Return false if failed. + // Return false if failed Node* find_phi_for_mem_dep(LoadNode* ld); // Return same node but from the first generation. Return 0, if not found Node* first_node(Node* nd); // Return same node as this but from the last generation. Return 0, if not found Node* last_node(Node* n); - // Mark nodes belonging to first and last generation, + // Mark nodes belonging to first and last generation // returns first generation index or -1 if vectorization/simd is impossible int mark_generations(); // swapping inputs of commutative instruction (Add or Mul) @@ -483,10 +503,7 @@ class SWPointer VALUE_OBJ_CLASS_SPEC { IdealLoopTree* lpt() { return _slp->lpt(); } PhiNode* iv() { return _slp->iv(); } // Induction var - bool invariant(Node* n) { - Node *n_c = phase()->get_ctrl(n); - return !lpt()->is_member(phase()->get_loop(n_c)); - } + bool invariant(Node* n); // Match: k*iv + offset bool scaled_iv_plus_offset(Node* n); @@ -545,6 +562,76 @@ class SWPointer VALUE_OBJ_CLASS_SPEC { static bool comparable(int cmp) { return cmp < NotComparable; } void print(); + +#ifndef PRODUCT + class Tracer { + friend class SuperWord; + friend class SWPointer; + SuperWord* _slp; + static int _depth; + int _depth_save; + void print_depth(); + int depth() const { return _depth; } + void set_depth(int d) { _depth = d; } + void inc_depth() { _depth++;} + void dec_depth() { if (_depth > 0) _depth--;} + void store_depth() {_depth_save = _depth;} + void restore_depth() {_depth = _depth_save;} + + class Depth { + friend class Tracer; + friend class SWPointer; + friend class SuperWord; + Depth() { ++_depth; } + Depth(int x) { _depth = 0; } + ~Depth() { if (_depth > 0) --_depth;} + }; + Tracer (SuperWord* slp) : _slp(slp) {} + + // tracing functions + void ctor_1(Node* mem); + void ctor_2(Node* adr); + void ctor_3(Node* adr, int i); + void ctor_4(Node* adr, int i); + void ctor_5(Node* adr, Node* base, int i); + void ctor_6(Node* mem); + + void invariant_1(Node *n, Node *n_c); + + void scaled_iv_plus_offset_1(Node* n); + void scaled_iv_plus_offset_2(Node* n); + void scaled_iv_plus_offset_3(Node* n); + void scaled_iv_plus_offset_4(Node* n); + void scaled_iv_plus_offset_5(Node* n); + void scaled_iv_plus_offset_6(Node* n); + void scaled_iv_plus_offset_7(Node* n); + void scaled_iv_plus_offset_8(Node* n); + + void scaled_iv_1(Node* n); + void scaled_iv_2(Node* n, int scale); + void scaled_iv_3(Node* n, int scale); + void scaled_iv_4(Node* n, int scale); + void scaled_iv_5(Node* n, int scale); + void scaled_iv_6(Node* n, int scale); + void scaled_iv_7(Node* n); + void scaled_iv_8(Node* n, SWPointer* tmp); + void scaled_iv_9(Node* n, int _scale, int _offset, int mult); + void scaled_iv_10(Node* n); + + void offset_plus_k_1(Node* n); + void offset_plus_k_2(Node* n, int _offset); + void offset_plus_k_3(Node* n, int _offset); + void offset_plus_k_4(Node* n); + void offset_plus_k_5(Node* n, Node* _invar); + void offset_plus_k_6(Node* n, Node* _invar, bool _negate_invar, int _offset); + void offset_plus_k_7(Node* n, Node* _invar, bool _negate_invar, int _offset); + void offset_plus_k_8(Node* n, Node* _invar, bool _negate_invar, int _offset); + void offset_plus_k_9(Node* n, Node* _invar, bool _negate_invar, int _offset); + void offset_plus_k_10(Node* n, Node* _invar, bool _negate_invar, int _offset); + void offset_plus_k_11(Node* n); + + } _tracer;//TRacer; +#endif };