8085932: Fixing bugs in detecting memory alignments in SuperWord

Reviewed-by: kvn
This commit is contained in:
Jan Civlin 2015-07-14 09:55:22 -07:00 committed by Vladimir Kozlov
parent b5284a93ce
commit 2a4f18ccbd
4 changed files with 565 additions and 58 deletions

View File

@ -140,6 +140,9 @@ class CloneMap {
bool is_debug() const { return _debug; } bool is_debug() const { return _debug; }
void set_debug(bool debug) { _debug = debug; } void set_debug(bool debug) { _debug = debug; }
static const char* debug_option_name; static const char* debug_option_name;
// Returns true when the two node indices k1 and k2 yield equal idx() values.
// NOTE(review): presumably idx() is the index of the original node a clone was made from - confirm against CloneMap.
bool same_idx(node_idx_t k1, node_idx_t k2) const { return idx(k1) == idx(k2); }
// Returns true when the two node indices k1 and k2 yield equal gen() values.
// NOTE(review): presumably gen() is the clone generation (unrolling iteration) of a node - confirm against CloneMap.
bool same_gen(node_idx_t k1, node_idx_t k2) const { return gen(k1) == gen(k2); }
}; };
//------------------------------Compile---------------------------------------- //------------------------------Compile----------------------------------------

View File

@ -3682,7 +3682,6 @@ void PhaseIdealLoop::dump( ) const {
} }
void PhaseIdealLoop::dump( IdealLoopTree *loop, uint idx, Node_List &rpo_list ) const { void PhaseIdealLoop::dump( IdealLoopTree *loop, uint idx, Node_List &rpo_list ) const {
CloneMap& cm = C->clone_map();
loop->dump_head(); loop->dump_head();
// Now scan for CFG nodes in the same loop // Now scan for CFG nodes in the same loop
@ -3714,7 +3713,6 @@ void PhaseIdealLoop::dump( IdealLoopTree *loop, uint idx, Node_List &rpo_list )
cached_idom = find_non_split_ctrl(cached_idom); cached_idom = find_non_split_ctrl(cached_idom);
} }
tty->print(" ID:%d",computed_idom->_idx); tty->print(" ID:%d",computed_idom->_idx);
cm.dump(n->_idx);
n->dump(); n->dump();
if( cached_idom != computed_idom ) { if( cached_idom != computed_idom ) {
tty->print_cr("*** BROKEN IDOM! Computed as: %d, cached as: %d", tty->print_cr("*** BROKEN IDOM! Computed as: %d, cached as: %d",
@ -3734,7 +3732,6 @@ void PhaseIdealLoop::dump( IdealLoopTree *loop, uint idx, Node_List &rpo_list )
for( uint j = 0; j < loop->_nest; j++ ) for( uint j = 0; j < loop->_nest; j++ )
tty->print(" "); tty->print(" ");
tty->print(" "); tty->print(" ");
cm.dump(m->_idx);
m->dump(); m->dump();
} }
} }

View File

@ -74,9 +74,15 @@ SuperWord::SuperWord(PhaseIdealLoop* phase) :
_do_vector_loop(phase->C->do_vector_loop()), // whether to do vectorization/simd style _do_vector_loop(phase->C->do_vector_loop()), // whether to do vectorization/simd style
_ii_first(-1), // first loop generation index - only if do_vector_loop() _ii_first(-1), // first loop generation index - only if do_vector_loop()
_ii_last(-1), // last loop generation index - only if do_vector_loop() _ii_last(-1), // last loop generation index - only if do_vector_loop()
_ii_order(arena(), 8, 0, 0), _ii_order(arena(), 8, 0, 0)
_vector_loop_debug(phase->C->has_method() && phase->C->method_has_option("VectorizeDebug")) {
{} #ifndef PRODUCT
_vector_loop_debug = 0;
if (_phase->C->method() != NULL) {
_phase->C->method()->has_option_value("VectorizeDebug", _vector_loop_debug);
}
#endif
}
//------------------------------transform_loop--------------------------- //------------------------------transform_loop---------------------------
void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) { void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) {
@ -90,7 +96,6 @@ void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) {
if (!cl->is_valid_counted_loop()) return; // skip malformed counted loop if (!cl->is_valid_counted_loop()) return; // skip malformed counted loop
if (!cl->is_main_loop() ) return; // skip normal, pre, and post loops if (!cl->is_main_loop() ) return; // skip normal, pre, and post loops
// Check for no control flow in body (other than exit) // Check for no control flow in body (other than exit)
Node *cl_exit = cl->loopexit(); Node *cl_exit = cl->loopexit();
if (cl_exit->in(0) != lpt->_head) return; if (cl_exit->in(0) != lpt->_head) return;
@ -425,13 +430,15 @@ void SuperWord::find_adjacent_refs() {
// this reference to a vector-aligned address. // this reference to a vector-aligned address.
best_align_to_mem_ref = mem_ref; best_align_to_mem_ref = mem_ref;
best_iv_adjustment = iv_adjustment; best_iv_adjustment = iv_adjustment;
NOT_PRODUCT(find_adjacent_refs_trace_1(best_align_to_mem_ref, best_iv_adjustment);)
} }
SWPointer align_to_ref_p(mem_ref, this, NULL, false); SWPointer align_to_ref_p(mem_ref, this, NULL, false);
// Set alignment relative to "align_to_ref" for all related memory operations. // Set alignment relative to "align_to_ref" for all related memory operations.
for (int i = memops.size() - 1; i >= 0; i--) { for (int i = memops.size() - 1; i >= 0; i--) {
MemNode* s = memops.at(i)->as_Mem(); MemNode* s = memops.at(i)->as_Mem();
if (isomorphic(s, mem_ref)) { if (isomorphic(s, mem_ref) &&
(!_do_vector_loop || same_origin_idx(s, mem_ref))) {
SWPointer p2(s, this, NULL, false); SWPointer p2(s, this, NULL, false);
if (p2.comparable(align_to_ref_p)) { if (p2.comparable(align_to_ref_p)) {
int align = memory_alignment(s, iv_adjustment); int align = memory_alignment(s, iv_adjustment);
@ -496,7 +503,7 @@ void SuperWord::find_adjacent_refs() {
Node_List* pair = new Node_List(); Node_List* pair = new Node_List();
pair->push(s1); pair->push(s1);
pair->push(s2); pair->push(s2);
if (!_do_vector_loop || _clone_map.idx(s1->_idx) == _clone_map.idx(s2->_idx)) { if (!_do_vector_loop || same_origin_idx(s1, s2)) {
_packset.append(pair); _packset.append(pair);
} }
} }
@ -533,8 +540,12 @@ void SuperWord::find_adjacent_refs() {
memops.push(s); memops.push(s);
} }
MemNode* best_align_to_mem_ref = find_align_to_ref(memops); MemNode* best_align_to_mem_ref = find_align_to_ref(memops);
if (best_align_to_mem_ref == NULL) break; if (best_align_to_mem_ref == NULL) {
NOT_PRODUCT(if (TraceSuperWord) tty->print_cr("SuperWord::find_adjacent_refs(): best_align_to_mem_ref == NULL");)
break;
}
best_iv_adjustment = get_iv_adjustment(best_align_to_mem_ref); best_iv_adjustment = get_iv_adjustment(best_align_to_mem_ref);
NOT_PRODUCT(find_adjacent_refs_trace_1(best_align_to_mem_ref, best_iv_adjustment);)
// Restore list. // Restore list.
while (memops.size() > orig_msize) while (memops.size() > orig_msize)
(void)memops.pop(); (void)memops.pop();
@ -560,6 +571,16 @@ void SuperWord::find_adjacent_refs() {
#endif #endif
} }
#ifndef PRODUCT
// Non-product trace helper used by find_adjacent_refs(): when
// is_trace_adjacent() is set, prints the node index of the chosen
// alignment reference together with the iv adjustment, then dumps the node.
void SuperWord::find_adjacent_refs_trace_1(Node* best_align_to_mem_ref, int best_iv_adjustment) {
  if (!is_trace_adjacent()) {
    return;
  }
  tty->print("SuperWord::find_adjacent_refs best_align_to_mem_ref = %d, best_iv_adjustment = %d",
             best_align_to_mem_ref->_idx, best_iv_adjustment);
  best_align_to_mem_ref->dump();
}
#endif
//------------------------------find_align_to_ref--------------------------- //------------------------------find_align_to_ref---------------------------
// Find a memory reference to align the loop induction variable to. // Find a memory reference to align the loop induction variable to.
// Looks first at stores then at loads, looking for a memory reference // Looks first at stores then at loads, looking for a memory reference
@ -756,9 +777,11 @@ int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
} }
#ifndef PRODUCT #ifndef PRODUCT
if (TraceSuperWord) if (TraceSuperWord) {
tty->print_cr("\noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d", tty->print("SuperWord::get_iv_adjustment: n = %d, noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d: ",
offset, iv_adjustment, elt_size, scale, iv_stride(), vw); mem_ref->_idx, offset, iv_adjustment, elt_size, scale, iv_stride(), vw);
mem_ref->dump();
}
#endif #endif
return iv_adjustment; return iv_adjustment;
} }
@ -863,12 +886,14 @@ void SuperWord::mem_slice_preds(Node* start, Node* stop, GrowableArray<Node*> &p
Node* n = start; Node* n = start;
Node* prev = NULL; Node* prev = NULL;
while (true) { while (true) {
NOT_PRODUCT( if(is_trace_mem_slice()) tty->print_cr("SuperWord::mem_slice_preds: n %d", n->_idx);)
assert(in_bb(n), "must be in block"); assert(in_bb(n), "must be in block");
for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
Node* out = n->fast_out(i); Node* out = n->fast_out(i);
if (out->is_Load()) { if (out->is_Load()) {
if (in_bb(out)) { if (in_bb(out)) {
preds.push(out); preds.push(out);
NOT_PRODUCT(if (TraceSuperWord && Verbose) tty->print_cr("SuperWord::mem_slice_preds: added pred(%d)", out->_idx);)
} }
} else { } else {
// FIXME // FIXME
@ -883,10 +908,11 @@ void SuperWord::mem_slice_preds(Node* start, Node* stop, GrowableArray<Node*> &p
} else { } else {
assert(out == prev || prev == NULL, "no branches off of store slice"); assert(out == prev || prev == NULL, "no branches off of store slice");
} }
} }//else
} }//for
if (n == stop) break; if (n == stop) break;
preds.push(n); preds.push(n);
NOT_PRODUCT(if (TraceSuperWord && Verbose) tty->print_cr("SuperWord::mem_slice_preds: added pred(%d)", n->_idx);)
prev = n; prev = n;
assert(n->is_Mem(), err_msg_res("unexpected node %s", n->Name())); assert(n->is_Mem(), err_msg_res("unexpected node %s", n->Name()));
n = n->in(MemNode::Memory); n = n->in(MemNode::Memory);
@ -2140,18 +2166,38 @@ bool SuperWord::construct_bb() {
} else { } else {
_stk.pop(); // Remove post-visited node from stack _stk.pop(); // Remove post-visited node from stack
} }
} }//while
int ii_current = -1;
unsigned int load_idx = -1;
_ii_order.clear();
// Create real map of block indices for nodes // Create real map of block indices for nodes
for (int j = 0; j < _block.length(); j++) { for (int j = 0; j < _block.length(); j++) {
Node* n = _block.at(j); Node* n = _block.at(j);
set_bb_idx(n, j); set_bb_idx(n, j);
if (_do_vector_loop && n->is_Load()) {
if (ii_current == -1) {
ii_current = _clone_map.gen(n->_idx);
_ii_order.push(ii_current);
load_idx = _clone_map.idx(n->_idx);
} else if (_clone_map.idx(n->_idx) == load_idx && _clone_map.gen(n->_idx) != ii_current) {
ii_current = _clone_map.gen(n->_idx);
_ii_order.push(ii_current);
} }
}
}//for
// Ensure extra info is allocated. // Ensure extra info is allocated.
initialize_bb(); initialize_bb();
#ifndef PRODUCT #ifndef PRODUCT
if (_vector_loop_debug && _ii_order.length() > 0) {
tty->print("SuperWord::construct_bb: List of generations: ");
for (int jj = 0; jj < _ii_order.length(); ++jj) {
tty->print(" %d:%d", jj, _ii_order.at(jj));
}
tty->print_cr(" ");
}
if (TraceSuperWord) { if (TraceSuperWord) {
print_bb(); print_bb();
tty->print_cr("\ndata entry nodes: %s", _data_entry.length() > 0 ? "" : "NONE"); tty->print_cr("\ndata entry nodes: %s", _data_entry.length() > 0 ? "" : "NONE");
@ -2312,18 +2358,27 @@ void SuperWord::compute_vector_element_type() {
//------------------------------memory_alignment--------------------------- //------------------------------memory_alignment---------------------------
// Alignment within a vector memory reference // Alignment within a vector memory reference
// Computes where memory reference `s` falls inside a vector of its type.
//   s          - the memory operation to analyze
//   iv_adjust  - number of elements the induction variable is shifted by;
//                it is scaled by the element size before being added in
// Returns bottom_align when the address cannot be analyzed by SWPointer or
// when the vector width for `s` is below 2 bytes; otherwise returns the byte
// offset modulo the vector width, normalized into [0, vw).
int SuperWord::memory_alignment(MemNode* s, int iv_adjust) {
#ifndef PRODUCT
  if(TraceSuperWord && Verbose) {
    tty->print("SuperWord::memory_alignment within a vector memory reference for %d: ", s->_idx); s->dump();
  }
#endif
  NOT_PRODUCT(SWPointer::Tracer::Depth ddd(0);)
  SWPointer p(s, this, NULL, false);
  if (!p.valid()) {
    // print_cr (not print) so the next trace message starts on its own line,
    // matching the sibling "vector_width_in_bytes < 2" message below.
    NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SWPointer::memory_alignment: SWPointer p invalid, return bottom_align");)
    return bottom_align;
  }
  int vw = vector_width_in_bytes(s);
  if (vw < 2) {
    NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SWPointer::memory_alignment: vector_width_in_bytes < 2, return bottom_align");)
    return bottom_align; // No vectors for this type
  }
  int offset  = p.offset_in_bytes();
  offset     += iv_adjust*p.memory_size();
  // C++ '%' keeps the sign of the dividend; fold negative remainders back
  // into the non-negative range.
  int off_rem = offset % vw;
  int off_mod = off_rem >= 0 ? off_rem : off_rem + vw;
  NOT_PRODUCT(if(TraceSuperWord && Verbose) tty->print_cr("SWPointer::memory_alignment: off_rem = %d, off_mod = %d", off_rem, off_mod);)
  return off_mod;
}
@ -2732,13 +2787,20 @@ char* SuperWord::blank(uint depth) {
//==============================SWPointer=========================== //==============================SWPointer===========================
#ifndef PRODUCT
// Out-of-class definition of the Tracer nesting counter, starting at 0.
// Tracer::print_depth() emits one indentation unit per level of _depth.
int SWPointer::Tracer::_depth = 0;
#endif
//----------------------------SWPointer------------------------ //----------------------------SWPointer------------------------
SWPointer::SWPointer(MemNode* mem, SuperWord* slp, Node_Stack *nstack, bool analyze_only) : SWPointer::SWPointer(MemNode* mem, SuperWord* slp, Node_Stack *nstack, bool analyze_only) :
_mem(mem), _slp(slp), _base(NULL), _adr(NULL), _mem(mem), _slp(slp), _base(NULL), _adr(NULL),
_scale(0), _offset(0), _invar(NULL), _negate_invar(false), _scale(0), _offset(0), _invar(NULL), _negate_invar(false),
_nstack(nstack), _analyze_only(analyze_only), _nstack(nstack), _analyze_only(analyze_only),
_stack_idx(0) { _stack_idx(0)
#ifndef PRODUCT
, _tracer(slp)
#endif
{
NOT_PRODUCT(_tracer.ctor_1(mem);)
Node* adr = mem->in(MemNode::Address); Node* adr = mem->in(MemNode::Address);
if (!adr->is_AddP()) { if (!adr->is_AddP()) {
@ -2757,16 +2819,29 @@ SWPointer::SWPointer(MemNode* mem, SuperWord* slp, Node_Stack *nstack, bool anal
assert(!valid(), "unsafe access"); assert(!valid(), "unsafe access");
return; return;
} }
for (int i = 0; i < 3; i++) {
NOT_PRODUCT(if(_slp->is_trace_alignment()) _tracer.store_depth();)
NOT_PRODUCT(_tracer.ctor_2(adr);)
int i;
for (i = 0; i < 3; i++) {
NOT_PRODUCT(_tracer.ctor_3(adr, i);)
if (!scaled_iv_plus_offset(adr->in(AddPNode::Offset))) { if (!scaled_iv_plus_offset(adr->in(AddPNode::Offset))) {
assert(!valid(), "too complex"); assert(!valid(), "too complex");
return; return;
} }
adr = adr->in(AddPNode::Address); adr = adr->in(AddPNode::Address);
NOT_PRODUCT(_tracer.ctor_4(adr, i);)
if (base == adr || !adr->is_AddP()) { if (base == adr || !adr->is_AddP()) {
NOT_PRODUCT(_tracer.ctor_5(adr, base, i);)
break; // stop looking at addp's break; // stop looking at addp's
} }
} }
NOT_PRODUCT(if(_slp->is_trace_alignment()) _tracer.restore_depth();)
NOT_PRODUCT(_tracer.ctor_6(mem);)
_base = base; _base = base;
_adr = adr; _adr = adr;
assert(valid(), "Usable"); assert(valid(), "Usable");
@ -2778,68 +2853,103 @@ SWPointer::SWPointer(SWPointer* p) :
_mem(p->_mem), _slp(p->_slp), _base(NULL), _adr(NULL), _mem(p->_mem), _slp(p->_slp), _base(NULL), _adr(NULL),
_scale(0), _offset(0), _invar(NULL), _negate_invar(false), _scale(0), _offset(0), _invar(NULL), _negate_invar(false),
_nstack(p->_nstack), _analyze_only(p->_analyze_only), _nstack(p->_nstack), _analyze_only(p->_analyze_only),
_stack_idx(p->_stack_idx) {} _stack_idx(p->_stack_idx)
#ifndef PRODUCT
, _tracer(p->_slp)
#endif
{}
// Returns true when the control node of `n` does not belong to the loop
// being analyzed (lpt()), i.e. lpt()->is_member() reports false for the loop
// of n's control. In non-product builds the tracer is notified first.
bool SWPointer::invariant(Node* n) {
  NOT_PRODUCT(Tracer::Depth dd;)
  Node* ctrl = phase()->get_ctrl(n);
  NOT_PRODUCT(_tracer.invariant_1(n, ctrl);)
  IdealLoopTree* ctrl_loop = phase()->get_loop(ctrl);
  return !lpt()->is_member(ctrl_loop);
}
//------------------------scaled_iv_plus_offset-------------------- //------------------------scaled_iv_plus_offset--------------------
// Match: k*iv + offset // Match: k*iv + offset
// where: k is a constant that maybe zero, and // where: k is a constant that maybe zero, and
// offset is (k2 [+/- invariant]) where k2 maybe zero and invariant is optional // offset is (k2 [+/- invariant]) where k2 maybe zero and invariant is optional
bool SWPointer::scaled_iv_plus_offset(Node* n) {
  NOT_PRODUCT(Tracer::Depth ddd;)
  NOT_PRODUCT(_tracer.scaled_iv_plus_offset_1(n);)

  // The whole expression may already be a pure scaled iv ...
  if (scaled_iv(n)) {
    NOT_PRODUCT(_tracer.scaled_iv_plus_offset_2(n);)
    return true;
  }

  // ... or a pure offset.
  if (offset_plus_k(n)) {
    NOT_PRODUCT(_tracer.scaled_iv_plus_offset_3(n);)
    return true;
  }

  // Otherwise try to split an integer add/sub into its two operands.
  // The matchers update _scale/_offset/_invar as they go, so the order of
  // the attempts below must not change.
  switch (n->Opcode()) {
    case Op_AddI:
      if (scaled_iv(n->in(1)) && offset_plus_k(n->in(2))) {
        NOT_PRODUCT(_tracer.scaled_iv_plus_offset_4(n);)
        return true;
      }
      if (scaled_iv(n->in(2)) && offset_plus_k(n->in(1))) {
        NOT_PRODUCT(_tracer.scaled_iv_plus_offset_5(n);)
        return true;
      }
      break;

    case Op_SubI:
      // iv-part minus offset: the offset is matched negated.
      if (scaled_iv(n->in(1)) && offset_plus_k(n->in(2), true)) {
        NOT_PRODUCT(_tracer.scaled_iv_plus_offset_6(n);)
        return true;
      }
      // offset minus iv-part: the scale changes sign.
      if (scaled_iv(n->in(2)) && offset_plus_k(n->in(1))) {
        _scale = -_scale;
        NOT_PRODUCT(_tracer.scaled_iv_plus_offset_7(n);)
        return true;
      }
      break;

    default:
      break;
  }

  NOT_PRODUCT(_tracer.scaled_iv_plus_offset_8(n);)
  return false;
}
//----------------------------scaled_iv------------------------ //----------------------------scaled_iv------------------------
// Match: k*iv where k is a constant that's not zero // Match: k*iv where k is a constant that's not zero
bool SWPointer::scaled_iv(Node* n) { bool SWPointer::scaled_iv(Node* n) {
if (_scale != 0) { NOT_PRODUCT(Tracer::Depth ddd;)
return false; // already found a scale NOT_PRODUCT(_tracer.scaled_iv_1(n);)
if (_scale != 0) { // already found a scale
NOT_PRODUCT(_tracer.scaled_iv_2(n, _scale);)
return false;
} }
if (n == iv()) { if (n == iv()) {
_scale = 1; _scale = 1;
NOT_PRODUCT(_tracer.scaled_iv_3(n, _scale);)
return true; return true;
} }
if (_analyze_only && (invariant(n) == false)) { if (_analyze_only && (invariant(n) == false)) {
_nstack->push(n, _stack_idx++); _nstack->push(n, _stack_idx++);
} }
int opc = n->Opcode(); int opc = n->Opcode();
if (opc == Op_MulI) { if (opc == Op_MulI) {
if (n->in(1) == iv() && n->in(2)->is_Con()) { if (n->in(1) == iv() && n->in(2)->is_Con()) {
_scale = n->in(2)->get_int(); _scale = n->in(2)->get_int();
NOT_PRODUCT(_tracer.scaled_iv_4(n, _scale);)
return true; return true;
} else if (n->in(2) == iv() && n->in(1)->is_Con()) { } else if (n->in(2) == iv() && n->in(1)->is_Con()) {
_scale = n->in(1)->get_int(); _scale = n->in(1)->get_int();
NOT_PRODUCT(_tracer.scaled_iv_5(n, _scale);)
return true; return true;
} }
} else if (opc == Op_LShiftI) { } else if (opc == Op_LShiftI) {
if (n->in(1) == iv() && n->in(2)->is_Con()) { if (n->in(1) == iv() && n->in(2)->is_Con()) {
_scale = 1 << n->in(2)->get_int(); _scale = 1 << n->in(2)->get_int();
NOT_PRODUCT(_tracer.scaled_iv_6(n, _scale);)
return true; return true;
} }
} else if (opc == Op_ConvI2L) { } else if (opc == Op_ConvI2L) {
if (scaled_iv_plus_offset(n->in(1))) { if (scaled_iv_plus_offset(n->in(1))) {
NOT_PRODUCT(_tracer.scaled_iv_7(n);)
return true; return true;
} }
} else if (opc == Op_LShiftL) { } else if (opc == Op_LShiftL) {
@ -2847,17 +2957,22 @@ bool SWPointer::scaled_iv(Node* n) {
// Need to preserve the current _offset value, so // Need to preserve the current _offset value, so
// create a temporary object for this expression subtree. // create a temporary object for this expression subtree.
// Hacky, so should re-engineer the address pattern match. // Hacky, so should re-engineer the address pattern match.
NOT_PRODUCT(Tracer::Depth dddd;)
SWPointer tmp(this); SWPointer tmp(this);
NOT_PRODUCT(_tracer.scaled_iv_8(n, &tmp);)
if (tmp.scaled_iv_plus_offset(n->in(1))) { if (tmp.scaled_iv_plus_offset(n->in(1))) {
if (tmp._invar == NULL) { if (tmp._invar == NULL || _slp->do_vector_loop()) {
int mult = 1 << n->in(2)->get_int(); int mult = 1 << n->in(2)->get_int();
_scale = tmp._scale * mult; _scale = tmp._scale * mult;
_offset += tmp._offset * mult; _offset += tmp._offset * mult;
NOT_PRODUCT(_tracer.scaled_iv_9(n, _scale, _offset, mult);)
return true; return true;
} }
} }
} }
} }
NOT_PRODUCT(_tracer.scaled_iv_10(n);)
return false; return false;
} }
@ -2865,9 +2980,13 @@ bool SWPointer::scaled_iv(Node* n) {
// Match: offset is (k [+/- invariant]) // Match: offset is (k [+/- invariant])
// where k maybe zero and invariant is optional, but not both. // where k maybe zero and invariant is optional, but not both.
bool SWPointer::offset_plus_k(Node* n, bool negate) { bool SWPointer::offset_plus_k(Node* n, bool negate) {
NOT_PRODUCT(Tracer::Depth ddd;)
NOT_PRODUCT(_tracer.offset_plus_k_1(n);)
int opc = n->Opcode(); int opc = n->Opcode();
if (opc == Op_ConI) { if (opc == Op_ConI) {
_offset += negate ? -(n->get_int()) : n->get_int(); _offset += negate ? -(n->get_int()) : n->get_int();
NOT_PRODUCT(_tracer.offset_plus_k_2(n, _offset);)
return true; return true;
} else if (opc == Op_ConL) { } else if (opc == Op_ConL) {
// Okay if value fits into an int // Okay if value fits into an int
@ -2876,11 +2995,17 @@ bool SWPointer::offset_plus_k(Node* n, bool negate) {
jlong loff = n->get_long(); jlong loff = n->get_long();
jint off = (jint)loff; jint off = (jint)loff;
_offset += negate ? -off : loff; _offset += negate ? -off : loff;
NOT_PRODUCT(_tracer.offset_plus_k_3(n, _offset);)
return true; return true;
} }
NOT_PRODUCT(_tracer.offset_plus_k_4(n);)
return false; return false;
} }
if (_invar != NULL) return false; // already have an invariant if (_invar != NULL) { // already has an invariant
NOT_PRODUCT(_tracer.offset_plus_k_5(n, _invar);)
return false;
}
if (_analyze_only && (invariant(n) == false)) { if (_analyze_only && (invariant(n) == false)) {
_nstack->push(n, _stack_idx++); _nstack->push(n, _stack_idx++);
} }
@ -2889,11 +3014,13 @@ bool SWPointer::offset_plus_k(Node* n, bool negate) {
_negate_invar = negate; _negate_invar = negate;
_invar = n->in(1); _invar = n->in(1);
_offset += negate ? -(n->in(2)->get_int()) : n->in(2)->get_int(); _offset += negate ? -(n->in(2)->get_int()) : n->in(2)->get_int();
NOT_PRODUCT(_tracer.offset_plus_k_6(n, _invar, _negate_invar, _offset);)
return true; return true;
} else if (n->in(1)->is_Con() && invariant(n->in(2))) { } else if (n->in(1)->is_Con() && invariant(n->in(2))) {
_offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int(); _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int();
_negate_invar = negate; _negate_invar = negate;
_invar = n->in(2); _invar = n->in(2);
NOT_PRODUCT(_tracer.offset_plus_k_7(n, _invar, _negate_invar, _offset);)
return true; return true;
} }
} }
@ -2902,19 +3029,24 @@ bool SWPointer::offset_plus_k(Node* n, bool negate) {
_negate_invar = negate; _negate_invar = negate;
_invar = n->in(1); _invar = n->in(1);
_offset += !negate ? -(n->in(2)->get_int()) : n->in(2)->get_int(); _offset += !negate ? -(n->in(2)->get_int()) : n->in(2)->get_int();
NOT_PRODUCT(_tracer.offset_plus_k_8(n, _invar, _negate_invar, _offset);)
return true; return true;
} else if (n->in(1)->is_Con() && invariant(n->in(2))) { } else if (n->in(1)->is_Con() && invariant(n->in(2))) {
_offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int(); _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int();
_negate_invar = !negate; _negate_invar = !negate;
_invar = n->in(2); _invar = n->in(2);
NOT_PRODUCT(_tracer.offset_plus_k_9(n, _invar, _negate_invar, _offset);)
return true; return true;
} }
} }
if (invariant(n)) { if (invariant(n)) {
_negate_invar = negate; _negate_invar = negate;
_invar = n; _invar = n;
NOT_PRODUCT(_tracer.offset_plus_k_10(n, _invar, _negate_invar, _offset);)
return true; return true;
} }
NOT_PRODUCT(_tracer.offset_plus_k_11(n);)
return false; return false;
} }
@ -2930,6 +3062,287 @@ void SWPointer::print() {
#endif #endif
} }
//----------------------------tracing------------------------
#ifndef PRODUCT
// Emits one indentation unit on tty for every level of the current _depth.
void SWPointer::Tracer::print_depth() {
  int remaining = _depth;
  while (remaining-- > 0) {
    tty->print(" ");
  }
}
// Trace hook for the start of the SWPointer constructor: when alignment
// tracing is on, announces the analysis of `mem` and dumps the node.
void SWPointer::Tracer::ctor_1 (Node* mem) {
  if (!_slp->is_trace_alignment()) {
    return;
  }
  print_depth();
  tty->print(" %d SWPointer::SWPointer: start alignment analysis", mem->_idx);
  mem->dump();
}
// Trace hook: when alignment tracing is on, dumps the address node and then
// its AddPNode::Base input, each one indentation level deeper than the last.
void SWPointer::Tracer::ctor_2(Node* adr) {
  if (!_slp->is_trace_alignment()) {
    return;
  }

  inc_depth();
  print_depth();
  tty->print(" %d (adr) SWPointer::SWPointer: ", adr->_idx);
  adr->dump();

  inc_depth();
  Node* base_in = adr->in(AddPNode::Base);
  print_depth();
  tty->print(" %d (base) SWPointer::SWPointer: ", base_in->_idx);
  base_in->dump();
}
// Trace hook for iteration `i` of the constructor's AddP walk: when
// alignment tracing is on, goes one level deeper and dumps the
// AddPNode::Offset input of `adr`.
void SWPointer::Tracer::ctor_3(Node* adr, int i) {
  if (!_slp->is_trace_alignment()) {
    return;
  }
  inc_depth();
  Node* off_node = adr->in(AddPNode::Offset);
  print_depth();
  tty->print(" %d (offset) SWPointer::SWPointer: i = %d: ", off_node->_idx, i);
  off_node->dump();
}
// Trace hook for iteration `i` of the constructor's AddP walk: when
// alignment tracing is on, goes one level deeper and dumps the address node
// reached after stepping to the next AddPNode::Address input.
void SWPointer::Tracer::ctor_4(Node* adr, int i) {
  if (!_slp->is_trace_alignment()) {
    return;
  }
  inc_depth();
  print_depth();
  tty->print(" %d (adr) SWPointer::SWPointer: i = %d: ", adr->_idx, i);
  adr->dump();
}
// Trace hook for leaving the constructor's AddP walk at iteration `i`: when
// alignment tracing is on, reports whether the walk stopped because the
// address reached the base or because the address is no longer an AddP.
void SWPointer::Tracer::ctor_5(Node* adr, Node* base, int i) {
  if (!_slp->is_trace_alignment()) {
    return;
  }
  inc_depth();
  if (base == adr) {
    print_depth();
    tty->print_cr(" \\ %d (adr) == %d (base) SWPointer::SWPointer: breaking analysis at i = %d", adr->_idx, base->_idx, i);
    return;
  }
  if (!adr->is_AddP()) {
    print_depth();
    tty->print_cr(" \\ %d (adr) is NOT Addp SWPointer::SWPointer: breaking analysis at i = %d", adr->_idx, i);
  }
}
// Trace hook for the end of the SWPointer constructor's address analysis:
// when alignment tracing is on, prints a closing line carrying mem's index.
void SWPointer::Tracer::ctor_6(Node* mem) {
  if (!_slp->is_trace_alignment()) {
    return;
  }
  print_depth();
  tty->print_cr(" %d (adr) SWPointer::SWPointer: stop analysis", mem->_idx);
}
// Trace hook for SWPointer::invariant(): only active when the SuperWord is
// in vector-loop mode with debugging on. Reports the case where "control of
// n is a member of the loop" and "n is in the basic block" disagree, and
// dumps both n and its control node n_c.
void SWPointer::Tracer::invariant_1(Node *n, Node *n_c) {
  if (!_slp->do_vector_loop() || !_slp->is_debug()) {
    return;
  }

  const int is_member = _slp->_lpt->is_member(_slp->_phase->get_loop(n_c));
  const int in_bb     = _slp->in_bb(n);
  if (is_member == in_bb) {
    return;  // both views agree, nothing to report
  }

  print_depth();
  tty->print(" \\ ");
  tty->print_cr(" %d SWPointer::invariant conditions differ: n_c %d", n->_idx, n_c->_idx);

  print_depth();
  tty->print(" \\ ");
  tty->print_cr("is_member %d, in_bb %d", is_member, in_bb);

  print_depth();
  tty->print(" \\ ");
  n->dump();

  print_depth();
  tty->print(" \\ ");
  n_c->dump();
}
// Trace hook on entry to scaled_iv_plus_offset(): when alignment tracing is
// on, names the node under test and dumps it.
void SWPointer::Tracer::scaled_iv_plus_offset_1(Node* n) {
  if (!_slp->is_trace_alignment()) {
    return;
  }
  print_depth();
  tty->print(" %d SWPointer::scaled_iv_plus_offset testing node: ", n->_idx);
  n->dump();
}
// Trace hook: scaled_iv_plus_offset() succeeded because the node matched
// scaled_iv() directly.
void SWPointer::Tracer::scaled_iv_plus_offset_2(Node* n) {
  if (!_slp->is_trace_alignment()) {
    return;
  }
  print_depth();
  tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: PASSED", n->_idx);
}
// Trace hook: scaled_iv_plus_offset() succeeded because the node matched
// offset_plus_k() directly.
void SWPointer::Tracer::scaled_iv_plus_offset_3(Node* n) {
  if (!_slp->is_trace_alignment()) {
    return;
  }
  print_depth();
  tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: PASSED", n->_idx);
}
// Trace hook: Op_AddI matched with in(1) as the scaled iv and in(2) as the
// offset. Dumps both operands when alignment tracing is on.
void SWPointer::Tracer::scaled_iv_plus_offset_4(Node* n) {
  if (!_slp->is_trace_alignment()) {
    return;
  }
  Node* iv_part  = n->in(1);
  Node* off_part = n->in(2);

  print_depth();
  tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: Op_AddI PASSED", n->_idx);

  print_depth();
  tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(1) is scaled_iv: ", iv_part->_idx);
  iv_part->dump();

  print_depth();
  tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(2) is offset_plus_k: ", off_part->_idx);
  off_part->dump();
}
// Trace hook: Op_AddI matched with in(2) as the scaled iv and in(1) as the
// offset. Dumps both operands when alignment tracing is on.
void SWPointer::Tracer::scaled_iv_plus_offset_5(Node* n) {
  if (!_slp->is_trace_alignment()) {
    return;
  }
  Node* iv_part  = n->in(2);
  Node* off_part = n->in(1);

  print_depth();
  tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: Op_AddI PASSED", n->_idx);

  print_depth();
  tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(2) is scaled_iv: ", iv_part->_idx);
  iv_part->dump();

  print_depth();
  tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(1) is offset_plus_k: ", off_part->_idx);
  off_part->dump();
}
// Trace hook: Op_SubI matched with in(1) as the scaled iv and in(2) as the
// offset. Dumps both operands when alignment tracing is on.
void SWPointer::Tracer::scaled_iv_plus_offset_6(Node* n) {
  if (!_slp->is_trace_alignment()) {
    return;
  }
  Node* iv_part  = n->in(1);
  Node* off_part = n->in(2);

  print_depth();
  tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: Op_SubI PASSED", n->_idx);

  print_depth();
  tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(1) is scaled_iv: ", iv_part->_idx);
  iv_part->dump();

  print_depth();
  tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(2) is offset_plus_k: ", off_part->_idx);
  off_part->dump();
}
// Trace hook: Op_SubI matched with in(2) as the scaled iv and in(1) as the
// offset. Dumps both operands when alignment tracing is on.
void SWPointer::Tracer::scaled_iv_plus_offset_7(Node* n) {
  if (!_slp->is_trace_alignment()) {
    return;
  }
  Node* iv_part  = n->in(2);
  Node* off_part = n->in(1);

  print_depth();
  tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: Op_SubI PASSED", n->_idx);

  print_depth();
  tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(2) is scaled_iv: ", iv_part->_idx);
  iv_part->dump();

  print_depth();
  tty->print(" \\ %d SWPointer::scaled_iv_plus_offset: in(1) is offset_plus_k: ", off_part->_idx);
  off_part->dump();
}
// Trace hook: no pattern in scaled_iv_plus_offset() matched the node.
void SWPointer::Tracer::scaled_iv_plus_offset_8(Node* n) {
  if (!_slp->is_trace_alignment()) {
    return;
  }
  print_depth();
  tty->print_cr(" %d SWPointer::scaled_iv_plus_offset: FAILED", n->_idx);
}
// Trace hook on entry to scaled_iv(): when alignment tracing is on, names
// the node under test and dumps it.
void SWPointer::Tracer::scaled_iv_1(Node* n) {
  if (!_slp->is_trace_alignment()) {
    return;
  }
  print_depth();
  tty->print(" %d SWPointer::scaled_iv: testing node: ", n->_idx);
  n->dump();
}
// Trace hook: scaled_iv() rejected the node because a non-zero scale was
// already recorded; prints that scale.
void SWPointer::Tracer::scaled_iv_2(Node* n, int scale) {
  if (!_slp->is_trace_alignment()) {
    return;
  }
  print_depth();
  tty->print_cr(" %d SWPointer::scaled_iv: FAILED since another _scale has been detected before", n->_idx);
  print_depth();
  tty->print_cr(" \\ SWPointer::scaled_iv: _scale (%d) != 0", scale);
}
// Trace hook: the node is the induction variable itself; prints the scale
// that was recorded for it.
void SWPointer::Tracer::scaled_iv_3(Node* n, int scale) {
  if (!_slp->is_trace_alignment()) {
    return;
  }
  print_depth();
  tty->print_cr(" %d SWPointer::scaled_iv: is iv, setting _scale = %d", n->_idx, scale);
}
// Trace hook: Op_MulI matched with in(1) as the iv and in(2) as the
// constant. Prints the resulting scale and dumps both operands.
void SWPointer::Tracer::scaled_iv_4(Node* n, int scale) {
  if (!_slp->is_trace_alignment()) {
    return;
  }
  Node* iv_in  = n->in(1);
  Node* con_in = n->in(2);

  print_depth();
  tty->print_cr(" %d SWPointer::scaled_iv: Op_MulI PASSED, setting _scale = %d", n->_idx, scale);

  print_depth();
  tty->print(" \\ %d SWPointer::scaled_iv: in(1) is iv: ", iv_in->_idx);
  iv_in->dump();

  print_depth();
  tty->print(" \\ %d SWPointer::scaled_iv: in(2) is Con: ", con_in->_idx);
  con_in->dump();
}
// Trace hook: Op_MulI matched with in(2) as the iv and in(1) as the
// constant. Prints the resulting scale and dumps both operands.
void SWPointer::Tracer::scaled_iv_5(Node* n, int scale) {
  if (!_slp->is_trace_alignment()) {
    return;
  }
  Node* iv_in  = n->in(2);
  Node* con_in = n->in(1);

  print_depth();
  tty->print_cr(" %d SWPointer::scaled_iv: Op_MulI PASSED, setting _scale = %d", n->_idx, scale);

  print_depth();
  tty->print(" \\ %d SWPointer::scaled_iv: in(2) is iv: ", iv_in->_idx);
  iv_in->dump();

  print_depth();
  tty->print(" \\ %d SWPointer::scaled_iv: in(1) is Con: ", con_in->_idx);
  con_in->dump();
}
// Trace hook: Op_LShiftI matched with in(1) as the iv and in(2) as the
// constant shift count. Prints the resulting scale and dumps both operands.
void SWPointer::Tracer::scaled_iv_6(Node* n, int scale) {
  if (!_slp->is_trace_alignment()) {
    return;
  }
  Node* iv_in  = n->in(1);
  Node* con_in = n->in(2);

  print_depth();
  tty->print_cr(" %d SWPointer::scaled_iv: Op_LShiftI PASSED, setting _scale = %d", n->_idx, scale);

  print_depth();
  tty->print(" \\ %d SWPointer::scaled_iv: in(1) is iv: ", iv_in->_idx);
  iv_in->dump();

  print_depth();
  tty->print(" \\ %d SWPointer::scaled_iv: in(2) is Con: ", con_in->_idx);
  con_in->dump();
}
// Trace hook: Op_ConvI2L matched because its in(1) satisfied
// scaled_iv_plus_offset(). Dumps in(1) two levels deeper, then restores the
// previous depth.
void SWPointer::Tracer::scaled_iv_7(Node* n) {
  if (!_slp->is_trace_alignment()) {
    return;
  }
  Node* operand = n->in(1);

  print_depth();
  tty->print_cr(" %d SWPointer::scaled_iv: Op_ConvI2L PASSED", n->_idx);
  print_depth();
  tty->print_cr(" \\ SWPointer::scaled_iv: in(1) %d is scaled_iv_plus_offset: ", operand->_idx);

  inc_depth();
  inc_depth();
  print_depth();
  operand->dump();
  dec_depth();
  dec_depth();
}
// Trace hook for the Op_LShiftL case of scaled_iv(): when alignment tracing
// is on, announces the temporary SWPointer and prints it.
void SWPointer::Tracer::scaled_iv_8(Node* n, SWPointer* tmp) {
  if (!_slp->is_trace_alignment()) {
    return;
  }
  print_depth();
  tty->print(" %d SWPointer::scaled_iv: Op_LShiftL, creating tmp SWPointer: ", n->_idx);
  tmp->print();
}
// Trace hook: Op_LShiftL matched. Prints the scale, offset and multiplier
// that were derived, then dumps in(1) and in(2) two levels deeper before
// restoring the previous depth.
void SWPointer::Tracer::scaled_iv_9(Node* n, int scale, int _offset, int mult) {
  if (!_slp->is_trace_alignment()) {
    return;
  }
  Node* shifted   = n->in(1);
  Node* shift_cnt = n->in(2);

  print_depth();
  tty->print_cr(" %d SWPointer::scaled_iv: Op_LShiftL PASSED, setting _scale = %d, _offset = %d", n->_idx, scale, _offset);
  print_depth();
  tty->print_cr(" \\ SWPointer::scaled_iv: in(1) %d is scaled_iv_plus_offset, in(2) %d used to get mult = %d: _scale = %d, _offset = %d",
                shifted->_idx, shift_cnt->_idx, mult, scale, _offset);

  inc_depth();
  inc_depth();
  print_depth();
  shifted->dump();
  print_depth();
  shift_cnt->dump();
  dec_depth();
  dec_depth();
}
// Trace hook: no pattern in scaled_iv() matched the node.
void SWPointer::Tracer::scaled_iv_10(Node* n) {
  if (!_slp->is_trace_alignment()) {
    return;
  }
  print_depth();
  tty->print_cr(" %d SWPointer::scaled_iv: FAILED", n->_idx);
}
// Trace hook on entry to offset_plus_k(): when alignment tracing is on,
// names the node under test and dumps it.
void SWPointer::Tracer::offset_plus_k_1(Node* n) {
  if (!_slp->is_trace_alignment()) {
    return;
  }
  print_depth();
  tty->print(" %d SWPointer::offset_plus_k: testing node: ", n->_idx);
  n->dump();
}
// Alignment trace for offset_plus_k(): reports the "Op_ConI PASSED" case and
// the offset that was set.
// Prints nothing unless alignment tracing is enabled on the SuperWord.
void SWPointer::Tracer::offset_plus_k_2(Node* n, int _offset) {
  if (!_slp->is_trace_alignment()) {
    return;
  }

  print_depth();
  tty->print_cr(" %d SWPointer::offset_plus_k: Op_ConI PASSED, setting _offset = %d", n->_idx, _offset);
}
// Alignment trace for offset_plus_k(): reports the "Op_ConL PASSED" case and
// the offset that was set.
// Prints nothing unless alignment tracing is enabled on the SuperWord.
void SWPointer::Tracer::offset_plus_k_3(Node* n, int _offset) {
  if (!_slp->is_trace_alignment()) {
    return;
  }

  print_depth();
  tty->print_cr(" %d SWPointer::offset_plus_k: Op_ConL PASSED, setting _offset = %d", n->_idx, _offset);
}
// Alignment trace for offset_plus_k(): reports a FAILED outcome for an
// Op_ConL node and prints the long constant that was rejected as too big.
// Prints nothing unless alignment tracing is enabled on the SuperWord.
void SWPointer::Tracer::offset_plus_k_4(Node* n) {
  if (!_slp->is_trace_alignment()) {
    return;
  }

  print_depth();
  tty->print_cr(" %d SWPointer::offset_plus_k: FAILED", n->_idx);

  print_depth();
  tty->print_cr(" \\ " JLONG_FORMAT " SWPointer::offset_plus_k: Op_ConL FAILED, k is too big", n->get_long());
}
// Alignment trace for offset_plus_k(): reports the failure caused by an
// invariant having been recorded earlier, and dumps that invariant.
// _invar is dereferenced unconditionally, so it must be non-NULL when
// tracing is enabled.
void SWPointer::Tracer::offset_plus_k_5(Node* n, Node* _invar) {
  if (!_slp->is_trace_alignment()) {
    return;
  }

  print_depth();
  tty->print_cr(" %d SWPointer::offset_plus_k: FAILED since another invariant has been detected before", n->_idx);

  print_depth();
  tty->print(" \\ %d SWPointer::offset_plus_k: _invar != NULL: ", _invar->_idx);
  _invar->dump();
}
// Alignment trace for offset_plus_k(): reports the "Op_AddI PASSED" case in
// which in(2) is the constant and in(1) is the invariant, then dumps both.
// Prints nothing unless alignment tracing is enabled on the SuperWord.
void SWPointer::Tracer::offset_plus_k_6(Node* n, Node* _invar, bool _negate_invar, int _offset) {
  if (!_slp->is_trace_alignment()) {
    return;
  }

  print_depth();
  tty->print_cr(" %d SWPointer::offset_plus_k: Op_AddI PASSED, setting _negate_invar = %d, _invar = %d, _offset = %d",
                n->_idx, _negate_invar, _invar->_idx, _offset);

  Node* con_input = n->in(2);
  print_depth();
  tty->print(" \\ %d SWPointer::offset_plus_k: in(2) is Con: ", con_input->_idx);
  con_input->dump();

  print_depth();
  tty->print(" \\ %d SWPointer::offset_plus_k: in(1) is invariant: ", _invar->_idx);
  _invar->dump();
}
// Alignment trace for offset_plus_k(): reports the "Op_AddI PASSED" case in
// which in(1) is the constant and in(2) is the invariant, then dumps both.
// Prints nothing unless alignment tracing is enabled on the SuperWord.
void SWPointer::Tracer::offset_plus_k_7(Node* n, Node* _invar, bool _negate_invar, int _offset) {
  if (!_slp->is_trace_alignment()) {
    return;
  }

  print_depth();
  tty->print_cr(" %d SWPointer::offset_plus_k: Op_AddI PASSED, setting _negate_invar = %d, _invar = %d, _offset = %d",
                n->_idx, _negate_invar, _invar->_idx, _offset);

  Node* con_input = n->in(1);
  print_depth();
  tty->print(" \\ %d SWPointer::offset_plus_k: in(1) is Con: ", con_input->_idx);
  con_input->dump();

  print_depth();
  tty->print(" \\ %d SWPointer::offset_plus_k: in(2) is invariant: ", _invar->_idx);
  _invar->dump();
}
// Alignment trace for offset_plus_k(): reports the Op_SubI success case in
// which in(2) is the constant and in(1) is the invariant, then dumps both.
// Prints nothing unless alignment tracing is enabled on the SuperWord.
void SWPointer::Tracer::offset_plus_k_8(Node* n, Node* _invar, bool _negate_invar, int _offset) {
  if (!_slp->is_trace_alignment()) {
    return;
  }

  print_depth();
  tty->print_cr(" %d SWPointer::offset_plus_k: Op_SubI is PASSED, setting _negate_invar = %d, _invar = %d, _offset = %d",
                n->_idx, _negate_invar, _invar->_idx, _offset);

  Node* con_input = n->in(2);
  print_depth();
  tty->print(" \\ %d SWPointer::offset_plus_k: in(2) is Con: ", con_input->_idx);
  con_input->dump();

  print_depth();
  tty->print(" \\ %d SWPointer::offset_plus_k: in(1) is invariant: ", _invar->_idx);
  _invar->dump();
}
// Alignment trace for offset_plus_k(): reports the "Op_SubI PASSED" case in
// which in(1) is the constant and in(2) is the invariant, then dumps both.
// Prints nothing unless alignment tracing is enabled on the SuperWord.
void SWPointer::Tracer::offset_plus_k_9(Node* n, Node* _invar, bool _negate_invar, int _offset) {
  if (!_slp->is_trace_alignment()) {
    return;
  }

  print_depth();
  tty->print_cr(" %d SWPointer::offset_plus_k: Op_SubI PASSED, setting _negate_invar = %d, _invar = %d, _offset = %d", n->_idx, _negate_invar, _invar->_idx, _offset);

  Node* con_input = n->in(1);
  print_depth();
  tty->print(" \\ %d SWPointer::offset_plus_k: in(1) is Con: ", con_input->_idx);
  con_input->dump();

  print_depth();
  tty->print(" \\ %d SWPointer::offset_plus_k: in(2) is invariant: ", _invar->_idx);
  _invar->dump();
}
// Alignment trace for offset_plus_k(): reports the PASSED case where node n
// itself is the invariant, together with the values that were set.
// Prints nothing unless alignment tracing is enabled on the SuperWord.
void SWPointer::Tracer::offset_plus_k_10(Node* n, Node* _invar, bool _negate_invar, int _offset) {
  if (!_slp->is_trace_alignment()) {
    return;
  }

  print_depth();
  tty->print_cr(" %d SWPointer::offset_plus_k: PASSED, setting _negate_invar = %d, _invar = %d, _offset = %d", n->_idx, _negate_invar, _invar->_idx, _offset);

  print_depth();
  tty->print_cr(" \\ %d SWPointer::offset_plus_k: is invariant", n->_idx);
}
// Alignment trace for offset_plus_k(): reports the FAILED outcome for node n.
// Prints nothing unless alignment tracing is enabled on the SuperWord.
void SWPointer::Tracer::offset_plus_k_11(Node* n) {
  if (!_slp->is_trace_alignment()) {
    return;
  }

  print_depth();
  tty->print_cr(" %d SWPointer::offset_plus_k: FAILED", n->_idx);
}
#endif
// ========================= OrderedPair ===================== // ========================= OrderedPair =====================
const OrderedPair OrderedPair::initial; const OrderedPair OrderedPair::initial;
@ -3076,6 +3489,13 @@ void DepSuccs::next() {
// //
// --------------------------------- vectorization/simd ----------------------------------- // --------------------------------- vectorization/simd -----------------------------------
// //
// Returns true when both nodes are non-NULL and the clone map reports the
// same idx for them (CloneMap::same_idx). A NULL argument yields false.
bool SuperWord::same_origin_idx(Node* a, Node* b) const {
  if (a == NULL || b == NULL) {
    return false;
  }
  return _clone_map.same_idx(a->_idx, b->_idx);
}
// Returns true when both nodes are non-NULL and the clone map reports the
// same generation for them (CloneMap::same_gen). A NULL argument yields false.
bool SuperWord::same_generation(Node* a, Node* b) const {
  if (a == NULL || b == NULL) {
    return false;
  }
  return _clone_map.same_gen(a->_idx, b->_idx);
}
Node* SuperWord::find_phi_for_mem_dep(LoadNode* ld) { Node* SuperWord::find_phi_for_mem_dep(LoadNode* ld) {
assert(in_bb(ld), "must be in block"); assert(in_bb(ld), "must be in block");
if (_clone_map.gen(ld->_idx) == _ii_first) { if (_clone_map.gen(ld->_idx) == _ii_first) {
@ -3101,7 +3521,7 @@ Node* SuperWord::find_phi_for_mem_dep(LoadNode* ld) {
#endif #endif
return NULL; return NULL;
} }
if (!in_bb(mem) || _clone_map.gen(mem->_idx) == _clone_map.gen(ld->_idx)) { if (!in_bb(mem) || same_generation(mem, ld)) {
#ifndef PRODUCT #ifndef PRODUCT
if (_vector_loop_debug) { if (_vector_loop_debug) {
tty->print_cr("SuperWord::find_phi_for_mem_dep _clone_map.gen(mem->_idx)=%d", tty->print_cr("SuperWord::find_phi_for_mem_dep _clone_map.gen(mem->_idx)=%d",
@ -3151,7 +3571,7 @@ Node* SuperWord::find_phi_for_mem_dep(LoadNode* ld) {
Node* SuperWord::first_node(Node* nd) { Node* SuperWord::first_node(Node* nd) {
for (int ii = 0; ii < _iteration_first.length(); ii++) { for (int ii = 0; ii < _iteration_first.length(); ii++) {
Node* nnn = _iteration_first.at(ii); Node* nnn = _iteration_first.at(ii);
if (_clone_map.idx(nnn->_idx) == _clone_map.idx(nd->_idx)) { if (same_origin_idx(nnn, nd)) {
#ifndef PRODUCT #ifndef PRODUCT
if (_vector_loop_debug) { if (_vector_loop_debug) {
tty->print_cr("SuperWord::first_node: %d is the first iteration node for %d (_clone_map.idx(nnn->_idx) = %d)", tty->print_cr("SuperWord::first_node: %d is the first iteration node for %d (_clone_map.idx(nnn->_idx) = %d)",
@ -3174,7 +3594,7 @@ Node* SuperWord::first_node(Node* nd) {
Node* SuperWord::last_node(Node* nd) { Node* SuperWord::last_node(Node* nd) {
for (int ii = 0; ii < _iteration_last.length(); ii++) { for (int ii = 0; ii < _iteration_last.length(); ii++) {
Node* nnn = _iteration_last.at(ii); Node* nnn = _iteration_last.at(ii);
if (_clone_map.idx(nnn->_idx) == _clone_map.idx(nd->_idx)) { if (same_origin_idx(nnn, nd)) {
#ifndef PRODUCT #ifndef PRODUCT
if (_vector_loop_debug) { if (_vector_loop_debug) {
tty->print_cr("SuperWord::last_node _clone_map.idx(nnn->_idx)=%d, _clone_map.idx(nd->_idx)=%d", tty->print_cr("SuperWord::last_node _clone_map.idx(nnn->_idx)=%d, _clone_map.idx(nd->_idx)=%d",
@ -3219,10 +3639,12 @@ int SuperWord::mark_generations() {
} else if (_ii_first != _clone_map.gen(ii->_idx)) { } else if (_ii_first != _clone_map.gen(ii->_idx)) {
#ifndef PRODUCT #ifndef PRODUCT
if (TraceSuperWord && Verbose) { if (TraceSuperWord && Verbose) {
tty->print_cr("SuperWord::mark_generations _ii_first error - found different generations in two nodes "); tty->print_cr("SuperWord::mark_generations: _ii_first was found before and not equal to one in this node (%d)", _ii_first);
ii->dump(); ii->dump();
if (ii_err!= 0) {
ii_err->dump(); ii_err->dump();
} }
}
#endif #endif
return -1; // this phi has Stores from different generations of unroll and cannot be simd/vectorized return -1; // this phi has Stores from different generations of unroll and cannot be simd/vectorized
} }
@ -3252,8 +3674,7 @@ int SuperWord::mark_generations() {
} }
// building order of iterations // building order of iterations
assert(_ii_order.length() == 0, "should be empty"); if (_ii_order.length() == 0 && ii_err != 0) {
if (ii_err != 0) {
assert(in_bb(ii_err) && ii_err->is_Store(), "should be Store in bb"); assert(in_bb(ii_err) && ii_err->is_Store(), "should be Store in bb");
Node* nd = ii_err; Node* nd = ii_err;
while(_clone_map.gen(nd->_idx) != _ii_last) { while(_clone_map.gen(nd->_idx) != _ii_last) {
@ -3261,7 +3682,7 @@ int SuperWord::mark_generations() {
bool found = false; bool found = false;
for (DUIterator_Fast imax, i = nd->fast_outs(imax); i < imax; i++) { for (DUIterator_Fast imax, i = nd->fast_outs(imax); i < imax; i++) {
Node* use = nd->fast_out(i); Node* use = nd->fast_out(i);
if (_clone_map.idx(use->_idx) == _clone_map.idx(nd->_idx) && use->as_Store()->in(MemNode::Memory) == nd) { if (same_origin_idx(use, nd) && use->as_Store()->in(MemNode::Memory) == nd) {
found = true; found = true;
nd = use; nd = use;
break; break;
@ -3303,7 +3724,7 @@ int SuperWord::mark_generations() {
bool SuperWord::fix_commutative_inputs(Node* gold, Node* fix) { bool SuperWord::fix_commutative_inputs(Node* gold, Node* fix) {
assert(gold->is_Add() && fix->is_Add() || gold->is_Mul() && fix->is_Mul(), "should be only Add or Mul nodes"); assert(gold->is_Add() && fix->is_Add() || gold->is_Mul() && fix->is_Mul(), "should be only Add or Mul nodes");
assert(_clone_map.idx(gold->_idx) == _clone_map.idx(fix->_idx), "should be clones of the same node"); assert(same_origin_idx(gold, fix), "should be clones of the same node");
Node* gin1 = gold->in(1); Node* gin1 = gold->in(1);
Node* gin2 = gold->in(2); Node* gin2 = gold->in(2);
Node* fin1 = fix->in(1); Node* fin1 = fix->in(1);
@ -3311,12 +3732,12 @@ bool SuperWord::fix_commutative_inputs(Node* gold, Node* fix) {
bool swapped = false; bool swapped = false;
if (in_bb(gin1) && in_bb(gin2) && in_bb(fin1) && in_bb(fin1)) { if (in_bb(gin1) && in_bb(gin2) && in_bb(fin1) && in_bb(fin1)) {
if (_clone_map.idx(gin1->_idx) == _clone_map.idx(fin1->_idx) && if (same_origin_idx(gin1, fin1) &&
_clone_map.idx(gin2->_idx) == _clone_map.idx(fin2->_idx)) { same_origin_idx(gin2, fin2)) {
return true; // nothing to fix return true; // nothing to fix
} }
if (_clone_map.idx(gin1->_idx) == _clone_map.idx(fin2->_idx) && if (same_origin_idx(gin1, fin2) &&
_clone_map.idx(gin2->_idx) == _clone_map.idx(fin1->_idx)) { same_origin_idx(gin2, fin1)) {
fix->swap_edges(1, 2); fix->swap_edges(1, 2);
swapped = true; swapped = true;
} }
@ -3364,7 +3785,7 @@ bool SuperWord::pack_parallel() {
for (int gen = 1; gen < _ii_order.length(); ++gen) { for (int gen = 1; gen < _ii_order.length(); ++gen) {
for (int kk = 0; kk < _block.length(); kk++) { for (int kk = 0; kk < _block.length(); kk++) {
Node* clone = _block.at(kk); Node* clone = _block.at(kk);
if (_clone_map.idx(clone->_idx) == _clone_map.idx(nd->_idx) && if (same_origin_idx(clone, nd) &&
_clone_map.gen(clone->_idx) == _ii_order.at(gen)) { _clone_map.gen(clone->_idx) == _ii_order.at(gen)) {
if (nd->is_Add() || nd->is_Mul()) { if (nd->is_Add() || nd->is_Mul()) {
fix_commutative_inputs(nd, clone); fix_commutative_inputs(nd, clone);
@ -3429,9 +3850,8 @@ bool SuperWord::hoist_loads_in_graph() {
if (ld->is_Load() && ld->as_Load()->in(MemNode::Memory) == n && in_bb(ld)) { if (ld->is_Load() && ld->as_Load()->in(MemNode::Memory) == n && in_bb(ld)) {
for (int i = 0; i < _block.length(); i++) { for (int i = 0; i < _block.length(); i++) {
Node* ld2 = _block.at(i); Node* ld2 = _block.at(i);
if (ld2->is_Load() && if (ld2->is_Load() && same_origin_idx(ld, ld2) &&
_clone_map.idx(ld->_idx) == _clone_map.idx(ld2->_idx) && !same_generation(ld, ld2)) { // <= do not collect the first generation ld
_clone_map.gen(ld->_idx) != _clone_map.gen(ld2->_idx)) { // <= do not collect the first generation ld
#ifndef PRODUCT #ifndef PRODUCT
if (_vector_loop_debug) { if (_vector_loop_debug) {
tty->print_cr("SuperWord::hoist_loads_in_graph: will try to hoist load ld2->_idx=%d, cloned from %d (ld->_idx=%d)", tty->print_cr("SuperWord::hoist_loads_in_graph: will try to hoist load ld2->_idx=%d, cloned from %d (ld->_idx=%d)",

View File

@ -203,6 +203,7 @@ class SWNodeInfo VALUE_OBJ_CLASS_SPEC {
// -----------------------------SuperWord--------------------------------- // -----------------------------SuperWord---------------------------------
// Transforms scalar operations into packed (superword) operations. // Transforms scalar operations into packed (superword) operations.
class SuperWord : public ResourceObj { class SuperWord : public ResourceObj {
friend class SWPointer;
private: private:
PhaseIdealLoop* _phase; PhaseIdealLoop* _phase;
Arena* _arena; Arena* _arena;
@ -247,8 +248,17 @@ class SuperWord : public ResourceObj {
PhaseIdealLoop* phase() { return _phase; } PhaseIdealLoop* phase() { return _phase; }
IdealLoopTree* lpt() { return _lpt; } IdealLoopTree* lpt() { return _lpt; }
PhiNode* iv() { return _iv; } PhiNode* iv() { return _iv; }
bool early_return() { return _early_return; } bool early_return() { return _early_return; }
#ifndef PRODUCT
bool is_debug() { return _vector_loop_debug > 0; }
bool is_trace_alignment() { return (_vector_loop_debug & 2) > 0; }
bool is_trace_mem_slice() { return (_vector_loop_debug & 4) > 0; }
bool is_trace_loop() { return (_vector_loop_debug & 8) > 0; }
bool is_trace_adjacent() { return (_vector_loop_debug & 16) > 0; }
#endif
bool do_vector_loop() { return _do_vector_loop; }
private: private:
IdealLoopTree* _lpt; // Current loop tree node IdealLoopTree* _lpt; // Current loop tree node
LoopNode* _lp; // Current LoopNode LoopNode* _lp; // Current LoopNode
@ -257,12 +267,14 @@ class SuperWord : public ResourceObj {
bool _race_possible; // In cases where SDMU is true bool _race_possible; // In cases where SDMU is true
bool _early_return; // True if we do not initialize bool _early_return; // True if we do not initialize
bool _do_vector_loop; // whether to do vectorization/simd style bool _do_vector_loop; // whether to do vectorization/simd style
bool _vector_loop_debug; // provide more printing in debug mode
int _num_work_vecs; // Number of non memory vector operations int _num_work_vecs; // Number of non memory vector operations
int _num_reductions; // Number of reduction expressions applied int _num_reductions; // Number of reduction expressions applied
int _ii_first; // generation with direct deps from mem phi int _ii_first; // generation with direct deps from mem phi
int _ii_last; // generation with direct deps to mem phi int _ii_last; // generation with direct deps to mem phi
GrowableArray<int> _ii_order; GrowableArray<int> _ii_order;
#ifndef PRODUCT
uintx _vector_loop_debug; // provide more printing in debug mode
#endif
// Accessors // Accessors
Arena* arena() { return _arena; } Arena* arena() { return _arena; }
@ -325,12 +337,20 @@ class SuperWord : public ResourceObj {
Node_List* my_pack(Node* n) { return !in_bb(n) ? NULL : _node_info.adr_at(bb_idx(n))->_my_pack; } Node_List* my_pack(Node* n) { return !in_bb(n) ? NULL : _node_info.adr_at(bb_idx(n))->_my_pack; }
void set_my_pack(Node* n, Node_List* p) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_my_pack = p; } void set_my_pack(Node* n, Node_List* p) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_my_pack = p; }
// CloneMap utilities
bool same_origin_idx(Node* a, Node* b) const;
bool same_generation(Node* a, Node* b) const;
// methods // methods
// Extract the superword level parallelism // Extract the superword level parallelism
void SLP_extract(); void SLP_extract();
// Find the adjacent memory references and create pack pairs for them. // Find the adjacent memory references and create pack pairs for them.
void find_adjacent_refs(); void find_adjacent_refs();
// Tracing support
#ifndef PRODUCT
void find_adjacent_refs_trace_1(Node* best_align_to_mem_ref, int best_iv_adjustment);
#endif
// Find a memory reference to align the loop induction variable to. // Find a memory reference to align the loop induction variable to.
MemNode* find_align_to_ref(Node_List &memops); MemNode* find_align_to_ref(Node_List &memops);
// Calculate loop's iv adjustment for this memory ops. // Calculate loop's iv adjustment for this memory ops.
@ -340,13 +360,13 @@ class SuperWord : public ResourceObj {
// rebuild the graph so all loads in different iterations of cloned loop become dependant on phi node (in _do_vector_loop only) // rebuild the graph so all loads in different iterations of cloned loop become dependant on phi node (in _do_vector_loop only)
bool hoist_loads_in_graph(); bool hoist_loads_in_graph();
// Test whether MemNode::Memory dependency to the same load but in the first iteration of this loop is coming from memory phi // Test whether MemNode::Memory dependency to the same load but in the first iteration of this loop is coming from memory phi
// Return false if failed. // Return false if failed
Node* find_phi_for_mem_dep(LoadNode* ld); Node* find_phi_for_mem_dep(LoadNode* ld);
// Return same node but from the first generation. Return 0, if not found // Return same node but from the first generation. Return 0, if not found
Node* first_node(Node* nd); Node* first_node(Node* nd);
// Return same node as this but from the last generation. Return 0, if not found // Return same node as this but from the last generation. Return 0, if not found
Node* last_node(Node* n); Node* last_node(Node* n);
// Mark nodes belonging to first and last generation, // Mark nodes belonging to first and last generation
// returns first generation index or -1 if vectorization/simd is impossible // returns first generation index or -1 if vectorization/simd is impossible
int mark_generations(); int mark_generations();
// swapping inputs of commutative instruction (Add or Mul) // swapping inputs of commutative instruction (Add or Mul)
@ -483,10 +503,7 @@ class SWPointer VALUE_OBJ_CLASS_SPEC {
IdealLoopTree* lpt() { return _slp->lpt(); } IdealLoopTree* lpt() { return _slp->lpt(); }
PhiNode* iv() { return _slp->iv(); } // Induction var PhiNode* iv() { return _slp->iv(); } // Induction var
bool invariant(Node* n) { bool invariant(Node* n);
Node *n_c = phase()->get_ctrl(n);
return !lpt()->is_member(phase()->get_loop(n_c));
}
// Match: k*iv + offset // Match: k*iv + offset
bool scaled_iv_plus_offset(Node* n); bool scaled_iv_plus_offset(Node* n);
@ -545,6 +562,76 @@ class SWPointer VALUE_OBJ_CLASS_SPEC {
static bool comparable(int cmp) { return cmp < NotComparable; } static bool comparable(int cmp) { return cmp < NotComparable; }
void print(); void print();
#ifndef PRODUCT
// Tracing helper embedded in SWPointer (non-PRODUCT builds only).
// Every member is private (class default); SuperWord and SWPointer are
// granted access through the friend declarations below.
class Tracer {
friend class SuperWord;
friend class SWPointer;
SuperWord* _slp;      // SuperWord whose is_trace_alignment() gates the trace output
static int _depth;    // current trace depth; static, hence shared by all Tracer instances
int _depth_save;      // slot used by store_depth()/restore_depth(); not set by the constructor
// Defined out of line; presumably emits indentation matching _depth -- confirm in superword.cpp.
void print_depth();
int depth() const { return _depth; }
void set_depth(int d) { _depth = d; }
void inc_depth() { _depth++;}
// Never lets the depth drop below zero.
void dec_depth() { if (_depth > 0) _depth--;}
void store_depth() {_depth_save = _depth;}
void restore_depth() {_depth = _depth_save;}
// Scope guard for the shared depth: the default constructor increments
// _depth and the destructor decrements it again (not below zero).
class Depth {
friend class Tracer;
friend class SWPointer;
friend class SuperWord;
Depth() { ++_depth; }
// Resets the depth to 0; the argument's value is not used and only selects
// this overload. The destructor still runs its guarded decrement.
Depth(int x) { _depth = 0; }
~Depth() { if (_depth > 0) --_depth;}
};
Tracer (SuperWord* slp) : _slp(slp) {}
// tracing functions
// One function per trace point, named <traced SWPointer routine>_<step>.
// The definitions visible in superword.cpp print only when
// _slp->is_trace_alignment() is true; presumably all of them do.
void ctor_1(Node* mem);
void ctor_2(Node* adr);
void ctor_3(Node* adr, int i);
void ctor_4(Node* adr, int i);
void ctor_5(Node* adr, Node* base, int i);
void ctor_6(Node* mem);
void invariant_1(Node *n, Node *n_c);
void scaled_iv_plus_offset_1(Node* n);
void scaled_iv_plus_offset_2(Node* n);
void scaled_iv_plus_offset_3(Node* n);
void scaled_iv_plus_offset_4(Node* n);
void scaled_iv_plus_offset_5(Node* n);
void scaled_iv_plus_offset_6(Node* n);
void scaled_iv_plus_offset_7(Node* n);
void scaled_iv_plus_offset_8(Node* n);
void scaled_iv_1(Node* n);
void scaled_iv_2(Node* n, int scale);
void scaled_iv_3(Node* n, int scale);
void scaled_iv_4(Node* n, int scale);
void scaled_iv_5(Node* n, int scale);
void scaled_iv_6(Node* n, int scale);
void scaled_iv_7(Node* n);
void scaled_iv_8(Node* n, SWPointer* tmp);
void scaled_iv_9(Node* n, int _scale, int _offset, int mult);
void scaled_iv_10(Node* n);
void offset_plus_k_1(Node* n);
void offset_plus_k_2(Node* n, int _offset);
void offset_plus_k_3(Node* n, int _offset);
void offset_plus_k_4(Node* n);
void offset_plus_k_5(Node* n, Node* _invar);
void offset_plus_k_6(Node* n, Node* _invar, bool _negate_invar, int _offset);
void offset_plus_k_7(Node* n, Node* _invar, bool _negate_invar, int _offset);
void offset_plus_k_8(Node* n, Node* _invar, bool _negate_invar, int _offset);
void offset_plus_k_9(Node* n, Node* _invar, bool _negate_invar, int _offset);
void offset_plus_k_10(Node* n, Node* _invar, bool _negate_invar, int _offset);
void offset_plus_k_11(Node* n);
} _tracer; // the Tracer instance held by this object
#endif
}; };