8076284: Improve vectorization of parallel streams
Improve vectorization of java/util/stream/Streams$RangeIntSpliterator::forEachRemaining() method and enable loop vectorization in a given method on demand. Reviewed-by: kvn
This commit is contained in:
parent
cf9c335865
commit
c7d33de202
@ -591,6 +591,9 @@
|
||||
template(classLoader_name, "classLoader") \
|
||||
template(componentType_name, "componentType") \
|
||||
\
|
||||
/* forEachRemaining support */ \
|
||||
template(java_util_stream_StreamsRangeIntSpliterator, "java/util/stream/Streams$RangeIntSpliterator") \
|
||||
\
|
||||
/* trace signatures */ \
|
||||
TRACE_TEMPLATES(template) \
|
||||
\
|
||||
@ -1121,6 +1124,11 @@
|
||||
do_intrinsic(_Double_valueOf, java_lang_Double, valueOf_name, Double_valueOf_signature, F_S) \
|
||||
do_name( Double_valueOf_signature, "(D)Ljava/lang/Double;") \
|
||||
\
|
||||
/* forEachRemaining */ \
|
||||
do_intrinsic(_forEachRemaining, java_util_stream_StreamsRangeIntSpliterator, forEachRemaining_name, forEachRemaining_signature, F_R) \
|
||||
do_name( forEachRemaining_name, "forEachRemaining") \
|
||||
do_name( forEachRemaining_signature, "(Ljava/util/function/IntConsumer;)V") \
|
||||
|
||||
/*end*/
|
||||
|
||||
|
||||
|
@ -318,6 +318,9 @@
|
||||
notproduct(bool, TraceLoopUnswitching, false, \
|
||||
"Trace loop unswitching") \
|
||||
\
|
||||
product(bool, AllowVectorizeOnDemand, true, \
|
||||
"Globally supress vectorization set in VectorizeMethod") \
|
||||
\
|
||||
product(bool, UseSuperWord, true, \
|
||||
"Transform scalar operations into superword operations") \
|
||||
\
|
||||
|
@ -453,6 +453,8 @@ CompileWrapper::CompileWrapper(Compile* compile) : _compile(compile) {
|
||||
assert(compile == Compile::current(), "sanity");
|
||||
|
||||
compile->set_type_dict(NULL);
|
||||
compile->set_clone_map(new Dict(cmpkey, hashkey, _compile->comp_arena()));
|
||||
compile->clone_map().set_clone_idx(0);
|
||||
compile->set_type_hwm(NULL);
|
||||
compile->set_type_last_size(0);
|
||||
compile->set_last_tf(NULL, NULL);
|
||||
@ -462,6 +464,7 @@ CompileWrapper::CompileWrapper(Compile* compile) : _compile(compile) {
|
||||
Type::Initialize(compile);
|
||||
_compile->set_scratch_buffer_blob(NULL);
|
||||
_compile->begin_method();
|
||||
_compile->clone_map().set_debug(_compile->has_method() && _compile->method_has_option(_compile->clone_map().debug_option_name));
|
||||
}
|
||||
CompileWrapper::~CompileWrapper() {
|
||||
_compile->end_method();
|
||||
@ -1091,6 +1094,24 @@ void Compile::Init(int aliaslevel) {
|
||||
set_do_scheduling(OptoScheduling);
|
||||
set_do_count_invocations(false);
|
||||
set_do_method_data_update(false);
|
||||
|
||||
set_do_vector_loop(false);
|
||||
|
||||
bool do_vector = false;
|
||||
if (AllowVectorizeOnDemand) {
|
||||
if (has_method() && (method()->has_option("Vectorize") || method()->has_option("VectorizeDebug"))) {
|
||||
set_do_vector_loop(true);
|
||||
} else if (has_method() && method()->name() != 0 &&
|
||||
method()->intrinsic_id() == vmIntrinsics::_forEachRemaining) {
|
||||
set_do_vector_loop(true);
|
||||
}
|
||||
#ifndef PRODUCT
|
||||
if (do_vector_loop() && Verbose) {
|
||||
tty->print("Compile::Init: do vectorized loops (SIMD like) for method %s\n", method()->name()->as_quoted_ascii());
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
set_age_code(has_method() && method()->profile_aging());
|
||||
set_rtm_state(NoRTM); // No RTM lock eliding by default
|
||||
method_has_option_value("MaxNodeLimit", _max_node_limit);
|
||||
@ -4334,3 +4355,63 @@ bool Compile::randomized_select(int count) {
|
||||
assert(count > 0, "only positive");
|
||||
return (os::random() & RANDOMIZED_DOMAIN_MASK) < (RANDOMIZED_DOMAIN / count);
|
||||
}
|
||||
|
||||
const char* CloneMap::debug_option_name = "CloneMapDebug";
|
||||
CloneMap& Compile::clone_map() { return _clone_map; }
|
||||
void Compile::set_clone_map(Dict* d) { _clone_map._dict = d; }
|
||||
|
||||
void NodeCloneInfo::dump() const {
|
||||
tty->print(" {%d:%d} ", idx(), gen());
|
||||
}
|
||||
|
||||
void CloneMap::clone(Node* old, Node* nnn, int gen) {
|
||||
uint64_t val = value(old->_idx);
|
||||
NodeCloneInfo cio(val);
|
||||
assert(val != 0, "old node should be in the map");
|
||||
NodeCloneInfo cin(cio.idx(), gen + cio.gen());
|
||||
insert(nnn->_idx, cin.get());
|
||||
#ifndef PRODUCT
|
||||
if (is_debug()) {
|
||||
tty->print_cr("CloneMap::clone inserted node %d info {%d:%d} into CloneMap", nnn->_idx, cin.idx(), cin.gen());
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void CloneMap::verify_insert_and_clone(Node* old, Node* nnn, int gen) {
|
||||
NodeCloneInfo cio(value(old->_idx));
|
||||
if (cio.get() == 0) {
|
||||
cio.set(old->_idx, 0);
|
||||
insert(old->_idx, cio.get());
|
||||
#ifndef PRODUCT
|
||||
if (is_debug()) {
|
||||
tty->print_cr("CloneMap::verify_insert_and_clone inserted node %d info {%d:%d} into CloneMap", old->_idx, cio.idx(), cio.gen());
|
||||
}
|
||||
#endif
|
||||
}
|
||||
clone(old, nnn, gen);
|
||||
}
|
||||
|
||||
int CloneMap::max_gen() const {
|
||||
int g = 0;
|
||||
DictI di(_dict);
|
||||
for(; di.test(); ++di) {
|
||||
int t = gen(di._key);
|
||||
if (g < t) {
|
||||
g = t;
|
||||
#ifndef PRODUCT
|
||||
if (is_debug()) {
|
||||
tty->print_cr("CloneMap::max_gen() update max=%d from %d", g, _2_node_idx_t(di._key));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
return g;
|
||||
}
|
||||
|
||||
void CloneMap::dump(node_idx_t key) const {
|
||||
uint64_t val = value(key);
|
||||
if (val != 0) {
|
||||
NodeCloneInfo ni(val);
|
||||
ni.dump();
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -47,6 +47,7 @@ class Block;
|
||||
class Bundle;
|
||||
class C2Compiler;
|
||||
class CallGenerator;
|
||||
class CloneMap;
|
||||
class ConnectionGraph;
|
||||
class InlineTree;
|
||||
class Int_Array;
|
||||
@ -59,6 +60,7 @@ class MachSafePointNode;
|
||||
class Node;
|
||||
class Node_Array;
|
||||
class Node_Notes;
|
||||
class NodeCloneInfo;
|
||||
class OptoReg;
|
||||
class PhaseCFG;
|
||||
class PhaseGVN;
|
||||
@ -84,6 +86,62 @@ class WarmCallInfo;
|
||||
class Node_Stack;
|
||||
struct Final_Reshape_Counts;
|
||||
|
||||
typedef unsigned int node_idx_t;
|
||||
class NodeCloneInfo {
|
||||
private:
|
||||
uint64_t _idx_clone_orig;
|
||||
public:
|
||||
|
||||
void set_idx(node_idx_t idx) {
|
||||
_idx_clone_orig = _idx_clone_orig & 0xFFFFFFFF00000000 | idx;
|
||||
}
|
||||
node_idx_t idx() const { return (node_idx_t)(_idx_clone_orig & 0xFFFFFFFF); }
|
||||
|
||||
void set_gen(int generation) {
|
||||
uint64_t g = (uint64_t)generation << 32;
|
||||
_idx_clone_orig = _idx_clone_orig & 0xFFFFFFFF | g;
|
||||
}
|
||||
int gen() const { return (int)(_idx_clone_orig >> 32); }
|
||||
|
||||
void set(uint64_t x) { _idx_clone_orig = x; }
|
||||
void set(node_idx_t x, int g) { set_idx(x); set_gen(g); }
|
||||
uint64_t get() const { return _idx_clone_orig; }
|
||||
|
||||
NodeCloneInfo(uint64_t idx_clone_orig) : _idx_clone_orig(idx_clone_orig) {}
|
||||
NodeCloneInfo(node_idx_t x, int g) {set(x, g);}
|
||||
|
||||
void dump() const;
|
||||
};
|
||||
|
||||
class CloneMap {
|
||||
friend class Compile;
|
||||
private:
|
||||
bool _debug;
|
||||
Dict* _dict;
|
||||
int _clone_idx; // current cloning iteration/generation in loop unroll
|
||||
public:
|
||||
void* _2p(node_idx_t key) const { return (void*)(intptr_t)key; } // 2 conversion functions to make gcc happy
|
||||
node_idx_t _2_node_idx_t(const void* k) const { return (node_idx_t)(intptr_t)k; }
|
||||
Dict* dict() const { return _dict; }
|
||||
void insert(node_idx_t key, uint64_t val) { assert(_dict->operator[](_2p(key)) == NULL, "key existed"); _dict->Insert(_2p(key), (void*)val); }
|
||||
void insert(node_idx_t key, NodeCloneInfo& ci) { insert(key, ci.get()); }
|
||||
void remove(node_idx_t key) { _dict->Delete(_2p(key)); }
|
||||
uint64_t value(node_idx_t key) const { return (uint64_t)_dict->operator[](_2p(key)); }
|
||||
node_idx_t idx(node_idx_t key) const { return NodeCloneInfo(value(key)).idx(); }
|
||||
int gen(node_idx_t key) const { return NodeCloneInfo(value(key)).gen(); }
|
||||
int gen(const void* k) const { return gen(_2_node_idx_t(k)); }
|
||||
int max_gen() const;
|
||||
void clone(Node* old, Node* nnn, int gen);
|
||||
void verify_insert_and_clone(Node* old, Node* nnn, int gen);
|
||||
void dump(node_idx_t key) const;
|
||||
|
||||
int clone_idx() const { return _clone_idx; }
|
||||
void set_clone_idx(int x) { _clone_idx = x; }
|
||||
bool is_debug() const { return _debug; }
|
||||
void set_debug(bool debug) { _debug = debug; }
|
||||
static const char* debug_option_name;
|
||||
};
|
||||
|
||||
//------------------------------Compile----------------------------------------
|
||||
// This class defines a top-level Compiler invocation.
|
||||
|
||||
@ -312,6 +370,7 @@ class Compile : public Phase {
|
||||
bool _do_freq_based_layout; // True if we intend to do frequency based block layout
|
||||
bool _do_count_invocations; // True if we generate code to count invocations
|
||||
bool _do_method_data_update; // True if we generate code to update MethodData*s
|
||||
bool _do_vector_loop; // True if allowed to execute loop in parallel iterations
|
||||
bool _age_code; // True if we need to profile code age (decrement the aging counter)
|
||||
int _AliasLevel; // Locally-adjusted version of AliasLevel flag.
|
||||
bool _print_assembly; // True if we should dump assembly code for this compilation
|
||||
@ -380,6 +439,7 @@ class Compile : public Phase {
|
||||
Arena _Compile_types; // Arena for all types
|
||||
Arena* _type_arena; // Alias for _Compile_types except in Initialize_shared()
|
||||
Dict* _type_dict; // Intern table
|
||||
CloneMap _clone_map; // used for recording history of cloned nodes
|
||||
void* _type_hwm; // Last allocation (see Type::operator new/delete)
|
||||
size_t _type_last_size; // Last allocation size (see Type::operator new/delete)
|
||||
ciMethod* _last_tf_m; // Cache for
|
||||
@ -586,6 +646,8 @@ class Compile : public Phase {
|
||||
void set_do_count_invocations(bool z){ _do_count_invocations = z; }
|
||||
bool do_method_data_update() const { return _do_method_data_update; }
|
||||
void set_do_method_data_update(bool z) { _do_method_data_update = z; }
|
||||
bool do_vector_loop() const { return _do_vector_loop; }
|
||||
void set_do_vector_loop(bool z) { _do_vector_loop = z; }
|
||||
bool age_code() const { return _age_code; }
|
||||
void set_age_code(bool z) { _age_code = z; }
|
||||
int AliasLevel() const { return _AliasLevel; }
|
||||
@ -1228,6 +1290,11 @@ class Compile : public Phase {
|
||||
|
||||
// Auxiliary method for randomized fuzzing/stressing
|
||||
static bool randomized_select(int count);
|
||||
|
||||
// supporting clone_map
|
||||
CloneMap& clone_map();
|
||||
void set_clone_map(Dict* d);
|
||||
|
||||
};
|
||||
|
||||
#endif // SHARE_VM_OPTO_COMPILE_HPP
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -1210,6 +1210,16 @@ void PhaseIdealLoop::do_unroll( IdealLoopTree *loop, Node_List &old_new, bool ad
|
||||
}
|
||||
loop->dump_head();
|
||||
}
|
||||
|
||||
if (C->do_vector_loop() && (PrintOpto && VerifyLoopOptimizations || TraceLoopOpts)) {
|
||||
Arena* arena = Thread::current()->resource_area();
|
||||
Node_Stack stack(arena, C->unique() >> 2);
|
||||
Node_List rpo_list;
|
||||
VectorSet visited(arena);
|
||||
visited.set(loop_head->_idx);
|
||||
rpo( loop_head, stack, visited, rpo_list );
|
||||
dump(loop, rpo_list.size(), rpo_list );
|
||||
}
|
||||
#endif
|
||||
|
||||
// Remember loop node count before unrolling to detect
|
||||
@ -1497,6 +1507,30 @@ void PhaseIdealLoop::do_unroll( IdealLoopTree *loop, Node_List &old_new, bool ad
|
||||
}
|
||||
|
||||
loop->record_for_igvn();
|
||||
|
||||
#ifndef PRODUCT
|
||||
if (C->do_vector_loop() && (PrintOpto && VerifyLoopOptimizations || TraceLoopOpts)) {
|
||||
tty->print("\nnew loop after unroll\n"); loop->dump_head();
|
||||
for (uint i = 0; i < loop->_body.size(); i++) {
|
||||
loop->_body.at(i)->dump();
|
||||
}
|
||||
if(C->clone_map().is_debug()) {
|
||||
tty->print("\nCloneMap\n");
|
||||
Dict* dict = C->clone_map().dict();
|
||||
DictI i(dict);
|
||||
tty->print_cr("Dict@%p[%d] = ", dict, dict->Size());
|
||||
for (int ii = 0; i.test(); ++i, ++ii) {
|
||||
NodeCloneInfo cl((uint64_t)dict->operator[]((void*)i._key));
|
||||
tty->print("%d->%d:%d,", (int)(intptr_t)i._key, cl.idx(), cl.gen());
|
||||
if (ii % 10 == 9) {
|
||||
tty->print_cr(" ");
|
||||
}
|
||||
}
|
||||
tty->print_cr(" ");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
//------------------------------do_maximally_unroll----------------------------
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1998, 2014, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1998, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -3678,6 +3678,7 @@ void PhaseIdealLoop::dump( ) const {
|
||||
}
|
||||
|
||||
void PhaseIdealLoop::dump( IdealLoopTree *loop, uint idx, Node_List &rpo_list ) const {
|
||||
CloneMap& cm = C->clone_map();
|
||||
loop->dump_head();
|
||||
|
||||
// Now scan for CFG nodes in the same loop
|
||||
@ -3709,6 +3710,7 @@ void PhaseIdealLoop::dump( IdealLoopTree *loop, uint idx, Node_List &rpo_list )
|
||||
cached_idom = find_non_split_ctrl(cached_idom);
|
||||
}
|
||||
tty->print(" ID:%d",computed_idom->_idx);
|
||||
cm.dump(n->_idx);
|
||||
n->dump();
|
||||
if( cached_idom != computed_idom ) {
|
||||
tty->print_cr("*** BROKEN IDOM! Computed as: %d, cached as: %d",
|
||||
@ -3728,6 +3730,7 @@ void PhaseIdealLoop::dump( IdealLoopTree *loop, uint idx, Node_List &rpo_list )
|
||||
for( uint j = 0; j < loop->_nest; j++ )
|
||||
tty->print(" ");
|
||||
tty->print(" ");
|
||||
cm.dump(m->_idx);
|
||||
m->dump();
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -1267,12 +1267,36 @@ void PhaseIdealLoop::sink_use( Node *use, Node *post_loop ) {
|
||||
void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd,
|
||||
Node* side_by_side_idom) {
|
||||
|
||||
#ifndef PRODUCT
|
||||
if (C->do_vector_loop() && PrintOpto) {
|
||||
const char* mname = C->method()->name()->as_quoted_ascii();
|
||||
if (mname != NULL) {
|
||||
tty->print("PhaseIdealLoop::clone_loop: for vectorize method %s\n", mname);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
CloneMap& cm = C->clone_map();
|
||||
Dict* dict = cm.dict();
|
||||
if (C->do_vector_loop()) {
|
||||
cm.set_clone_idx(cm.max_gen()+1);
|
||||
#ifndef PRODUCT
|
||||
if (PrintOpto) {
|
||||
tty->print_cr("PhaseIdealLoop::clone_loop: _clone_idx %d", cm.clone_idx());
|
||||
loop->dump_head();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
// Step 1: Clone the loop body. Make the old->new mapping.
|
||||
uint i;
|
||||
for( i = 0; i < loop->_body.size(); i++ ) {
|
||||
Node *old = loop->_body.at(i);
|
||||
Node *nnn = old->clone();
|
||||
old_new.map( old->_idx, nnn );
|
||||
if (C->do_vector_loop()) {
|
||||
cm.verify_insert_and_clone(old, nnn, cm.clone_idx());
|
||||
}
|
||||
_igvn.register_new_node_with_optimizer(nnn);
|
||||
}
|
||||
|
||||
@ -1335,6 +1359,9 @@ void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd
|
||||
|
||||
// Clone the loop exit control projection
|
||||
Node *newuse = use->clone();
|
||||
if (C->do_vector_loop()) {
|
||||
cm.verify_insert_and_clone(use, newuse, cm.clone_idx());
|
||||
}
|
||||
newuse->set_req(0,nnn);
|
||||
_igvn.register_new_node_with_optimizer(newuse);
|
||||
set_loop(newuse, use_loop);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -1658,6 +1658,9 @@ void Node::dump(const char* suffix, outputStream *st) const {
|
||||
return; // don't process dead nodes
|
||||
}
|
||||
|
||||
if (C->clone_map().value(_idx) != 0) {
|
||||
C->clone_map().dump(_idx);
|
||||
}
|
||||
// Dump node-specific info
|
||||
dump_spec(st);
|
||||
#ifdef ASSERT
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2007, 2014, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2007, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -54,6 +54,7 @@ SuperWord::SuperWord(PhaseIdealLoop* phase) :
|
||||
_mem_slice_head(arena(), 8, 0, NULL), // memory slice heads
|
||||
_mem_slice_tail(arena(), 8, 0, NULL), // memory slice tails
|
||||
_node_info(arena(), 8, 0, SWNodeInfo::initial), // info needed per node
|
||||
_clone_map(phase->C->clone_map()), // map of nodes created in cloning
|
||||
_align_to_ref(NULL), // memory reference to align vectors to
|
||||
_disjoint_ptrs(arena(), 8, 0, OrderedPair::initial), // runtime disambiguated pointer pairs
|
||||
_dg(_arena), // dependence graph
|
||||
@ -68,7 +69,12 @@ SuperWord::SuperWord(PhaseIdealLoop* phase) :
|
||||
_iv(NULL), // induction var
|
||||
_race_possible(false), // cases where SDMU is true
|
||||
_num_work_vecs(0), // amount of vector work we have
|
||||
_num_reductions(0) // amount of reduction work we have
|
||||
_num_reductions(0), // amount of reduction work we have
|
||||
_do_vector_loop(phase->C->do_vector_loop()), // whether to do vectorization/simd style
|
||||
_ii_first(-1), // first loop generation index - only if do_vector_loop()
|
||||
_ii_last(-1), // last loop generation index - only if do_vector_loop()
|
||||
_ii_order(arena(), 8, 0, 0),
|
||||
_vector_loop_debug(phase->C->has_method() && phase->C->method_has_option("VectorizeDebug"))
|
||||
{}
|
||||
|
||||
//------------------------------transform_loop---------------------------
|
||||
@ -147,14 +153,53 @@ void SuperWord::transform_loop(IdealLoopTree* lpt) {
|
||||
//
|
||||
void SuperWord::SLP_extract() {
|
||||
|
||||
#ifndef PRODUCT
|
||||
if (_do_vector_loop && TraceSuperWord) {
|
||||
tty->print("SuperWord::SLP_extract\n");
|
||||
tty->print("input loop\n");
|
||||
_lpt->dump_head();
|
||||
_lpt->dump();
|
||||
for (uint i = 0; i < _lpt->_body.size(); i++) {
|
||||
_lpt->_body.at(i)->dump();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
// Ready the block
|
||||
if (!construct_bb())
|
||||
if (!construct_bb()) {
|
||||
return; // Exit if no interesting nodes or complex graph.
|
||||
|
||||
}
|
||||
// build _dg, _disjoint_ptrs
|
||||
dependence_graph();
|
||||
|
||||
// compute function depth(Node*)
|
||||
compute_max_depth();
|
||||
|
||||
if (_do_vector_loop) {
|
||||
if (mark_generations() != -1) {
|
||||
hoist_loads_in_graph(); // this only rebuild the graph; all basic structs need rebuild explicitly
|
||||
|
||||
if (!construct_bb()) {
|
||||
return; // Exit if no interesting nodes or complex graph.
|
||||
}
|
||||
dependence_graph();
|
||||
compute_max_depth();
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
if (TraceSuperWord) {
|
||||
tty->print_cr("\nSuperWord::_do_vector_loop: graph after hoist_loads_in_graph");
|
||||
_lpt->dump_head();
|
||||
for (int j = 0; j < _block.length(); j++) {
|
||||
Node* n = _block.at(j);
|
||||
int d = depth(n);
|
||||
for (int i = 0; i < d; i++) tty->print("%s", " ");
|
||||
tty->print("%d :", d);
|
||||
n->dump();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
compute_vector_element_type();
|
||||
|
||||
// Attempt vectorization
|
||||
@ -163,6 +208,17 @@ void SuperWord::SLP_extract() {
|
||||
|
||||
extend_packlist();
|
||||
|
||||
if (_do_vector_loop) {
|
||||
if (_packset.length() == 0) {
|
||||
#ifndef PRODUCT
|
||||
if (TraceSuperWord) {
|
||||
tty->print_cr("\nSuperWord::_do_vector_loop DFA could not build packset, now trying to build anyway");
|
||||
}
|
||||
#endif
|
||||
pack_parallel();
|
||||
}
|
||||
}
|
||||
|
||||
combine_packs();
|
||||
|
||||
construct_my_pack_map();
|
||||
@ -228,7 +284,7 @@ void SuperWord::find_adjacent_refs() {
|
||||
// Create initial pack pairs of memory operations for which
|
||||
// alignment is set and vectors will be aligned.
|
||||
bool create_pack = true;
|
||||
if (memory_alignment(mem_ref, best_iv_adjustment) == 0) {
|
||||
if (memory_alignment(mem_ref, best_iv_adjustment) == 0 || _do_vector_loop) {
|
||||
if (!Matcher::misaligned_vectors_ok()) {
|
||||
int vw = vector_width(mem_ref);
|
||||
int vw_best = vector_width(best_align_to_mem_ref);
|
||||
@ -274,7 +330,9 @@ void SuperWord::find_adjacent_refs() {
|
||||
Node_List* pair = new Node_List();
|
||||
pair->push(s1);
|
||||
pair->push(s2);
|
||||
_packset.append(pair);
|
||||
if (!_do_vector_loop || _clone_map.idx(s1->_idx) == _clone_map.idx(s2->_idx)) {
|
||||
_packset.append(pair);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -419,7 +477,7 @@ MemNode* SuperWord::find_align_to_ref(Node_List &memops) {
|
||||
|
||||
#ifdef ASSERT
|
||||
if (TraceSuperWord && Verbose) {
|
||||
tty->print_cr("\nVector memops after find_align_to_refs");
|
||||
tty->print_cr("\nVector memops after find_align_to_ref");
|
||||
for (uint i = 0; i < memops.size(); i++) {
|
||||
MemNode* s = memops.at(i)->as_Mem();
|
||||
s->dump();
|
||||
@ -528,6 +586,14 @@ void SuperWord::dependence_graph() {
|
||||
// Get slice in predecessor order (last is first)
|
||||
mem_slice_preds(n_tail, n, _nlist);
|
||||
|
||||
#ifndef PRODUCT
|
||||
if(TraceSuperWord && Verbose) {
|
||||
tty->print_cr("SuperWord::dependence_graph: built a new mem slice");
|
||||
for (int j = _nlist.length() - 1; j >= 0 ; j--) {
|
||||
_nlist.at(j)->dump();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
// Make the slice dependent on the root
|
||||
DepMem* slice = _dg.dep(n);
|
||||
_dg.make_edge(_dg.root(), slice);
|
||||
@ -2362,6 +2428,8 @@ void SuperWord::init() {
|
||||
_data_entry.clear();
|
||||
_mem_slice_head.clear();
|
||||
_mem_slice_tail.clear();
|
||||
_iteration_first.clear();
|
||||
_iteration_last.clear();
|
||||
_node_info.clear();
|
||||
_align_to_ref = NULL;
|
||||
_lpt = NULL;
|
||||
@ -2373,6 +2441,18 @@ void SuperWord::init() {
|
||||
_num_reductions = 0;
|
||||
}
|
||||
|
||||
//------------------------------restart---------------------------
|
||||
void SuperWord::restart() {
|
||||
_dg.init();
|
||||
_packset.clear();
|
||||
_disjoint_ptrs.clear();
|
||||
_block.clear();
|
||||
_data_entry.clear();
|
||||
_mem_slice_head.clear();
|
||||
_mem_slice_tail.clear();
|
||||
_node_info.clear();
|
||||
}
|
||||
|
||||
//------------------------------print_packset---------------------------
|
||||
void SuperWord::print_packset() {
|
||||
#ifndef PRODUCT
|
||||
@ -2750,3 +2830,401 @@ void DepSuccs::next() {
|
||||
_done = true;
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// --------------------------------- vectorization/simd -----------------------------------
|
||||
//
|
||||
Node* SuperWord::find_phi_for_mem_dep(LoadNode* ld) {
|
||||
assert(in_bb(ld), "must be in block");
|
||||
if (_clone_map.gen(ld->_idx) == _ii_first) {
|
||||
#ifndef PRODUCT
|
||||
if (_vector_loop_debug) {
|
||||
tty->print_cr("SuperWord::find_phi_for_mem_dep _clone_map.gen(ld->_idx)=%d",
|
||||
_clone_map.gen(ld->_idx));
|
||||
}
|
||||
#endif
|
||||
return NULL; //we think that any ld in the first gen being vectorizable
|
||||
}
|
||||
|
||||
Node* mem = ld->in(MemNode::Memory);
|
||||
if (mem->outcnt() <= 1) {
|
||||
// we don't want to remove the only edge from mem node to load
|
||||
#ifndef PRODUCT
|
||||
if (_vector_loop_debug) {
|
||||
tty->print_cr("SuperWord::find_phi_for_mem_dep input node %d to load %d has no other outputs and edge mem->load cannot be removed",
|
||||
mem->_idx, ld->_idx);
|
||||
ld->dump();
|
||||
mem->dump();
|
||||
}
|
||||
#endif
|
||||
return NULL;
|
||||
}
|
||||
if (!in_bb(mem) || _clone_map.gen(mem->_idx) == _clone_map.gen(ld->_idx)) {
|
||||
#ifndef PRODUCT
|
||||
if (_vector_loop_debug) {
|
||||
tty->print_cr("SuperWord::find_phi_for_mem_dep _clone_map.gen(mem->_idx)=%d",
|
||||
_clone_map.gen(mem->_idx));
|
||||
}
|
||||
#endif
|
||||
return NULL; // does not depend on loop volatile node or depends on the same generation
|
||||
}
|
||||
|
||||
//otherwise first node should depend on mem-phi
|
||||
Node* first = first_node(ld);
|
||||
assert(first->is_Load(), "must be Load");
|
||||
Node* phi = first->as_Load()->in(MemNode::Memory);
|
||||
if (!phi->is_Phi() || phi->bottom_type() != Type::MEMORY) {
|
||||
#ifndef PRODUCT
|
||||
if (_vector_loop_debug) {
|
||||
tty->print_cr("SuperWord::find_phi_for_mem_dep load is not vectorizable node, since it's `first` does not take input from mem phi");
|
||||
ld->dump();
|
||||
first->dump();
|
||||
}
|
||||
#endif
|
||||
return NULL;
|
||||
}
|
||||
|
||||
Node* tail = 0;
|
||||
for (int m = 0; m < _mem_slice_head.length(); m++) {
|
||||
if (_mem_slice_head.at(m) == phi) {
|
||||
tail = _mem_slice_tail.at(m);
|
||||
}
|
||||
}
|
||||
if (tail == 0) { //test that found phi is in the list _mem_slice_head
|
||||
#ifndef PRODUCT
|
||||
if (_vector_loop_debug) {
|
||||
tty->print_cr("SuperWord::find_phi_for_mem_dep load %d is not vectorizable node, its phi %d is not _mem_slice_head",
|
||||
ld->_idx, phi->_idx);
|
||||
ld->dump();
|
||||
phi->dump();
|
||||
}
|
||||
#endif
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// now all conditions are met
|
||||
return phi;
|
||||
}
|
||||
|
||||
Node* SuperWord::first_node(Node* nd) {
|
||||
for (int ii = 0; ii < _iteration_first.length(); ii++) {
|
||||
Node* nnn = _iteration_first.at(ii);
|
||||
if (_clone_map.idx(nnn->_idx) == _clone_map.idx(nd->_idx)) {
|
||||
#ifndef PRODUCT
|
||||
if (_vector_loop_debug) {
|
||||
tty->print_cr("SuperWord::first_node: %d is the first iteration node for %d (_clone_map.idx(nnn->_idx) = %d)",
|
||||
nnn->_idx, nd->_idx, _clone_map.idx(nnn->_idx));
|
||||
}
|
||||
#endif
|
||||
return nnn;
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
if (_vector_loop_debug) {
|
||||
tty->print_cr("SuperWord::first_node: did not find first iteration node for %d (_clone_map.idx(nd->_idx)=%d)",
|
||||
nd->_idx, _clone_map.idx(nd->_idx));
|
||||
}
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
Node* SuperWord::last_node(Node* nd) {
|
||||
for (int ii = 0; ii < _iteration_last.length(); ii++) {
|
||||
Node* nnn = _iteration_last.at(ii);
|
||||
if (_clone_map.idx(nnn->_idx) == _clone_map.idx(nd->_idx)) {
|
||||
#ifndef PRODUCT
|
||||
if (_vector_loop_debug) {
|
||||
tty->print_cr("SuperWord::last_node _clone_map.idx(nnn->_idx)=%d, _clone_map.idx(nd->_idx)=%d",
|
||||
_clone_map.idx(nnn->_idx), _clone_map.idx(nd->_idx));
|
||||
}
|
||||
#endif
|
||||
return nnn;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int SuperWord::mark_generations() {
|
||||
Node *ii_err = 0, *tail_err;
|
||||
for (int i = 0; i < _mem_slice_head.length(); i++) {
|
||||
Node* phi = _mem_slice_head.at(i);
|
||||
assert(phi->is_Phi(), "must be phi");
|
||||
|
||||
Node* tail = _mem_slice_tail.at(i);
|
||||
if (_ii_last == -1) {
|
||||
tail_err = tail;
|
||||
_ii_last = _clone_map.gen(tail->_idx);
|
||||
}
|
||||
else if (_ii_last != _clone_map.gen(tail->_idx)) {
|
||||
#ifndef PRODUCT
|
||||
if (TraceSuperWord && Verbose) {
|
||||
tty->print_cr("SuperWord::mark_generations _ii_last error - found different generations in two tail nodes ");
|
||||
tail->dump();
|
||||
tail_err->dump();
|
||||
}
|
||||
#endif
|
||||
return -1;
|
||||
}
|
||||
|
||||
// find first iteration in the loop
|
||||
for (DUIterator_Fast imax, i = phi->fast_outs(imax); i < imax; i++) {
|
||||
Node* ii = phi->fast_out(i);
|
||||
if (in_bb(ii) && ii->is_Store()) { // we speculate that normally Stores of one and one only generation have deps from mem phi
|
||||
if (_ii_first == -1) {
|
||||
ii_err = ii;
|
||||
_ii_first = _clone_map.gen(ii->_idx);
|
||||
} else if (_ii_first != _clone_map.gen(ii->_idx)) {
|
||||
#ifndef PRODUCT
|
||||
if (TraceSuperWord && Verbose) {
|
||||
tty->print_cr("SuperWord::mark_generations _ii_first error - found different generations in two nodes ");
|
||||
ii->dump();
|
||||
ii_err->dump();
|
||||
}
|
||||
#endif
|
||||
return -1; // this phi has Stores from different generations of unroll and cannot be simd/vectorized
|
||||
}
|
||||
}
|
||||
}//for (DUIterator_Fast imax,
|
||||
}//for (int i...
|
||||
|
||||
if (_ii_first == -1 || _ii_last == -1) {
|
||||
#ifndef PRODUCT
|
||||
if (TraceSuperWord && Verbose) {
|
||||
tty->print_cr("SuperWord::mark_generations unknown error, something vent wrong");
|
||||
}
|
||||
#endif
|
||||
return -1; // something vent wrong
|
||||
}
|
||||
// collect nodes in the first and last generations
|
||||
assert(_iteration_first.length() == 0, "_iteration_first must be empty");
|
||||
assert(_iteration_last.length() == 0, "_iteration_last must be empty");
|
||||
for (int j = 0; j < _block.length(); j++) {
|
||||
Node* n = _block.at(j);
|
||||
node_idx_t gen = _clone_map.gen(n->_idx);
|
||||
if ((signed)gen == _ii_first) {
|
||||
_iteration_first.push(n);
|
||||
} else if ((signed)gen == _ii_last) {
|
||||
_iteration_last.push(n);
|
||||
}
|
||||
}
|
||||
|
||||
// building order of iterations
|
||||
assert(_ii_order.length() == 0, "should be empty");
|
||||
if (ii_err != 0) {
|
||||
assert(in_bb(ii_err) && ii_err->is_Store(), "should be Store in bb");
|
||||
Node* nd = ii_err;
|
||||
while(_clone_map.gen(nd->_idx) != _ii_last) {
|
||||
_ii_order.push(_clone_map.gen(nd->_idx));
|
||||
bool found = false;
|
||||
for (DUIterator_Fast imax, i = nd->fast_outs(imax); i < imax; i++) {
|
||||
Node* use = nd->fast_out(i);
|
||||
if (_clone_map.idx(use->_idx) == _clone_map.idx(nd->_idx) && use->as_Store()->in(MemNode::Memory) == nd) {
|
||||
found = true;
|
||||
nd = use;
|
||||
break;
|
||||
}
|
||||
}//for
|
||||
|
||||
if (found == false) {
|
||||
#ifndef PRODUCT
|
||||
if (TraceSuperWord && Verbose) {
|
||||
tty->print_cr("SuperWord::mark_generations: Cannot build order of iterations - no dependent Store for %d", nd->_idx);
|
||||
}
|
||||
#endif
|
||||
_ii_order.clear();
|
||||
return -1;
|
||||
}
|
||||
} //while
|
||||
_ii_order.push(_clone_map.gen(nd->_idx));
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
if (_vector_loop_debug) {
|
||||
tty->print_cr("SuperWord::mark_generations");
|
||||
tty->print_cr("First generation (%d) nodes:", _ii_first);
|
||||
for (int ii = 0; ii < _iteration_first.length(); ii++) _iteration_first.at(ii)->dump();
|
||||
tty->print_cr("Last generation (%d) nodes:", _ii_last);
|
||||
for (int ii = 0; ii < _iteration_last.length(); ii++) _iteration_last.at(ii)->dump();
|
||||
tty->print_cr(" ");
|
||||
|
||||
tty->print("SuperWord::List of generations: ");
|
||||
for (int jj = 0; jj < _ii_order.length(); ++jj) {
|
||||
tty->print("%d:%d ", jj, _ii_order.at(jj));
|
||||
}
|
||||
tty->print_cr(" ");
|
||||
}
|
||||
#endif
|
||||
|
||||
return _ii_first;
|
||||
}
|
||||
|
||||
bool SuperWord::fix_commutative_inputs(Node* gold, Node* fix) {
|
||||
assert(gold->is_Add() && fix->is_Add() || gold->is_Mul() && fix->is_Mul(), "should be only Add or Mul nodes");
|
||||
assert(_clone_map.idx(gold->_idx) == _clone_map.idx(fix->_idx), "should be clones of the same node");
|
||||
Node* gin1 = gold->in(1);
|
||||
Node* gin2 = gold->in(2);
|
||||
Node* fin1 = fix->in(1);
|
||||
Node* fin2 = fix->in(2);
|
||||
bool swapped = false;
|
||||
|
||||
if (in_bb(gin1) && in_bb(gin2) && in_bb(fin1) && in_bb(fin1)) {
|
||||
if (_clone_map.idx(gin1->_idx) == _clone_map.idx(fin1->_idx) &&
|
||||
_clone_map.idx(gin2->_idx) == _clone_map.idx(fin2->_idx)) {
|
||||
return true; // nothing to fix
|
||||
}
|
||||
if (_clone_map.idx(gin1->_idx) == _clone_map.idx(fin2->_idx) &&
|
||||
_clone_map.idx(gin2->_idx) == _clone_map.idx(fin1->_idx)) {
|
||||
fix->swap_edges(1, 2);
|
||||
swapped = true;
|
||||
}
|
||||
}
|
||||
// at least one input comes from outside of bb
|
||||
if (gin1->_idx == fin1->_idx) {
|
||||
return true; // nothing to fix
|
||||
}
|
||||
if (!swapped && (gin1->_idx == fin2->_idx || gin2->_idx == fin1->_idx)) { //swapping is expensive, check condition first
|
||||
fix->swap_edges(1, 2);
|
||||
swapped = true;
|
||||
}
|
||||
|
||||
if (swapped) {
|
||||
#ifndef PRODUCT
|
||||
if (_vector_loop_debug) {
|
||||
tty->print_cr("SuperWord::fix_commutative_inputs: fixed node %d", fix->_idx);
|
||||
}
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
if (TraceSuperWord && Verbose) {
|
||||
tty->print_cr("SuperWord::fix_commutative_inputs: cannot fix node %d", fix->_idx);
|
||||
}
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
|
||||
bool SuperWord::pack_parallel() {
|
||||
#ifndef PRODUCT
|
||||
if (_vector_loop_debug) {
|
||||
tty->print_cr("SuperWord::pack_parallel: START");
|
||||
}
|
||||
#endif
|
||||
|
||||
_packset.clear();
|
||||
|
||||
for (int ii = 0; ii < _iteration_first.length(); ii++) {
|
||||
Node* nd = _iteration_first.at(ii);
|
||||
if (in_bb(nd) && (nd->is_Load() || nd->is_Store() || nd->is_Add() || nd->is_Mul())) {
|
||||
Node_List* pk = new Node_List();
|
||||
pk->push(nd);
|
||||
for (int gen = 1; gen < _ii_order.length(); ++gen) {
|
||||
for (int kk = 0; kk < _block.length(); kk++) {
|
||||
Node* clone = _block.at(kk);
|
||||
if (_clone_map.idx(clone->_idx) == _clone_map.idx(nd->_idx) &&
|
||||
_clone_map.gen(clone->_idx) == _ii_order.at(gen)) {
|
||||
if (nd->is_Add() || nd->is_Mul()) {
|
||||
fix_commutative_inputs(nd, clone);
|
||||
}
|
||||
pk->push(clone);
|
||||
if (pk->size() == 4) {
|
||||
_packset.append(pk);
|
||||
#ifndef PRODUCT
|
||||
if (_vector_loop_debug) {
|
||||
tty->print_cr("SuperWord::pack_parallel: added pack ");
|
||||
pk->dump();
|
||||
}
|
||||
#endif
|
||||
if (_clone_map.gen(clone->_idx) != _ii_last) {
|
||||
pk = new Node_List();
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}//for
|
||||
}//if
|
||||
}//for
|
||||
|
||||
#ifndef PRODUCT
|
||||
if (_vector_loop_debug) {
|
||||
tty->print_cr("SuperWord::pack_parallel: END");
|
||||
}
|
||||
#endif
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool SuperWord::hoist_loads_in_graph() {
|
||||
GrowableArray<Node*> loads;
|
||||
|
||||
#ifndef PRODUCT
|
||||
if (_vector_loop_debug) {
|
||||
tty->print_cr("SuperWord::hoist_loads_in_graph: total number _mem_slice_head.length() = %d", _mem_slice_head.length());
|
||||
}
|
||||
#endif
|
||||
|
||||
for (int i = 0; i < _mem_slice_head.length(); i++) {
|
||||
Node* n = _mem_slice_head.at(i);
|
||||
if ( !in_bb(n) || !n->is_Phi() || n->bottom_type() != Type::MEMORY) {
|
||||
#ifndef PRODUCT
|
||||
if (TraceSuperWord && Verbose) {
|
||||
tty->print_cr("SuperWord::hoist_loads_in_graph: skipping unexpected node n=%d", n->_idx);
|
||||
}
|
||||
#endif
|
||||
continue;
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
if (_vector_loop_debug) {
|
||||
tty->print_cr("SuperWord::hoist_loads_in_graph: processing phi %d = _mem_slice_head.at(%d);", n->_idx, i);
|
||||
}
|
||||
#endif
|
||||
|
||||
for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
|
||||
Node* ld = n->fast_out(i);
|
||||
if (ld->is_Load() && ld->as_Load()->in(MemNode::Memory) == n && in_bb(ld)) {
|
||||
for (int i = 0; i < _block.length(); i++) {
|
||||
Node* ld2 = _block.at(i);
|
||||
if (ld2->is_Load() &&
|
||||
_clone_map.idx(ld->_idx) == _clone_map.idx(ld2->_idx) &&
|
||||
_clone_map.gen(ld->_idx) != _clone_map.gen(ld2->_idx)) { // <= do not collect the first generation ld
|
||||
#ifndef PRODUCT
|
||||
if (_vector_loop_debug) {
|
||||
tty->print_cr("SuperWord::hoist_loads_in_graph: will try to hoist load ld2->_idx=%d, cloned from %d (ld->_idx=%d)",
|
||||
ld2->_idx, _clone_map.idx(ld->_idx), ld->_idx);
|
||||
}
|
||||
#endif
|
||||
// could not do on-the-fly, since iterator is immutable
|
||||
loads.push(ld2);
|
||||
}
|
||||
}// for
|
||||
}//if
|
||||
}//for (DUIterator_Fast imax,
|
||||
}//for (int i = 0; i
|
||||
|
||||
for (int i = 0; i < loads.length(); i++) {
|
||||
LoadNode* ld = loads.at(i)->as_Load();
|
||||
Node* phi = find_phi_for_mem_dep(ld);
|
||||
if (phi != NULL) {
|
||||
#ifndef PRODUCT
|
||||
if (_vector_loop_debug) {
|
||||
tty->print_cr("SuperWord::hoist_loads_in_graph replacing MemNode::Memory(%d) edge in %d with one from %d",
|
||||
MemNode::Memory, ld->_idx, phi->_idx);
|
||||
}
|
||||
#endif
|
||||
_igvn.replace_input_of(ld, MemNode::Memory, phi);
|
||||
}
|
||||
}//for
|
||||
|
||||
restart(); // invalidate all basic structures, since we rebuilt the graph
|
||||
|
||||
#ifndef PRODUCT
|
||||
if (TraceSuperWord && Verbose) {
|
||||
tty->print_cr("\nSuperWord::hoist_loads_in_graph() the graph was rebuilt, all structures invalidated and need rebuild");
|
||||
}
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2007, 2013, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2007, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -218,8 +218,10 @@ class SuperWord : public ResourceObj {
|
||||
GrowableArray<Node*> _data_entry; // Nodes with all inputs from outside
|
||||
GrowableArray<Node*> _mem_slice_head; // Memory slice head nodes
|
||||
GrowableArray<Node*> _mem_slice_tail; // Memory slice tail nodes
|
||||
|
||||
GrowableArray<Node*> _iteration_first; // nodes in the generation that has deps from phi
|
||||
GrowableArray<Node*> _iteration_last; // nodes in the generation that has deps to phi
|
||||
GrowableArray<SWNodeInfo> _node_info; // Info needed per node
|
||||
CloneMap& _clone_map; // map of nodes created in cloning
|
||||
|
||||
MemNode* _align_to_ref; // Memory reference that pre-loop will align to
|
||||
|
||||
@ -250,8 +252,13 @@ class SuperWord : public ResourceObj {
|
||||
Node* _bb; // Current basic block
|
||||
PhiNode* _iv; // Induction var
|
||||
bool _race_possible; // In cases where SDMU is true
|
||||
bool _do_vector_loop; // whether to do vectorization/simd style
|
||||
bool _vector_loop_debug; // provide more printing in debug mode
|
||||
int _num_work_vecs; // Number of non memory vector operations
|
||||
int _num_reductions; // Number of reduction expressions applied
|
||||
int _ii_first; // generation with direct deps from mem phi
|
||||
int _ii_last; // generation with direct deps to mem phi
|
||||
GrowableArray<int> _ii_order;
|
||||
|
||||
// Accessors
|
||||
Arena* arena() { return _arena; }
|
||||
@ -326,6 +333,22 @@ class SuperWord : public ResourceObj {
|
||||
int get_iv_adjustment(MemNode* mem);
|
||||
// Can the preloop align the reference to position zero in the vector?
|
||||
bool ref_is_alignable(SWPointer& p);
|
||||
// rebuild the graph so all loads in different iterations of cloned loop become dependant on phi node (in _do_vector_loop only)
|
||||
bool hoist_loads_in_graph();
|
||||
// Test whether MemNode::Memory dependency to the same load but in the first iteration of this loop is coming from memory phi
|
||||
// Return false if failed.
|
||||
Node* find_phi_for_mem_dep(LoadNode* ld);
|
||||
// Return same node but from the first generation. Return 0, if not found
|
||||
Node* first_node(Node* nd);
|
||||
// Return same node as this but from the last generation. Return 0, if not found
|
||||
Node* last_node(Node* n);
|
||||
// Mark nodes belonging to first and last generation,
|
||||
// returns first generation index or -1 if vectorization/simd is impossible
|
||||
int mark_generations();
|
||||
// swapping inputs of commutative instruction (Add or Mul)
|
||||
bool fix_commutative_inputs(Node* gold, Node* fix);
|
||||
// make packs forcefully (in _do_vector_loop only)
|
||||
bool pack_parallel();
|
||||
// Construct dependency graph.
|
||||
void dependence_graph();
|
||||
// Return a memory slice (node list) in predecessor order starting at "start"
|
||||
@ -419,6 +442,8 @@ class SuperWord : public ResourceObj {
|
||||
// Is the use of d1 in u1 at the same operand position as d2 in u2?
|
||||
bool opnd_positions_match(Node* d1, Node* u1, Node* d2, Node* u2);
|
||||
void init();
|
||||
// clean up some basic structures - used if the ideal graph was rebuilt
|
||||
void restart();
|
||||
|
||||
// print methods
|
||||
void print_packset();
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -381,3 +381,4 @@ template class BasicHashtable<mtClass>;
|
||||
template class BasicHashtable<mtSymbol>;
|
||||
template class BasicHashtable<mtCode>;
|
||||
template class BasicHashtable<mtInternal>;
|
||||
template class BasicHashtable<mtCompiler>;
|
||||
|
Loading…
x
Reference in New Issue
Block a user