8326962: C2 SuperWord: cache VPointer
Reviewed-by: chagedorn, kvn
This commit is contained in:
parent
2931458711
commit
f762637be2
@ -531,13 +531,13 @@ void SuperWord::find_adjacent_refs() {
|
|||||||
set_align_to_ref(align_to_mem_ref);
|
set_align_to_ref(align_to_mem_ref);
|
||||||
}
|
}
|
||||||
|
|
||||||
VPointer align_to_ref_p(mem_ref, _vloop);
|
const VPointer& align_to_ref_p = vpointer(mem_ref);
|
||||||
// Set alignment relative to "align_to_ref" for all related memory operations.
|
// Set alignment relative to "align_to_ref" for all related memory operations.
|
||||||
for (int i = memops.size() - 1; i >= 0; i--) {
|
for (int i = memops.size() - 1; i >= 0; i--) {
|
||||||
MemNode* s = memops.at(i)->as_Mem();
|
MemNode* s = memops.at(i)->as_Mem();
|
||||||
if (isomorphic(s, mem_ref) &&
|
if (isomorphic(s, mem_ref) &&
|
||||||
(!_do_vector_loop || same_origin_idx(s, mem_ref))) {
|
(!_do_vector_loop || same_origin_idx(s, mem_ref))) {
|
||||||
VPointer p2(s, _vloop);
|
const VPointer& p2 = vpointer(s);
|
||||||
if (p2.comparable(align_to_ref_p)) {
|
if (p2.comparable(align_to_ref_p)) {
|
||||||
int align = memory_alignment(s, iv_adjustment);
|
int align = memory_alignment(s, iv_adjustment);
|
||||||
set_alignment(s, align);
|
set_alignment(s, align);
|
||||||
@ -593,11 +593,11 @@ MemNode* SuperWord::find_align_to_ref(Node_List &memops, int &idx) {
|
|||||||
// Count number of comparable memory ops
|
// Count number of comparable memory ops
|
||||||
for (uint i = 0; i < memops.size(); i++) {
|
for (uint i = 0; i < memops.size(); i++) {
|
||||||
MemNode* s1 = memops.at(i)->as_Mem();
|
MemNode* s1 = memops.at(i)->as_Mem();
|
||||||
VPointer p1(s1, _vloop);
|
const VPointer& p1 = vpointer(s1);
|
||||||
for (uint j = i+1; j < memops.size(); j++) {
|
for (uint j = i+1; j < memops.size(); j++) {
|
||||||
MemNode* s2 = memops.at(j)->as_Mem();
|
MemNode* s2 = memops.at(j)->as_Mem();
|
||||||
if (isomorphic(s1, s2)) {
|
if (isomorphic(s1, s2)) {
|
||||||
VPointer p2(s2, _vloop);
|
const VPointer& p2 = vpointer(s2);
|
||||||
if (p1.comparable(p2)) {
|
if (p1.comparable(p2)) {
|
||||||
(*cmp_ct.adr_at(i))++;
|
(*cmp_ct.adr_at(i))++;
|
||||||
(*cmp_ct.adr_at(j))++;
|
(*cmp_ct.adr_at(j))++;
|
||||||
@ -618,7 +618,7 @@ MemNode* SuperWord::find_align_to_ref(Node_List &memops, int &idx) {
|
|||||||
if (s->is_Store()) {
|
if (s->is_Store()) {
|
||||||
int vw = vector_width_in_bytes(s);
|
int vw = vector_width_in_bytes(s);
|
||||||
assert(vw > 1, "sanity");
|
assert(vw > 1, "sanity");
|
||||||
VPointer p(s, _vloop);
|
const VPointer& p = vpointer(s);
|
||||||
if ( cmp_ct.at(j) > max_ct ||
|
if ( cmp_ct.at(j) > max_ct ||
|
||||||
(cmp_ct.at(j) == max_ct &&
|
(cmp_ct.at(j) == max_ct &&
|
||||||
( vw > max_vw ||
|
( vw > max_vw ||
|
||||||
@ -641,7 +641,7 @@ MemNode* SuperWord::find_align_to_ref(Node_List &memops, int &idx) {
|
|||||||
if (s->is_Load()) {
|
if (s->is_Load()) {
|
||||||
int vw = vector_width_in_bytes(s);
|
int vw = vector_width_in_bytes(s);
|
||||||
assert(vw > 1, "sanity");
|
assert(vw > 1, "sanity");
|
||||||
VPointer p(s, _vloop);
|
const VPointer& p = vpointer(s);
|
||||||
if ( cmp_ct.at(j) > max_ct ||
|
if ( cmp_ct.at(j) > max_ct ||
|
||||||
(cmp_ct.at(j) == max_ct &&
|
(cmp_ct.at(j) == max_ct &&
|
||||||
( vw > max_vw ||
|
( vw > max_vw ||
|
||||||
@ -714,7 +714,7 @@ int SuperWord::get_vw_bytes_special(MemNode* s) {
|
|||||||
//---------------------------get_iv_adjustment---------------------------
|
//---------------------------get_iv_adjustment---------------------------
|
||||||
// Calculate loop's iv adjustment for this memory ops.
|
// Calculate loop's iv adjustment for this memory ops.
|
||||||
int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
|
int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
|
||||||
VPointer align_to_ref_p(mem_ref, _vloop);
|
const VPointer& align_to_ref_p = vpointer(mem_ref);
|
||||||
int offset = align_to_ref_p.offset_in_bytes();
|
int offset = align_to_ref_p.offset_in_bytes();
|
||||||
int scale = align_to_ref_p.scale_in_bytes();
|
int scale = align_to_ref_p.scale_in_bytes();
|
||||||
int elt_size = align_to_ref_p.memory_size();
|
int elt_size = align_to_ref_p.memory_size();
|
||||||
@ -875,8 +875,8 @@ bool SuperWord::are_adjacent_refs(Node* s1, Node* s2) const {
|
|||||||
|
|
||||||
// Adjacent memory references must have the same base, be comparable
|
// Adjacent memory references must have the same base, be comparable
|
||||||
// and have the correct distance between them.
|
// and have the correct distance between them.
|
||||||
VPointer p1(s1->as_Mem(), _vloop);
|
const VPointer& p1 = vpointer(s1->as_Mem());
|
||||||
VPointer p2(s2->as_Mem(), _vloop);
|
const VPointer& p2 = vpointer(s2->as_Mem());
|
||||||
if (p1.base() != p2.base() || !p1.comparable(p2)) return false;
|
if (p1.base() != p2.base() || !p1.comparable(p2)) return false;
|
||||||
int diff = p2.offset_in_bytes() - p1.offset_in_bytes();
|
int diff = p2.offset_in_bytes() - p1.offset_in_bytes();
|
||||||
return diff == data_size(s1);
|
return diff == data_size(s1);
|
||||||
@ -1637,7 +1637,7 @@ const AlignmentSolution* SuperWord::pack_alignment_solution(const Node_List* pac
|
|||||||
assert(pack != nullptr && (pack->at(0)->is_Load() || pack->at(0)->is_Store()), "only load/store packs");
|
assert(pack != nullptr && (pack->at(0)->is_Load() || pack->at(0)->is_Store()), "only load/store packs");
|
||||||
|
|
||||||
const MemNode* mem_ref = pack->at(0)->as_Mem();
|
const MemNode* mem_ref = pack->at(0)->as_Mem();
|
||||||
VPointer mem_ref_p(mem_ref, _vloop);
|
const VPointer& mem_ref_p = vpointer(mem_ref);
|
||||||
const CountedLoopEndNode* pre_end = _vloop.pre_loop_end();
|
const CountedLoopEndNode* pre_end = _vloop.pre_loop_end();
|
||||||
assert(pre_end->stride_is_con(), "pre loop stride is constant");
|
assert(pre_end->stride_is_con(), "pre loop stride is constant");
|
||||||
|
|
||||||
@ -3310,7 +3310,7 @@ int SuperWord::memory_alignment(MemNode* s, int iv_adjust) {
|
|||||||
tty->print("SuperWord::memory_alignment within a vector memory reference for %d: ", s->_idx); s->dump();
|
tty->print("SuperWord::memory_alignment within a vector memory reference for %d: ", s->_idx); s->dump();
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
VPointer p(s, _vloop);
|
const VPointer& p = vpointer(s);
|
||||||
if (!p.valid()) {
|
if (!p.valid()) {
|
||||||
NOT_PRODUCT(if(is_trace_superword_alignment()) tty->print_cr("SuperWord::memory_alignment: VPointer p invalid, return bottom_align");)
|
NOT_PRODUCT(if(is_trace_superword_alignment()) tty->print_cr("SuperWord::memory_alignment: VPointer p invalid, return bottom_align");)
|
||||||
return bottom_align;
|
return bottom_align;
|
||||||
@ -3413,7 +3413,7 @@ void SuperWord::adjust_pre_loop_limit_to_align_main_loop_vectors() {
|
|||||||
Node* orig_limit = pre_opaq->original_loop_limit();
|
Node* orig_limit = pre_opaq->original_loop_limit();
|
||||||
assert(orig_limit != nullptr && igvn().type(orig_limit) != Type::TOP, "");
|
assert(orig_limit != nullptr && igvn().type(orig_limit) != Type::TOP, "");
|
||||||
|
|
||||||
VPointer align_to_ref_p(align_to_ref, _vloop);
|
const VPointer& align_to_ref_p = vpointer(align_to_ref);
|
||||||
assert(align_to_ref_p.valid(), "sanity");
|
assert(align_to_ref_p.valid(), "sanity");
|
||||||
|
|
||||||
// For the main-loop, we want the address of align_to_ref to be memory aligned
|
// For the main-loop, we want the address of align_to_ref to be memory aligned
|
||||||
|
@ -425,7 +425,7 @@ class SuperWord : public ResourceObj {
|
|||||||
// Decide if loop can eventually be vectorized, and what unrolling factor is required.
|
// Decide if loop can eventually be vectorized, and what unrolling factor is required.
|
||||||
static void unrolling_analysis(const VLoop &vloop, int &local_loop_unroll_factor);
|
static void unrolling_analysis(const VLoop &vloop, int &local_loop_unroll_factor);
|
||||||
|
|
||||||
// VLoop Accessors
|
// VLoop accessors
|
||||||
PhaseIdealLoop* phase() const { return _vloop.phase(); }
|
PhaseIdealLoop* phase() const { return _vloop.phase(); }
|
||||||
PhaseIterGVN& igvn() const { return _vloop.phase()->igvn(); }
|
PhaseIterGVN& igvn() const { return _vloop.phase()->igvn(); }
|
||||||
IdealLoopTree* lpt() const { return _vloop.lpt(); }
|
IdealLoopTree* lpt() const { return _vloop.lpt(); }
|
||||||
@ -434,7 +434,7 @@ class SuperWord : public ResourceObj {
|
|||||||
int iv_stride() const { return cl()->stride_con(); }
|
int iv_stride() const { return cl()->stride_con(); }
|
||||||
bool in_bb(const Node* n) const { return _vloop.in_bb(n); }
|
bool in_bb(const Node* n) const { return _vloop.in_bb(n); }
|
||||||
|
|
||||||
// VLoopReductions Accessors
|
// VLoopReductions accessors
|
||||||
bool is_marked_reduction(const Node* n) const {
|
bool is_marked_reduction(const Node* n) const {
|
||||||
return _vloop_analyzer.reductions().is_marked_reduction(n);
|
return _vloop_analyzer.reductions().is_marked_reduction(n);
|
||||||
}
|
}
|
||||||
@ -443,12 +443,12 @@ class SuperWord : public ResourceObj {
|
|||||||
return _vloop_analyzer.reductions().is_marked_reduction_pair(n1, n2);
|
return _vloop_analyzer.reductions().is_marked_reduction_pair(n1, n2);
|
||||||
}
|
}
|
||||||
|
|
||||||
// VLoopMemorySlices Accessors
|
// VLoopMemorySlices accessors
|
||||||
bool same_memory_slice(MemNode* n1, MemNode* n2) const {
|
bool same_memory_slice(MemNode* n1, MemNode* n2) const {
|
||||||
return _vloop_analyzer.memory_slices().same_memory_slice(n1, n2);
|
return _vloop_analyzer.memory_slices().same_memory_slice(n1, n2);
|
||||||
}
|
}
|
||||||
|
|
||||||
// VLoopBody Accessors
|
// VLoopBody accessors
|
||||||
const GrowableArray<Node*>& body() const {
|
const GrowableArray<Node*>& body() const {
|
||||||
return _vloop_analyzer.body().body();
|
return _vloop_analyzer.body().body();
|
||||||
}
|
}
|
||||||
@ -457,7 +457,7 @@ class SuperWord : public ResourceObj {
|
|||||||
return _vloop_analyzer.body().bb_idx(n);
|
return _vloop_analyzer.body().bb_idx(n);
|
||||||
}
|
}
|
||||||
|
|
||||||
// VLoopTypes Accessors
|
// VLoopTypes accessors
|
||||||
const Type* velt_type(Node* n) const {
|
const Type* velt_type(Node* n) const {
|
||||||
return _vloop_analyzer.types().velt_type(n);
|
return _vloop_analyzer.types().velt_type(n);
|
||||||
}
|
}
|
||||||
@ -482,7 +482,7 @@ class SuperWord : public ResourceObj {
|
|||||||
return _vloop_analyzer.types().vector_width_in_bytes(n);
|
return _vloop_analyzer.types().vector_width_in_bytes(n);
|
||||||
}
|
}
|
||||||
|
|
||||||
// VLoopDependencyGraph Accessors
|
// VLoopDependencyGraph accessors
|
||||||
const VLoopDependencyGraph& dependency_graph() const {
|
const VLoopDependencyGraph& dependency_graph() const {
|
||||||
return _vloop_analyzer.dependency_graph();
|
return _vloop_analyzer.dependency_graph();
|
||||||
}
|
}
|
||||||
@ -495,6 +495,11 @@ class SuperWord : public ResourceObj {
|
|||||||
return _vloop_analyzer.dependency_graph().mutually_independent(nodes);
|
return _vloop_analyzer.dependency_graph().mutually_independent(nodes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// VLoopVPointer accessors
|
||||||
|
// Fetch the VPointer that the loop analyzer has cached for "mem".
const VPointer& vpointer(const MemNode* mem) const {
  const VLoopVPointers& cache = _vloop_analyzer.vpointers();
  return cache.vpointer(mem);
}
|
||||||
|
|
||||||
#ifndef PRODUCT
|
#ifndef PRODUCT
|
||||||
// TraceAutoVectorization and TraceSuperWord
|
// TraceAutoVectorization and TraceSuperWord
|
||||||
bool is_trace_superword_alignment() const {
|
bool is_trace_superword_alignment() const {
|
||||||
|
@ -29,12 +29,13 @@
|
|||||||
#include "utilities/stringUtils.hpp"
|
#include "utilities/stringUtils.hpp"
|
||||||
|
|
||||||
#define COMPILER_TRACE_AUTO_VECTORIZATION_TAG(flags) \
|
#define COMPILER_TRACE_AUTO_VECTORIZATION_TAG(flags) \
|
||||||
flags(POINTER_ANALYSIS, "Trace VPointer") \
|
flags(POINTER_ANALYSIS, "Trace VPointer (verbose)") \
|
||||||
flags(PRECONDITIONS, "Trace VLoop::check_preconditions") \
|
flags(PRECONDITIONS, "Trace VLoop::check_preconditions") \
|
||||||
flags(LOOP_ANALYZER, "Trace VLoopAnalyzer::setup_submodules") \
|
flags(LOOP_ANALYZER, "Trace VLoopAnalyzer::setup_submodules") \
|
||||||
flags(MEMORY_SLICES, "Trace VLoopMemorySlices") \
|
flags(MEMORY_SLICES, "Trace VLoopMemorySlices") \
|
||||||
flags(BODY, "Trace VLoopBody") \
|
flags(BODY, "Trace VLoopBody") \
|
||||||
flags(TYPES, "Trace VLoopTypes") \
|
flags(TYPES, "Trace VLoopTypes") \
|
||||||
|
flags(POINTERS, "Trace VLoopPointers") \
|
||||||
flags(DEPENDENCY_GRAPH, "Trace VLoopDependencyGraph") \
|
flags(DEPENDENCY_GRAPH, "Trace VLoopDependencyGraph") \
|
||||||
flags(SW_ALIGNMENT, "Trace SuperWord alignment analysis") \
|
flags(SW_ALIGNMENT, "Trace SuperWord alignment analysis") \
|
||||||
flags(SW_ADJACENT_MEMOPS, "Trace SuperWord::find_adjacent_refs") \
|
flags(SW_ADJACENT_MEMOPS, "Trace SuperWord::find_adjacent_refs") \
|
||||||
|
@ -31,6 +31,19 @@
|
|||||||
#include "opto/rootnode.hpp"
|
#include "opto/rootnode.hpp"
|
||||||
#include "opto/vectorization.hpp"
|
#include "opto/vectorization.hpp"
|
||||||
|
|
||||||
|
#ifndef PRODUCT
|
||||||
|
static void print_con_or_idx(const Node* n) {
|
||||||
|
if (n == nullptr) {
|
||||||
|
tty->print("( 0)");
|
||||||
|
} else if (n->is_ConI()) {
|
||||||
|
jint val = n->as_ConI()->get_int();
|
||||||
|
tty->print("(%4d)", val);
|
||||||
|
} else {
|
||||||
|
tty->print("[%4d]", n->_idx);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
bool VLoop::check_preconditions() {
|
bool VLoop::check_preconditions() {
|
||||||
#ifndef PRODUCT
|
#ifndef PRODUCT
|
||||||
if (is_trace_preconditions()) {
|
if (is_trace_preconditions()) {
|
||||||
@ -161,11 +174,62 @@ VStatus VLoopAnalyzer::setup_submodules_helper() {
|
|||||||
|
|
||||||
_types.compute_vector_element_type();
|
_types.compute_vector_element_type();
|
||||||
|
|
||||||
|
_vpointers.compute_vpointers();
|
||||||
|
|
||||||
_dependency_graph.construct();
|
_dependency_graph.construct();
|
||||||
|
|
||||||
return VStatus::make_success();
|
return VStatus::make_success();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void VLoopVPointers::compute_vpointers() {
|
||||||
|
count_vpointers();
|
||||||
|
allocate_vpointers_array();
|
||||||
|
compute_and_cache_vpointers();
|
||||||
|
NOT_PRODUCT( if (_vloop.is_trace_vpointers()) { print(); } )
|
||||||
|
}
|
||||||
|
|
||||||
|
// Count the memory nodes visited by VLoopBody::for_each_mem and record the
// total in _vpointers_length.
void VLoopVPointers::count_vpointers() {
  int mem_count = 0;
  _body.for_each_mem([&mem_count] (const MemNode* mem, int bb_idx) {
    // Only the number of visits matters here, not the node itself.
    mem_count++;
  });
  _vpointers_length = mem_count;
}
|
||||||
|
|
||||||
|
void VLoopVPointers::allocate_vpointers_array() {
|
||||||
|
uint bytes = _vpointers_length * sizeof(VPointer);
|
||||||
|
_vpointers = (VPointer*)_arena->Amalloc(bytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Construct one VPointer per memory node, in for_each_mem order, and record
// for each node's bb_idx which array slot holds its VPointer.
void VLoopVPointers::compute_and_cache_vpointers() {
  int next_slot = 0;
  _body.for_each_mem([&] (const MemNode* mem, int bb_idx) {
    // Placement new: build the VPointer in the pre-allocated array slot.
    VPointer* slot = &_vpointers[next_slot];
    ::new (slot) VPointer(mem, _vloop);
    _bb_idx_to_vpointer.at_put(bb_idx, next_slot);
    next_slot++;
  });
}
|
||||||
|
|
||||||
|
// Return the cached VPointer for "mem". The node is translated to its bb_idx,
// which is then mapped to a slot of the _vpointers array.
const VPointer& VLoopVPointers::vpointer(const MemNode* mem) const {
  assert(mem != nullptr && _vloop.in_bb(mem), "only mem in loop");
  const int slot = _bb_idx_to_vpointer.at(_body.bb_idx(mem));
  // Unmapped bb_idx entries hold -1 and are rejected here.
  assert(0 <= slot && slot < _vpointers_length, "valid range");
  return *(_vpointers + slot);
}
|
||||||
|
|
||||||
|
#ifndef PRODUCT
|
||||||
|
// Dump every cached VPointer to tty, one per memory node, under a header line.
void VLoopVPointers::print() const {
  tty->print_cr("\nVLoopVPointers::print:");

  _body.for_each_mem([&] (const MemNode* mem, int bb_idx) {
    const VPointer* cached = &vpointer(mem);
    tty->print(" ");
    cached->print();
  });
}
|
||||||
|
#endif
|
||||||
|
|
||||||
// Construct the dependency graph:
|
// Construct the dependency graph:
|
||||||
// - Data-dependencies: implicit (taken from C2 node inputs).
|
// - Data-dependencies: implicit (taken from C2 node inputs).
|
||||||
// - Memory-dependencies:
|
// - Memory-dependencies:
|
||||||
@ -193,7 +257,7 @@ void VLoopDependencyGraph::construct() {
|
|||||||
MemNode* n1 = slice_nodes.at(j);
|
MemNode* n1 = slice_nodes.at(j);
|
||||||
memory_pred_edges.clear();
|
memory_pred_edges.clear();
|
||||||
|
|
||||||
VPointer p1(n1, _vloop);
|
const VPointer& p1 = _vpointers.vpointer(n1);
|
||||||
// For all memory nodes before it, check if we need to add a memory edge.
|
// For all memory nodes before it, check if we need to add a memory edge.
|
||||||
for (int k = slice_nodes.length() - 1; k > j; k--) {
|
for (int k = slice_nodes.length() - 1; k > j; k--) {
|
||||||
MemNode* n2 = slice_nodes.at(k);
|
MemNode* n2 = slice_nodes.at(k);
|
||||||
@ -201,7 +265,7 @@ void VLoopDependencyGraph::construct() {
|
|||||||
// Ignore Load-Load dependencies:
|
// Ignore Load-Load dependencies:
|
||||||
if (n1->is_Load() && n2->is_Load()) { continue; }
|
if (n1->is_Load() && n2->is_Load()) { continue; }
|
||||||
|
|
||||||
VPointer p2(n2, _vloop);
|
const VPointer& p2 = _vpointers.vpointer(n2);
|
||||||
if (!VPointer::not_equal(p1.cmp(p2))) {
|
if (!VPointer::not_equal(p1.cmp(p2))) {
|
||||||
// Possibly overlapping memory
|
// Possibly overlapping memory
|
||||||
memory_pred_edges.append(_body.bb_idx(n2));
|
memory_pred_edges.append(_body.bb_idx(n2));
|
||||||
@ -723,19 +787,24 @@ void VPointer::maybe_add_to_invar(Node* new_invar, bool negate) {
|
|||||||
_invar = register_if_new(add);
|
_invar = register_if_new(add);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Function for printing the fields of a VPointer
|
|
||||||
void VPointer::print() {
|
|
||||||
#ifndef PRODUCT
|
#ifndef PRODUCT
|
||||||
tty->print("base: [%d] adr: [%d] scale: %d offset: %d",
|
// Function for printing the fields of a VPointer
|
||||||
_base != nullptr ? _base->_idx : 0,
|
void VPointer::print() const {
|
||||||
_adr != nullptr ? _adr->_idx : 0,
|
tty->print("VPointer[mem: %4d %10s, ", _mem->_idx, _mem->Name());
|
||||||
_scale, _offset);
|
tty->print("base: %4d, ", _base != nullptr ? _base->_idx : 0);
|
||||||
if (_invar != nullptr) {
|
tty->print("adr: %4d, ", _adr != nullptr ? _adr->_idx : 0);
|
||||||
tty->print(" invar: [%d]", _invar->_idx);
|
|
||||||
}
|
tty->print(" base");
|
||||||
tty->cr();
|
print_con_or_idx(_base);
|
||||||
#endif
|
|
||||||
|
tty->print(" + offset(%4d)", _offset);
|
||||||
|
|
||||||
|
tty->print(" + invar");
|
||||||
|
print_con_or_idx(_invar);
|
||||||
|
|
||||||
|
tty->print_cr(" + scale(%4d) * iv]", _scale);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
// Following are functions for tracing VPointer match
|
// Following are functions for tracing VPointer match
|
||||||
#ifndef PRODUCT
|
#ifndef PRODUCT
|
||||||
@ -1502,17 +1571,6 @@ AlignmentSolution* AlignmentSolver::solve() const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef ASSERT
|
#ifdef ASSERT
|
||||||
static void print_con_or_idx(const Node* n) {
|
|
||||||
if (n == nullptr) {
|
|
||||||
tty->print("(0)");
|
|
||||||
} else if (n->is_ConI()) {
|
|
||||||
jint val = n->as_ConI()->get_int();
|
|
||||||
tty->print("(%d)", val);
|
|
||||||
} else {
|
|
||||||
tty->print("[%d]", n->_idx);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void AlignmentSolver::trace_start_solve() const {
|
void AlignmentSolver::trace_start_solve() const {
|
||||||
if (is_trace()) {
|
if (is_trace()) {
|
||||||
tty->print(" vector mem_ref:");
|
tty->print(" vector mem_ref:");
|
||||||
|
@ -33,6 +33,8 @@
|
|||||||
// Code in this file and the vectorization.cpp contains shared logics and
|
// Code in this file and the vectorization.cpp contains shared logics and
|
||||||
// utilities for C2's loop auto-vectorization.
|
// utilities for C2's loop auto-vectorization.
|
||||||
|
|
||||||
|
class VPointer;
|
||||||
|
|
||||||
class VStatus : public StackObj {
|
class VStatus : public StackObj {
|
||||||
private:
|
private:
|
||||||
const char* _failure_reason;
|
const char* _failure_reason;
|
||||||
@ -154,6 +156,10 @@ public:
|
|||||||
return _vtrace.is_trace(TraceAutoVectorizationTag::DEPENDENCY_GRAPH);
|
return _vtrace.is_trace(TraceAutoVectorizationTag::DEPENDENCY_GRAPH);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool is_trace_vpointers() const {
|
||||||
|
return _vtrace.is_trace(TraceAutoVectorizationTag::POINTERS);
|
||||||
|
}
|
||||||
|
|
||||||
bool is_trace_pointer_analysis() const {
|
bool is_trace_pointer_analysis() const {
|
||||||
return _vtrace.is_trace(TraceAutoVectorizationTag::POINTER_ANALYSIS);
|
return _vtrace.is_trace(TraceAutoVectorizationTag::POINTER_ANALYSIS);
|
||||||
}
|
}
|
||||||
@ -356,6 +362,16 @@ public:
|
|||||||
return _body_idx.at(n->_idx);
|
return _body_idx.at(n->_idx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<typename Callback>
|
||||||
|
void for_each_mem(Callback callback) const {
|
||||||
|
for (int i = 0; i < _body.length(); i++) {
|
||||||
|
MemNode* mem = _body.at(i)->isa_Mem();
|
||||||
|
if (mem != nullptr && _vloop.in_bb(mem)) {
|
||||||
|
callback(mem, i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void set_bb_idx(Node* n, int i) {
|
void set_bb_idx(Node* n, int i) {
|
||||||
_body_idx.at_put_grow(n->_idx, i);
|
_body_idx.at_put_grow(n->_idx, i);
|
||||||
@ -445,6 +461,45 @@ private:
|
|||||||
const Type* container_type(Node* n) const;
|
const Type* container_type(Node* n) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Submodule of VLoopAnalyzer.
|
||||||
|
// We compute and cache the VPointer for every load and store.
|
||||||
|
class VLoopVPointers : public StackObj {
|
||||||
|
private:
|
||||||
|
Arena* _arena;
|
||||||
|
const VLoop& _vloop;
|
||||||
|
const VLoopBody& _body;
|
||||||
|
|
||||||
|
// Array of cached pointers
|
||||||
|
VPointer* _vpointers;
|
||||||
|
int _vpointers_length;
|
||||||
|
|
||||||
|
// Map bb_idx -> index in _vpointers. -1 if not mapped.
|
||||||
|
GrowableArray<int> _bb_idx_to_vpointer;
|
||||||
|
|
||||||
|
public:
|
||||||
|
VLoopVPointers(Arena* arena,
|
||||||
|
const VLoop& vloop,
|
||||||
|
const VLoopBody& body) :
|
||||||
|
_arena(arena),
|
||||||
|
_vloop(vloop),
|
||||||
|
_body(body),
|
||||||
|
_vpointers(nullptr),
|
||||||
|
_bb_idx_to_vpointer(arena,
|
||||||
|
vloop.estimated_body_length(),
|
||||||
|
vloop.estimated_body_length(),
|
||||||
|
-1) {}
|
||||||
|
NONCOPYABLE(VLoopVPointers);
|
||||||
|
|
||||||
|
void compute_vpointers();
|
||||||
|
const VPointer& vpointer(const MemNode* mem) const;
|
||||||
|
NOT_PRODUCT( void print() const; )
|
||||||
|
|
||||||
|
private:
|
||||||
|
void count_vpointers();
|
||||||
|
void allocate_vpointers_array();
|
||||||
|
void compute_and_cache_vpointers();
|
||||||
|
};
|
||||||
|
|
||||||
// Submodule of VLoopAnalyzer.
|
// Submodule of VLoopAnalyzer.
|
||||||
// The dependency graph is used to determine if nodes are independent, and can thus potentially
|
// The dependency graph is used to determine if nodes are independent, and can thus potentially
|
||||||
// be executed in parallel. That is a prerequisite for packing nodes into vector operations.
|
// be executed in parallel. That is a prerequisite for packing nodes into vector operations.
|
||||||
@ -461,6 +516,7 @@ private:
|
|||||||
const VLoop& _vloop;
|
const VLoop& _vloop;
|
||||||
const VLoopBody& _body;
|
const VLoopBody& _body;
|
||||||
const VLoopMemorySlices& _memory_slices;
|
const VLoopMemorySlices& _memory_slices;
|
||||||
|
const VLoopVPointers& _vpointers;
|
||||||
|
|
||||||
// bb_idx -> DependenceNode*
|
// bb_idx -> DependenceNode*
|
||||||
GrowableArray<DependencyNode*> _dependency_nodes;
|
GrowableArray<DependencyNode*> _dependency_nodes;
|
||||||
@ -472,11 +528,13 @@ public:
|
|||||||
VLoopDependencyGraph(Arena* arena,
|
VLoopDependencyGraph(Arena* arena,
|
||||||
const VLoop& vloop,
|
const VLoop& vloop,
|
||||||
const VLoopBody& body,
|
const VLoopBody& body,
|
||||||
const VLoopMemorySlices& memory_slices) :
|
const VLoopMemorySlices& memory_slices,
|
||||||
|
const VLoopVPointers& pointers) :
|
||||||
_arena(arena),
|
_arena(arena),
|
||||||
_vloop(vloop),
|
_vloop(vloop),
|
||||||
_body(body),
|
_body(body),
|
||||||
_memory_slices(memory_slices),
|
_memory_slices(memory_slices),
|
||||||
|
_vpointers(pointers),
|
||||||
_dependency_nodes(arena,
|
_dependency_nodes(arena,
|
||||||
vloop.estimated_body_length(),
|
vloop.estimated_body_length(),
|
||||||
vloop.estimated_body_length(),
|
vloop.estimated_body_length(),
|
||||||
@ -570,6 +628,7 @@ private:
|
|||||||
VLoopMemorySlices _memory_slices;
|
VLoopMemorySlices _memory_slices;
|
||||||
VLoopBody _body;
|
VLoopBody _body;
|
||||||
VLoopTypes _types;
|
VLoopTypes _types;
|
||||||
|
VLoopVPointers _vpointers;
|
||||||
VLoopDependencyGraph _dependency_graph;
|
VLoopDependencyGraph _dependency_graph;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
@ -581,7 +640,8 @@ public:
|
|||||||
_memory_slices (&_arena, vloop),
|
_memory_slices (&_arena, vloop),
|
||||||
_body (&_arena, vloop, vshared),
|
_body (&_arena, vloop, vshared),
|
||||||
_types (&_arena, vloop, _body),
|
_types (&_arena, vloop, _body),
|
||||||
_dependency_graph(&_arena, vloop, _body, _memory_slices)
|
_vpointers (&_arena, vloop, _body),
|
||||||
|
_dependency_graph(&_arena, vloop, _body, _memory_slices, _vpointers)
|
||||||
{
|
{
|
||||||
_success = setup_submodules();
|
_success = setup_submodules();
|
||||||
}
|
}
|
||||||
@ -595,6 +655,7 @@ public:
|
|||||||
const VLoopMemorySlices& memory_slices() const { return _memory_slices; }
|
const VLoopMemorySlices& memory_slices() const { return _memory_slices; }
|
||||||
const VLoopBody& body() const { return _body; }
|
const VLoopBody& body() const { return _body; }
|
||||||
const VLoopTypes& types() const { return _types; }
|
const VLoopTypes& types() const { return _types; }
|
||||||
|
// Read-only access to the VPointer cache submodule.
const VLoopVPointers& vpointers() const {
  return _vpointers;
}
|
||||||
const VLoopDependencyGraph& dependency_graph() const { return _dependency_graph; }
|
const VLoopDependencyGraph& dependency_graph() const { return _dependency_graph; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@ -678,7 +739,7 @@ class VPointer : public ArenaObj {
|
|||||||
int invar_factor() const;
|
int invar_factor() const;
|
||||||
|
|
||||||
// Comparable?
|
// Comparable?
|
||||||
bool invar_equals(VPointer& q) {
|
bool invar_equals(const VPointer& q) const {
|
||||||
assert(_debug_invar == NodeSentinel || q._debug_invar == NodeSentinel ||
|
assert(_debug_invar == NodeSentinel || q._debug_invar == NodeSentinel ||
|
||||||
(_invar == q._invar) == (_debug_invar == q._debug_invar &&
|
(_invar == q._invar) == (_debug_invar == q._debug_invar &&
|
||||||
_debug_invar_scale == q._debug_invar_scale &&
|
_debug_invar_scale == q._debug_invar_scale &&
|
||||||
@ -686,7 +747,7 @@ class VPointer : public ArenaObj {
|
|||||||
return _invar == q._invar;
|
return _invar == q._invar;
|
||||||
}
|
}
|
||||||
|
|
||||||
int cmp(VPointer& q) {
|
int cmp(const VPointer& q) const {
|
||||||
if (valid() && q.valid() &&
|
if (valid() && q.valid() &&
|
||||||
(_adr == q._adr || (_base == _adr && q._base == q._adr)) &&
|
(_adr == q._adr || (_base == _adr && q._base == q._adr)) &&
|
||||||
_scale == q._scale && invar_equals(q)) {
|
_scale == q._scale && invar_equals(q)) {
|
||||||
@ -698,7 +759,7 @@ class VPointer : public ArenaObj {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool overlap_possible_with_any_in(Node_List* p) {
|
bool overlap_possible_with_any_in(const Node_List* p) const {
|
||||||
for (uint k = 0; k < p->size(); k++) {
|
for (uint k = 0; k < p->size(); k++) {
|
||||||
MemNode* mem = p->at(k)->as_Mem();
|
MemNode* mem = p->at(k)->as_Mem();
|
||||||
VPointer p_mem(mem, _vloop);
|
VPointer p_mem(mem, _vloop);
|
||||||
@ -712,14 +773,14 @@ class VPointer : public ArenaObj {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool not_equal(VPointer& q) { return not_equal(cmp(q)); }
|
bool not_equal(const VPointer& q) const { return not_equal(cmp(q)); }
|
||||||
bool equal(VPointer& q) { return equal(cmp(q)); }
|
bool equal(const VPointer& q) const { return equal(cmp(q)); }
|
||||||
bool comparable(VPointer& q) { return comparable(cmp(q)); }
|
bool comparable(const VPointer& q) const { return comparable(cmp(q)); }
|
||||||
static bool not_equal(int cmp) { return cmp <= NotEqual; }
|
static bool not_equal(int cmp) { return cmp <= NotEqual; }
|
||||||
static bool equal(int cmp) { return cmp == Equal; }
|
static bool equal(int cmp) { return cmp == Equal; }
|
||||||
static bool comparable(int cmp) { return cmp < NotComparable; }
|
static bool comparable(int cmp) { return cmp < NotComparable; }
|
||||||
|
|
||||||
void print();
|
NOT_PRODUCT( void print() const; )
|
||||||
|
|
||||||
#ifndef PRODUCT
|
#ifndef PRODUCT
|
||||||
class Tracer {
|
class Tracer {
|
||||||
|
Loading…
Reference in New Issue
Block a user