8287697: Limit auto vectorization to 32-byte vector on Cascade Lake
Reviewed-by: kvn, jbhateja
This commit is contained in:
parent
39ec58b63c
commit
45f1b72a6e
@ -1295,6 +1295,25 @@ void VM_Version::get_processor_features() {
|
||||
FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
|
||||
}
|
||||
|
||||
// Establish SuperWordMaxVectorSize, the byte limit applied to superword
// (auto-)vectorization. Two cases:
//  - flag left at its default: derive the value from MaxVectorSize, using a
//    smaller 32-byte cap on the CPUs selected by the condition below;
//  - flag set explicitly: validate it against MaxVectorSize and fall back to
//    MaxVectorSize (with a warning) when the value is not acceptable.
if (FLAG_IS_DEFAULT(SuperWordMaxVectorSize)) {
|
||||
// The reduced limit is applied only when UseAVX was not set explicitly,
// UseAVX is above 2, and the CPU is reported as Intel Skylake with
// stepping >= 5.
if (FLAG_IS_DEFAULT(UseAVX) && UseAVX > 2 &&
|
||||
is_intel_skylake() && _stepping >= 5) {
|
||||
// Limit auto vectorization to 256 bit (32 byte) by default on Cascade Lake
// MIN2 keeps the result from exceeding a MaxVectorSize smaller than 32.
|
||||
FLAG_SET_DEFAULT(SuperWordMaxVectorSize, MIN2(MaxVectorSize, (intx)32));
|
||||
} else {
|
||||
// All other configurations: no extra restriction beyond MaxVectorSize.
FLAG_SET_DEFAULT(SuperWordMaxVectorSize, MaxVectorSize);
|
||||
}
|
||||
} else {
|
||||
// Explicitly specified value: it may not exceed MaxVectorSize.
if (SuperWordMaxVectorSize > MaxVectorSize) {
|
||||
warning("SuperWordMaxVectorSize cannot be greater than MaxVectorSize %i", (int) MaxVectorSize);
|
||||
FLAG_SET_DEFAULT(SuperWordMaxVectorSize, MaxVectorSize);
|
||||
}
|
||||
// This check runs after the clamp above, so it sees the possibly adjusted
// value; anything that is not a power of 2 is replaced by MaxVectorSize.
if (!is_power_of_2(SuperWordMaxVectorSize)) {
|
||||
warning("SuperWordMaxVectorSize must be a power of 2, setting to MaxVectorSize: %i", (int) MaxVectorSize);
|
||||
FLAG_SET_DEFAULT(SuperWordMaxVectorSize, MaxVectorSize);
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(COMPILER2) && defined(ASSERT)
|
||||
if (MaxVectorSize > 0) {
|
||||
if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
|
||||
|
@ -82,6 +82,11 @@
|
||||
"actual size could be less depending on elements type") \
|
||||
range(0, max_jint) \
|
||||
\
|
||||
product(intx, SuperWordMaxVectorSize, 64, DIAGNOSTIC,                       \
        "Vector size limit in bytes for superword")                         \
        range(0, max_jint)                                                  \
|
||||
\
|
||||
product(intx, ArrayOperationPartialInlineSize, 0, DIAGNOSTIC, \
|
||||
"Partial inline size used for small array operations" \
|
||||
"(e.g. copy,cmp) acceleration.") \
|
||||
|
@ -199,6 +199,16 @@ bool SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) {
|
||||
return success;
|
||||
}
|
||||
|
||||
//------------------------------max vector size------------------------------
|
||||
// Returns the maximum number of elements of type 'bt' that superword may
// place in one vector: the Matcher's element limit for 'bt', capped by
// SuperWordMaxVectorSize (a byte limit) converted to an element count.
int SuperWord::max_vector_size(BasicType bt) {
  const int matcher_limit = Matcher::max_vector_size(bt);
  // Convert the superword byte limit into elements of this type.
  const int superword_limit = SuperWordMaxVectorSize / type2aelembytes(bt);
  return (matcher_limit > superword_limit) ? superword_limit : matcher_limit;
}
|
||||
|
||||
//------------------------------early unrolling analysis------------------------------
|
||||
void SuperWord::unrolling_analysis(int &local_loop_unroll_factor) {
|
||||
bool is_slp = true;
|
||||
@ -217,7 +227,7 @@ void SuperWord::unrolling_analysis(int &local_loop_unroll_factor) {
|
||||
ignored_loop_nodes[i] = -1;
|
||||
}
|
||||
|
||||
int max_vector = Matcher::max_vector_size(T_BYTE);
|
||||
int max_vector = max_vector_size(T_BYTE);
|
||||
|
||||
// Process the loop, some/all of the stack entries will not be in order, ergo
|
||||
// need to preprocess the ignored initial state before we process the loop
|
||||
@ -352,7 +362,7 @@ void SuperWord::unrolling_analysis(int &local_loop_unroll_factor) {
|
||||
|
||||
if (is_java_primitive(bt) == false) continue;
|
||||
|
||||
int cur_max_vector = Matcher::max_vector_size(bt);
|
||||
int cur_max_vector = max_vector_size(bt);
|
||||
|
||||
// If a max vector exists which is not larger than _local_loop_unroll_factor
|
||||
// stop looking, we already have the max vector to map to.
|
||||
@ -991,7 +1001,7 @@ int SuperWord::get_vw_bytes_special(MemNode* s) {
|
||||
}
|
||||
}
|
||||
if (should_combine_adjacent) {
|
||||
vw = MIN2(Matcher::max_vector_size(btype)*type2aelembytes(btype), vw * 2);
|
||||
vw = MIN2(max_vector_size(btype)*type2aelembytes(btype), vw * 2);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1689,7 +1699,7 @@ void SuperWord::combine_packs() {
|
||||
Node_List* p1 = _packset.at(i);
|
||||
if (p1 != NULL) {
|
||||
BasicType bt = velt_basic_type(p1->at(0));
|
||||
uint max_vlen = Matcher::max_vector_size(bt); // Max elements in vector
|
||||
uint max_vlen = max_vector_size(bt); // Max elements in vector
|
||||
assert(is_power_of_2(max_vlen), "sanity");
|
||||
uint psize = p1->size();
|
||||
if (!is_power_of_2(psize)) {
|
||||
|
@ -327,6 +327,8 @@ class SuperWord : public ResourceObj {
|
||||
|
||||
bool transform_loop(IdealLoopTree* lpt, bool do_optimization);
|
||||
|
||||
// Maximum number of elements of type bt per vector for superword: the
// Matcher's limit, further capped by SuperWordMaxVectorSize (in bytes).
int max_vector_size(BasicType bt);
|
||||
|
||||
void unrolling_analysis(int &local_loop_unroll_factor);
|
||||
|
||||
// Accessors for SWPointer
|
||||
|
@ -294,12 +294,17 @@ int VectorNode::replicate_opcode(BasicType bt) {
|
||||
}
|
||||
}
|
||||
|
||||
// Returns true when a vector of 'vlen' elements of type 'bt' is supported by
// the Matcher and its total size in bytes does not exceed
// SuperWordMaxVectorSize.
bool VectorNode::vector_size_supported(BasicType bt, uint vlen) {
  if (!Matcher::vector_size_supported(bt, vlen)) {
    return false;
  }
  // Total vector width in bytes for this element type and length.
  const uint vector_bytes = vlen * type2aelembytes(bt);
  return vector_bytes <= (uint)SuperWordMaxVectorSize;
}
|
||||
|
||||
// Also used to check if the code generator
|
||||
// supports the vector operation.
|
||||
bool VectorNode::implemented(int opc, uint vlen, BasicType bt) {
|
||||
if (is_java_primitive(bt) &&
|
||||
(vlen > 1) && is_power_of_2(vlen) &&
|
||||
Matcher::vector_size_supported(bt, vlen)) {
|
||||
vector_size_supported(bt, vlen)) {
|
||||
int vopc = VectorNode::opcode(opc, bt);
|
||||
// For rotate operation we will do a lazy de-generation into
|
||||
// OrV/LShiftV/URShiftV pattern if the target does not support
|
||||
@ -1320,7 +1325,7 @@ Node* ReductionNode::make_reduction_input(PhaseGVN& gvn, int opc, BasicType bt)
|
||||
bool ReductionNode::implemented(int opc, uint vlen, BasicType bt) {
|
||||
if (is_java_primitive(bt) &&
|
||||
(vlen > 1) && is_power_of_2(vlen) &&
|
||||
Matcher::vector_size_supported(bt, vlen)) {
|
||||
VectorNode::vector_size_supported(bt, vlen)) {
|
||||
int vopc = ReductionNode::opcode(opc, bt);
|
||||
return vopc != opc && Matcher::match_rule_supported_vector(vopc, vlen, bt);
|
||||
}
|
||||
|
@ -88,6 +88,7 @@ class VectorNode : public TypeNode {
|
||||
|
||||
static int opcode(int opc, BasicType bt);
|
||||
static int replicate_opcode(BasicType bt);
|
||||
// True if the Matcher supports a vector of vlen elements of type bt and the
// vector's size in bytes does not exceed SuperWordMaxVectorSize.
static bool vector_size_supported(BasicType bt, uint vlen);
|
||||
static bool implemented(int opc, uint vlen, BasicType bt);
|
||||
static bool is_shift(Node* n);
|
||||
static bool is_vshift_cnt(Node* n);
|
||||
|
Loading…
x
Reference in New Issue
Block a user