8287697: Limit auto vectorization to 32-byte vector on Cascade Lake

Reviewed-by: kvn, jbhateja
This commit is contained in:
Sandhya Viswanathan 2022-06-08 01:05:20 +00:00
parent 39ec58b63c
commit 45f1b72a6e
6 changed files with 48 additions and 6 deletions

View File

@ -1295,6 +1295,25 @@ void VM_Version::get_processor_features() {
FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
}
// Establish the value of SuperWordMaxVectorSize relative to MaxVectorSize.
// Two cases: the flag is still at its default (pick a value here), or it was
// set explicitly (validate it against MaxVectorSize and the power-of-2 rule).
if (FLAG_IS_DEFAULT(SuperWordMaxVectorSize)) {
// The 32-byte cap is applied only when FLAG_IS_DEFAULT(UseAVX) holds, UseAVX > 2,
// and the CPU passes the is_intel_skylake() / _stepping >= 5 check.
if (FLAG_IS_DEFAULT(UseAVX) && UseAVX > 2 &&
is_intel_skylake() && _stepping >= 5) {
// Limit auto vectorization to 256 bit (32 byte) by default on Cascade Lake
// MIN2 keeps the result from exceeding a MaxVectorSize that is already below 32.
FLAG_SET_DEFAULT(SuperWordMaxVectorSize, MIN2(MaxVectorSize, (intx)32));
} else {
// Otherwise the superword limit simply follows MaxVectorSize.
FLAG_SET_DEFAULT(SuperWordMaxVectorSize, MaxVectorSize);
}
} else {
// The flag is not at its default: a value above MaxVectorSize is replaced by
// MaxVectorSize after printing a warning.
if (SuperWordMaxVectorSize > MaxVectorSize) {
warning("SuperWordMaxVectorSize cannot be greater than MaxVectorSize %i", (int) MaxVectorSize);
FLAG_SET_DEFAULT(SuperWordMaxVectorSize, MaxVectorSize);
}
// A value that fails is_power_of_2() is likewise replaced by MaxVectorSize
// after printing a warning.
if (!is_power_of_2(SuperWordMaxVectorSize)) {
warning("SuperWordMaxVectorSize must be a power of 2, setting to MaxVectorSize: %i", (int) MaxVectorSize);
FLAG_SET_DEFAULT(SuperWordMaxVectorSize, MaxVectorSize);
}
}
#if defined(COMPILER2) && defined(ASSERT)
if (MaxVectorSize > 0) {
if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {

View File

@ -82,6 +82,11 @@
"actual size could be less depending on elements type") \
range(0, max_jint) \
\
product(intx, SuperWordMaxVectorSize, 64, DIAGNOSTIC, \
"Vector size limit in bytes for superword, " \
"superword vector size limit in bytes") \
range(0, max_jint) \
\
product(intx, ArrayOperationPartialInlineSize, 0, DIAGNOSTIC, \
"Partial inline size used for small array operations" \
"(e.g. copy,cmp) acceleration.") \

View File

@ -199,6 +199,16 @@ bool SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) {
return success;
}
//------------------------------max vector size------------------------------
// Returns the maximum number of elements of type bt that superword may place
// in one vector: the smaller of Matcher::max_vector_size(bt) and the element
// count that fits in SuperWordMaxVectorSize bytes.
int SuperWord::max_vector_size(BasicType bt) {
  const int matcher_limit = Matcher::max_vector_size(bt);
  // Convert the byte limit of the flag into an element count for bt.
  const int superword_limit = SuperWordMaxVectorSize / type2aelembytes(bt);
  return (matcher_limit > superword_limit) ? superword_limit : matcher_limit;
}
//------------------------------early unrolling analysis------------------------------
void SuperWord::unrolling_analysis(int &local_loop_unroll_factor) {
bool is_slp = true;
@ -217,7 +227,7 @@ void SuperWord::unrolling_analysis(int &local_loop_unroll_factor) {
ignored_loop_nodes[i] = -1;
}
int max_vector = Matcher::max_vector_size(T_BYTE);
int max_vector = max_vector_size(T_BYTE);
// Process the loop, some/all of the stack entries will not be in order, ergo
// need to preprocess the ignored initial state before we process the loop
@ -352,7 +362,7 @@ void SuperWord::unrolling_analysis(int &local_loop_unroll_factor) {
if (is_java_primitive(bt) == false) continue;
int cur_max_vector = Matcher::max_vector_size(bt);
int cur_max_vector = max_vector_size(bt);
// If a max vector exists which is not larger than _local_loop_unroll_factor
// stop looking, we already have the max vector to map to.
@ -991,7 +1001,7 @@ int SuperWord::get_vw_bytes_special(MemNode* s) {
}
}
if (should_combine_adjacent) {
vw = MIN2(Matcher::max_vector_size(btype)*type2aelembytes(btype), vw * 2);
vw = MIN2(max_vector_size(btype)*type2aelembytes(btype), vw * 2);
}
}
@ -1689,7 +1699,7 @@ void SuperWord::combine_packs() {
Node_List* p1 = _packset.at(i);
if (p1 != NULL) {
BasicType bt = velt_basic_type(p1->at(0));
uint max_vlen = Matcher::max_vector_size(bt); // Max elements in vector
uint max_vlen = max_vector_size(bt); // Max elements in vector
assert(is_power_of_2(max_vlen), "sanity");
uint psize = p1->size();
if (!is_power_of_2(psize)) {

View File

@ -327,6 +327,8 @@ class SuperWord : public ResourceObj {
bool transform_loop(IdealLoopTree* lpt, bool do_optimization);
int max_vector_size(BasicType bt);
void unrolling_analysis(int &local_loop_unroll_factor);
// Accessors for SWPointer

View File

@ -294,12 +294,17 @@ int VectorNode::replicate_opcode(BasicType bt) {
}
}
// Returns true when a vector of vlen elements of type bt is accepted by
// Matcher::vector_size_supported() and its total size in bytes does not
// exceed SuperWordMaxVectorSize.
bool VectorNode::vector_size_supported(BasicType bt, uint vlen) {
  if (!Matcher::vector_size_supported(bt, vlen)) {
    return false;
  }
  // Total vector footprint in bytes, computed in unsigned arithmetic.
  const uint vector_bytes = vlen * type2aelembytes(bt);
  return vector_bytes <= (uint)SuperWordMaxVectorSize;
}
// Also used to check if the code generator
// supports the vector operation.
bool VectorNode::implemented(int opc, uint vlen, BasicType bt) {
if (is_java_primitive(bt) &&
(vlen > 1) && is_power_of_2(vlen) &&
Matcher::vector_size_supported(bt, vlen)) {
vector_size_supported(bt, vlen)) {
int vopc = VectorNode::opcode(opc, bt);
// For rotate operation we will do a lazy de-generation into
// OrV/LShiftV/URShiftV pattern if the target does not support
@ -1320,7 +1325,7 @@ Node* ReductionNode::make_reduction_input(PhaseGVN& gvn, int opc, BasicType bt)
bool ReductionNode::implemented(int opc, uint vlen, BasicType bt) {
if (is_java_primitive(bt) &&
(vlen > 1) && is_power_of_2(vlen) &&
Matcher::vector_size_supported(bt, vlen)) {
VectorNode::vector_size_supported(bt, vlen)) {
int vopc = ReductionNode::opcode(opc, bt);
return vopc != opc && Matcher::match_rule_supported_vector(vopc, vlen, bt);
}

View File

@ -88,6 +88,7 @@ class VectorNode : public TypeNode {
static int opcode(int opc, BasicType bt);
static int replicate_opcode(BasicType bt);
static bool vector_size_supported(BasicType bt, uint vlen);
static bool implemented(int opc, uint vlen, BasicType bt);
static bool is_shift(Node* n);
static bool is_vshift_cnt(Node* n);