8309204: Obsolete DoReserveCopyInSuperWord

Reviewed-by: kvn, thartmann
This commit is contained in:
Emanuel Peter 2023-10-05 07:01:18 +00:00
parent 3630af26e6
commit 1ed9c76ec8
11 changed files with 66 additions and 312 deletions

View File

@ -341,9 +341,6 @@
product(bool, UseCMoveUnconditionally, false, \ product(bool, UseCMoveUnconditionally, false, \
"Use CMove (scalar and vector) ignoring profitability test.") \ "Use CMove (scalar and vector) ignoring profitability test.") \
\ \
product(bool, DoReserveCopyInSuperWord, true, \
"Create reserve copy of graph in SuperWord.") \
\
notproduct(bool, TraceSuperWord, false, \ notproduct(bool, TraceSuperWord, false, \
"Trace superword transforms") \ "Trace superword transforms") \
\ \

View File

@ -55,6 +55,9 @@ const char* C2Compiler::retry_no_iterative_escape_analysis() {
const char* C2Compiler::retry_no_reduce_allocation_merges() { const char* C2Compiler::retry_no_reduce_allocation_merges() {
return "retry without reducing allocation merges"; return "retry without reducing allocation merges";
} }
const char* C2Compiler::retry_no_superword() {
return "retry without SuperWord";
}
void compiler_stubs_init(bool in_compiler_thread); void compiler_stubs_init(bool in_compiler_thread);
@ -112,10 +115,18 @@ void C2Compiler::compile_method(ciEnv* env, ciMethod* target, int entry_bci, boo
bool do_reduce_allocation_merges = ReduceAllocationMerges && EliminateAllocations; bool do_reduce_allocation_merges = ReduceAllocationMerges && EliminateAllocations;
bool eliminate_boxing = EliminateAutoBox; bool eliminate_boxing = EliminateAutoBox;
bool do_locks_coarsening = EliminateLocks; bool do_locks_coarsening = EliminateLocks;
bool do_superword = UseSuperWord;
while (!env->failing()) { while (!env->failing()) {
// Attempt to compile while subsuming loads into machine instructions. // Attempt to compile while subsuming loads into machine instructions.
Options options(subsume_loads, do_escape_analysis, do_iterative_escape_analysis, do_reduce_allocation_merges, eliminate_boxing, do_locks_coarsening, install_code); Options options(subsume_loads,
do_escape_analysis,
do_iterative_escape_analysis,
do_reduce_allocation_merges,
eliminate_boxing,
do_locks_coarsening,
do_superword,
install_code);
Compile C(env, target, entry_bci, options, directive); Compile C(env, target, entry_bci, options, directive);
// Check result and retry if appropriate. // Check result and retry if appropriate.
@ -150,6 +161,12 @@ void C2Compiler::compile_method(ciEnv* env, ciMethod* target, int entry_bci, boo
env->report_failure(C.failure_reason()); env->report_failure(C.failure_reason());
continue; // retry continue; // retry
} }
if (C.failure_reason_is(retry_no_superword())) {
assert(do_superword, "must make progress");
do_superword = false;
env->report_failure(C.failure_reason());
continue; // retry
}
if (C.has_boxed_value()) { if (C.has_boxed_value()) {
// Recompile without boxing elimination regardless failure reason. // Recompile without boxing elimination regardless failure reason.
assert(eliminate_boxing, "must make progress"); assert(eliminate_boxing, "must make progress");

View File

@ -52,6 +52,7 @@ public:
static const char* retry_no_iterative_escape_analysis(); static const char* retry_no_iterative_escape_analysis();
static const char* retry_no_reduce_allocation_merges(); static const char* retry_no_reduce_allocation_merges();
static const char* retry_no_locks_coarsening(); static const char* retry_no_locks_coarsening();
static const char* retry_no_superword();
// Print compilation timers and statistics // Print compilation timers and statistics
void print_timers(); void print_timers();

View File

@ -180,12 +180,16 @@ class Options {
const bool _do_reduce_allocation_merges; // Do try to reduce allocation merges. const bool _do_reduce_allocation_merges; // Do try to reduce allocation merges.
const bool _eliminate_boxing; // Do boxing elimination. const bool _eliminate_boxing; // Do boxing elimination.
const bool _do_locks_coarsening; // Do locks coarsening const bool _do_locks_coarsening; // Do locks coarsening
const bool _do_superword; // Do SuperWord
const bool _install_code; // Install the code that was compiled const bool _install_code; // Install the code that was compiled
public: public:
Options(bool subsume_loads, bool do_escape_analysis, Options(bool subsume_loads,
bool do_escape_analysis,
bool do_iterative_escape_analysis, bool do_iterative_escape_analysis,
bool do_reduce_allocation_merges, bool do_reduce_allocation_merges,
bool eliminate_boxing, bool do_locks_coarsening, bool eliminate_boxing,
bool do_locks_coarsening,
bool do_superword,
bool install_code) : bool install_code) :
_subsume_loads(subsume_loads), _subsume_loads(subsume_loads),
_do_escape_analysis(do_escape_analysis), _do_escape_analysis(do_escape_analysis),
@ -193,6 +197,7 @@ class Options {
_do_reduce_allocation_merges(do_reduce_allocation_merges), _do_reduce_allocation_merges(do_reduce_allocation_merges),
_eliminate_boxing(eliminate_boxing), _eliminate_boxing(eliminate_boxing),
_do_locks_coarsening(do_locks_coarsening), _do_locks_coarsening(do_locks_coarsening),
_do_superword(do_superword),
_install_code(install_code) { _install_code(install_code) {
} }
@ -204,6 +209,7 @@ class Options {
/* do_reduce_allocation_merges = */ false, /* do_reduce_allocation_merges = */ false,
/* eliminate_boxing = */ false, /* eliminate_boxing = */ false,
/* do_lock_coarsening = */ false, /* do_lock_coarsening = */ false,
/* do_superword = */ true,
/* install_code = */ true /* install_code = */ true
); );
} }
@ -578,6 +584,7 @@ private:
bool should_install_code() const { return _options._install_code; } bool should_install_code() const { return _options._install_code; }
/** Do locks coarsening. */ /** Do locks coarsening. */
bool do_locks_coarsening() const { return _options._do_locks_coarsening; } bool do_locks_coarsening() const { return _options._do_locks_coarsening; }
bool do_superword() const { return _options._do_superword; }
// Other fixed compilation parameters. // Other fixed compilation parameters.
ciMethod* method() const { return _method; } ciMethod* method() const { return _method; }

View File

@ -933,7 +933,8 @@ bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
// the residual iterations are more than 10% of the trip count // the residual iterations are more than 10% of the trip count
// and rounds of "unroll,optimize" are not making significant progress // and rounds of "unroll,optimize" are not making significant progress
// Progress defined as current size less than 20% larger than previous size. // Progress defined as current size less than 20% larger than previous size.
if (UseSuperWord && cl->node_count_before_unroll() > 0 && if (phase->C->do_superword() &&
cl->node_count_before_unroll() > 0 &&
future_unroll_cnt > LoopUnrollMin && future_unroll_cnt > LoopUnrollMin &&
is_residual_iters_large(future_unroll_cnt, cl) && is_residual_iters_large(future_unroll_cnt, cl) &&
1.2 * cl->node_count_before_unroll() < (double)_body.size()) { 1.2 * cl->node_count_before_unroll() < (double)_body.size()) {
@ -1038,7 +1039,7 @@ bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
} // switch } // switch
} }
if (UseSuperWord) { if (phase->C->do_superword()) {
// Only attempt slp analysis when user controls do not prohibit it // Only attempt slp analysis when user controls do not prohibit it
if (!range_checks_present() && (LoopMaxUnroll > _local_loop_unroll_factor)) { if (!range_checks_present() && (LoopMaxUnroll > _local_loop_unroll_factor)) {
// Once policy_slp_analysis succeeds, mark the loop with the // Once policy_slp_analysis succeeds, mark the loop with the

View File

@ -284,148 +284,3 @@ void PhaseIdealLoop::verify_fast_loop(LoopNode* head, const ProjNode* proj_true)
} }
#endif // ASSERT #endif // ASSERT
// Guard 'loop' with a constant-condition If and clone it, so that a reserve
// copy of the loop exists next to the original:
//
//   int(1) -> If -> IfTrue  -> original loop head
//                -> IfFalse -> cloned loop head (returned)
//
// The new If is handed to 'lk' through set_iff(). Returns the head of the
// clone (the "slow" loop).
LoopNode* PhaseIdealLoop::create_reserve_version_of_loop(IdealLoopTree *loop, CountedLoopReserveKit* lk) {
Node_List old_new;
LoopNode* head = loop->_head->as_Loop();
Node* entry = head->skip_strip_mined()->in(LoopNode::EntryControl);
_igvn.rehash_node_delayed(entry);
// Loop in which the new control nodes are registered: the parent of the
// parent when the head is strip mined, otherwise the direct parent.
IdealLoopTree* outer_loop = head->is_strip_mined() ? loop->_parent->_parent : loop->_parent;
// Constant-true condition: the IfTrue projection (original loop) is selected.
ConINode* const_1 = _igvn.intcon(1);
set_ctrl(const_1, C->root());
IfNode* iff = new IfNode(entry, const_1, PROB_MAX, COUNT_UNKNOWN);
register_node(iff, outer_loop, entry, dom_depth(entry));
ProjNode* iffast = new IfTrueNode(iff);
register_node(iffast, outer_loop, iff, dom_depth(iff));
ProjNode* ifslow = new IfFalseNode(iff);
register_node(ifslow, outer_loop, iff, dom_depth(iff));
// Clone the loop body. The clone becomes the slow loop. The
// original pre-header will (illegally) have 3 control users
// (old & new loops & new if).
clone_loop(loop, old_new, dom_depth(head), CloneIncludesStripMined, iff);
assert(old_new[head->_idx]->is_Loop(), "" );
// old_new maps an original node index to its clone; look up the cloned head.
LoopNode* slow_head = old_new[head->_idx]->as_Loop();
#ifndef PRODUCT
if (TraceLoopOpts) {
tty->print_cr("PhaseIdealLoop::create_reserve_version_of_loop:");
tty->print("\t iff = %d, ", iff->_idx); iff->dump();
tty->print("\t iffast = %d, ", iffast->_idx); iffast->dump();
tty->print("\t ifslow = %d, ", ifslow->_idx); ifslow->dump();
tty->print("\t before replace_input_of: head = %d, ", head->_idx); head->dump();
tty->print("\t before replace_input_of: slow_head = %d, ", slow_head->_idx); slow_head->dump();
}
#endif
// Fast (true) control
_igvn.replace_input_of(head->skip_strip_mined(), LoopNode::EntryControl, iffast);
// Slow (false) control
_igvn.replace_input_of(slow_head->skip_strip_mined(), LoopNode::EntryControl, ifslow);
// Both loop entries were rewired above, so dominator depths are recomputed.
recompute_dom_depth();
// Publish the guarding If to the kit.
lk->set_iff(iff);
#ifndef PRODUCT
if (TraceLoopOpts ) {
tty->print("\t after replace_input_of: head = %d, ", head->_idx); head->dump();
tty->print("\t after replace_input_of: slow_head = %d, ", slow_head->_idx); slow_head->dump();
}
#endif
return slow_head->as_Loop();
}
// Initialize every field to its "nothing reserved yet" state and then try to
// create the reserve copy right away. The outcome of that attempt is not
// returned here; it is recorded in _has_reserved by create_reserve().
// When 'active' is false, create_reserve() returns without doing anything.
CountedLoopReserveKit::CountedLoopReserveKit(PhaseIdealLoop* phase, IdealLoopTree *loop, bool active = true)
  : _phase(phase)
  , _lpt(loop)
  , _lp(nullptr)
  , _iff(nullptr)
  , _lp_reserved(nullptr)
  , _has_reserved(false)
  , _use_new(false)
  , _active(active)
{
  // Result intentionally unused: success is tracked through _has_reserved.
  (void) create_reserve();
}
// On destruction, redirect control to the reserved loop clone unless the kit
// is inactive, no reserve copy was made, or use_new() was called.
CountedLoopReserveKit::~CountedLoopReserveKit() {
  const bool revert_to_reserved = _active && _has_reserved && !_use_new;
  if (!revert_to_reserved) {
    return;
  }

  // Feeding intcon(0) into the guarding If reverts control flow to the
  // reserved copy.
  ConINode* zero = _phase->_igvn.intcon(0);
  _phase->set_ctrl(zero, _phase->C->root());
  _iff->set_req(1, zero);

#ifndef PRODUCT
  if (TraceLoopOpts) {
    tty->print_cr("CountedLoopReserveKit::~CountedLoopReserveKit()");
    tty->print("\t discard loop %d and revert to the reserved loop clone %d: ", _lp->_idx, _lp_reserved->_idx);
    _lp_reserved->dump();
  }
#endif
}
bool CountedLoopReserveKit::create_reserve() {
if (!_active) {
return false;
}
if(!_lpt->_head->is_CountedLoop()) {
if (TraceLoopOpts) {
tty->print_cr("CountedLoopReserveKit::create_reserve: %d not counted loop", _lpt->_head->_idx);
}
return false;
}
CountedLoopNode *cl = _lpt->_head->as_CountedLoop();
if (!cl->is_valid_counted_loop(T_INT)) {
if (TraceLoopOpts) {
tty->print_cr("CountedLoopReserveKit::create_reserve: %d not valid counted loop", cl->_idx);
}
return false; // skip malformed counted loop
}
if (!cl->is_main_loop()) {
bool loop_not_canonical = true;
if (cl->is_post_loop() && (cl->slp_max_unroll() > 0)) {
loop_not_canonical = false;
}
// only reject some loop forms
if (loop_not_canonical) {
if (TraceLoopOpts) {
tty->print_cr("CountedLoopReserveKit::create_reserve: %d not canonical loop", cl->_idx);
}
return false; // skip normal, pre, and post (conditionally) loops
}
}
_lp = _lpt->_head->as_Loop();
_lp_reserved = _phase->create_reserve_version_of_loop(_lpt, this);
if (!_lp_reserved->is_CountedLoop()) {
return false;
}
Node* ifslow_pred = _lp_reserved->skip_strip_mined()->in(LoopNode::EntryControl);
if (!ifslow_pred->is_IfFalse()) {
return false;
}
Node* iff = ifslow_pred->in(0);
if (!iff->is_If() || iff != _iff) {
return false;
}
if (iff->in(1)->Opcode() != Op_ConI) {
return false;
}
_has_reserved = true;
return true;
}

View File

@ -4640,7 +4640,7 @@ void PhaseIdealLoop::build_and_optimize() {
} }
// Convert scalar to superword operations at the end of all loop opts. // Convert scalar to superword operations at the end of all loop opts.
if (UseSuperWord && C->has_loops() && !C->major_progress()) { if (C->do_superword() && C->has_loops() && !C->major_progress()) {
// SuperWord transform // SuperWord transform
SuperWord sw(this); SuperWord sw(this);
for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) { for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) {
@ -5359,12 +5359,13 @@ int PhaseIdealLoop::build_loop_tree_impl( Node *n, int pre_order ) {
if( !n->is_CallStaticJava() || !n->as_CallStaticJava()->_name ) { if( !n->is_CallStaticJava() || !n->as_CallStaticJava()->_name ) {
Node *iff = n->in(0)->in(0); Node *iff = n->in(0)->in(0);
// No any calls for vectorized loops. // No any calls for vectorized loops.
if( UseSuperWord || !iff->is_If() || if (C->do_superword() ||
(n->in(0)->Opcode() == Op_IfFalse && !iff->is_If() ||
(1.0 - iff->as_If()->_prob) >= 0.01) || (n->in(0)->Opcode() == Op_IfFalse && (1.0 - iff->as_If()->_prob) >= 0.01) ||
(iff->as_If()->_prob >= 0.01) ) iff->as_If()->_prob >= 0.01) {
innermost->_has_call = 1; innermost->_has_call = 1;
} }
}
} else if( n->is_Allocate() && n->as_Allocate()->_is_scalar_replaceable ) { } else if( n->is_Allocate() && n->as_Allocate()->_is_scalar_replaceable ) {
// Disable loop optimizations if the loop has a scalar replaceable // Disable loop optimizations if the loop has a scalar replaceable
// allocation. This disabling may cause a potential performance lost // allocation. This disabling may cause a potential performance lost

View File

@ -42,7 +42,6 @@ class OuterStripMinedLoopEndNode;
class PredicateBlock; class PredicateBlock;
class PathFrequency; class PathFrequency;
class PhaseIdealLoop; class PhaseIdealLoop;
class CountedLoopReserveKit;
class VectorSet; class VectorSet;
class Invariance; class Invariance;
struct small_cache; struct small_cache;
@ -818,7 +817,6 @@ public:
class PhaseIdealLoop : public PhaseTransform { class PhaseIdealLoop : public PhaseTransform {
friend class IdealLoopTree; friend class IdealLoopTree;
friend class SuperWord; friend class SuperWord;
friend class CountedLoopReserveKit;
friend class ShenandoahBarrierC2Support; friend class ShenandoahBarrierC2Support;
friend class AutoNodeBudget; friend class AutoNodeBudget;
@ -1415,16 +1413,6 @@ public:
IfNode* unswitch_iff, IfNode* unswitch_iff,
CloneLoopMode mode); CloneLoopMode mode);
// Clone a loop and return the clone head (clone_loop_head).
// Added nodes include int(1), int(0) - disconnected, If, IfTrue, IfFalse,
// This routine was created for usage in CountedLoopReserveKit.
//
// int(1) -> If -> IfTrue -> original_loop_head
// |
// V
// IfFalse -> clone_loop_head (returned by function pointer)
//
LoopNode* create_reserve_version_of_loop(IdealLoopTree *loop, CountedLoopReserveKit* lk);
// Clone loop with an invariant test (that does not exit) and // Clone loop with an invariant test (that does not exit) and
// insert a clone of the test that selects which version to // insert a clone of the test that selects which version to
// execute. // execute.
@ -1794,69 +1782,6 @@ private:
uint _nodes_at_begin; uint _nodes_at_begin;
}; };
// This kit may be used for making of a reserved copy of a loop before this loop
// goes under non-reversible changes.
//
// Function create_reserve() creates a reserved copy (clone) of the loop.
// The reserved copy is created by calling
// PhaseIdealLoop::create_reserve_version_of_loop - see there how
// the original and reserved loops are connected in the outer graph.
// If create_reserve succeeded, it returns 'true' and _has_reserved is set to 'true'.
//
// By default the reserved copy (clone) of the loop is created as dead code - it is
// dominated in the outer loop by this node chain:
// intcon(1)->If->IfFalse->reserved_copy.
// The original loop is dominated by the same node chain but IfTrue projection:
// intcon(1)->If->IfTrue->original_loop.
//
// In this implementation of CountedLoopReserveKit the ctor calls create_reserve()
// and the dtor checks the _use_new value.
// If _use_new == false, the dtor "switches" control to the reserved copy of the loop
// by simply replacing the If's intcon(1) input with intcon(0).
//
// Here is a proposed example of usage (see also SuperWord::output in superword.cpp).
//
// void CountedLoopReserveKit_example()
// {
// CountedLoopReserveKit lrk(phase, lpt, DoReserveCopy = true); // create local object
// if (DoReserveCopy && !lrk.has_reserved()) {
// return; //failed to create reserved loop copy
// }
// ...
// // something is wrong, fall back to the reserved (unmodified) copy of the loop
// if (something_is_wrong) return; // ~CountedLoopReserveKit makes the switch
// ...
// //everything worked ok, return with the newly modified loop
// lrk.use_new();
// return; // ~CountedLoopReserveKit does nothing once use_new() was called
// }
//
// Keep in mind that, by default, if create_reserve() is not followed by use_new(),
// the dtor will switch control to the reserved (unmodified) copy of the loop.
// NOTE: If you modify nodes outside of the original loop, this class is of no help.
//
// Scoped helper that keeps a reserve copy (clone) of a loop while the original
// is being modified. The constructor attempts to create the copy; the
// destructor switches control to the copy unless use_new() was called.
//
// The destructor rewires the graph through _iff, so an accidental copy of this
// object would run that revert a second time on the same If node. Copying is
// therefore disabled.
class CountedLoopReserveKit {
  private:
    PhaseIdealLoop* _phase;        // phase used to clone the loop and to create constants
    IdealLoopTree*  _lpt;          // loop tree node of the loop to reserve
    LoopNode*       _lp;           // head of the original loop, set by create_reserve()
    IfNode*         _iff;          // constant-condition If selecting original vs. reserved loop
    LoopNode*       _lp_reserved;  // head of the reserved clone, set by create_reserve()
    bool            _has_reserved; // true once create_reserve() succeeded
    bool            _use_new;      // true once use_new() was called: keep the modified loop
    const bool      _active;       // may be set to false in ctor, then the object is dummy

  public:
    CountedLoopReserveKit(PhaseIdealLoop* phase, IdealLoopTree *loop, bool active);
    ~CountedLoopReserveKit();

    // Not copyable: the destructor has graph side effects that must run once.
    CountedLoopReserveKit(const CountedLoopReserveKit&) = delete;
    CountedLoopReserveKit& operator=(const CountedLoopReserveKit&) = delete;

    // Accept the modified loop; the destructor then leaves the graph untouched.
    void use_new()           { _use_new = true; }
    // Record the guarding If; called by PhaseIdealLoop::create_reserve_version_of_loop.
    void set_iff(IfNode* x)  { _iff = x; }
    // True only for an active kit whose reserve copy was created successfully.
    bool has_reserved() const { return _active && _has_reserved; }

  private:
    // Creates the reserve copy; returns true and sets _has_reserved on success.
    bool create_reserve();
};// class CountedLoopReserveKit
inline Node* IdealLoopTree::tail() { inline Node* IdealLoopTree::tail() {
// Handle lazy update of _tail field. // Handle lazy update of _tail field.
if (_tail->in(0) == nullptr) { if (_tail->in(0) == nullptr) {

View File

@ -26,6 +26,7 @@
#include "memory/allocation.inline.hpp" #include "memory/allocation.inline.hpp"
#include "memory/resourceArea.hpp" #include "memory/resourceArea.hpp"
#include "opto/addnode.hpp" #include "opto/addnode.hpp"
#include "opto/c2compiler.hpp"
#include "opto/castnode.hpp" #include "opto/castnode.hpp"
#include "opto/convertnode.hpp" #include "opto/convertnode.hpp"
#include "opto/matcher.hpp" #include "opto/matcher.hpp"
@ -70,7 +71,6 @@ SuperWord::SuperWord(PhaseIdealLoop* phase) :
_race_possible(false), // cases where SDMU is true _race_possible(false), // cases where SDMU is true
_early_return(true), // analysis evaluations routine _early_return(true), // analysis evaluations routine
_do_vector_loop(phase->C->do_vector_loop()), // whether to do vectorization/simd style _do_vector_loop(phase->C->do_vector_loop()), // whether to do vectorization/simd style
_do_reserve_copy(DoReserveCopyInSuperWord),
_num_work_vecs(0), // amount of vector work we have _num_work_vecs(0), // amount of vector work we have
_num_reductions(0) // amount of reduction work we have _num_reductions(0) // amount of reduction work we have
{ {
@ -85,7 +85,7 @@ SuperWord::SuperWord(PhaseIdealLoop* phase) :
//------------------------------transform_loop--------------------------- //------------------------------transform_loop---------------------------
bool SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) { bool SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) {
assert(UseSuperWord, "should be"); assert(_phase->C->do_superword(), "SuperWord option should be enabled");
// SuperWord only works with power of two vector sizes. // SuperWord only works with power of two vector sizes.
int vector_width = Matcher::vector_width_in_bytes(T_BYTE); int vector_width = Matcher::vector_width_in_bytes(T_BYTE);
if (vector_width < 2 || !is_power_of_2(vector_width)) { if (vector_width < 2 || !is_power_of_2(vector_width)) {
@ -2493,24 +2493,13 @@ void SuperWord::schedule_reorder_memops(Node_List &memops_schedule) {
} }
} }
#ifndef PRODUCT
void SuperWord::print_loop(bool whole) {
Node_Stack stack(_arena, _phase->C->unique() >> 2);
Node_List rpo_list;
VectorSet visited(_arena);
visited.set(lpt()->_head->_idx);
_phase->rpo(lpt()->_head, stack, visited, rpo_list);
_phase->dump(lpt(), rpo_list.size(), rpo_list );
if(whole) {
tty->print_cr("\n Whole loop tree");
_phase->dump();
tty->print_cr(" End of whole loop tree\n");
}
}
#endif
//------------------------------output--------------------------- //------------------------------output---------------------------
// Convert packs into vector node operations // Convert packs into vector node operations
// At this point, all correctness and profitability checks have passed.
// We start the irreversible process of editing the C2 graph. Should
// there be an unexpected situation (assert fails), then we can only
// bail out of the compilation, as the graph has already been partially
// modified. We bail out, and retry without SuperWord.
bool SuperWord::output() { bool SuperWord::output() {
CountedLoopNode *cl = lpt()->_head->as_CountedLoop(); CountedLoopNode *cl = lpt()->_head->as_CountedLoop();
assert(cl->is_main_loop(), "SLP should only work on main loops"); assert(cl->is_main_loop(), "SLP should only work on main loops");
@ -2538,17 +2527,6 @@ bool SuperWord::output() {
uint max_vlen_in_bytes = 0; uint max_vlen_in_bytes = 0;
uint max_vlen = 0; uint max_vlen = 0;
NOT_PRODUCT(if(is_trace_loop_reverse()) {tty->print_cr("VPointer::output: print loop before create_reserve_version_of_loop"); print_loop(true);})
CountedLoopReserveKit make_reversable(_phase, _lpt, do_reserve_copy());
NOT_PRODUCT(if(is_trace_loop_reverse()) {tty->print_cr("VPointer::output: print loop after create_reserve_version_of_loop"); print_loop(true);})
if (do_reserve_copy() && !make_reversable.has_reserved()) {
NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("VPointer::output: loop was not reserved correctly, exiting SuperWord");})
return false;
}
for (int i = 0; i < _block.length(); i++) { for (int i = 0; i < _block.length(); i++) {
Node* n = _block.at(i); Node* n = _block.at(i);
Node_List* p = my_pack(n); Node_List* p = my_pack(n);
@ -2584,12 +2562,9 @@ bool SuperWord::output() {
// Promote value to be stored to vector // Promote value to be stored to vector
Node* val = vector_opd(p, MemNode::ValueIn); Node* val = vector_opd(p, MemNode::ValueIn);
if (val == nullptr) { if (val == nullptr) {
if (do_reserve_copy()) {
NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("VPointer::output: val should not be null, exiting SuperWord");})
assert(false, "input to vector store was not created"); assert(false, "input to vector store was not created");
return false; //and reverse to backup IG C->record_failure(C2Compiler::retry_no_superword());
} return false; // bailout
ShouldNotReachHere();
} }
Node* ctl = n->in(MemNode::Control); Node* ctl = n->in(MemNode::Control);
@ -2726,22 +2701,16 @@ bool SuperWord::output() {
} else { } else {
in1 = vector_opd(p, 1); in1 = vector_opd(p, 1);
if (in1 == nullptr) { if (in1 == nullptr) {
if (do_reserve_copy()) {
NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("VPointer::output: in1 should not be null, exiting SuperWord");})
assert(false, "input in1 to vector operand was not created"); assert(false, "input in1 to vector operand was not created");
return false; //and reverse to backup IG C->record_failure(C2Compiler::retry_no_superword());
} return false; // bailout
ShouldNotReachHere();
} }
} }
Node* in2 = vector_opd(p, 2); Node* in2 = vector_opd(p, 2);
if (in2 == nullptr) { if (in2 == nullptr) {
if (do_reserve_copy()) {
NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("VPointer::output: in2 should not be null, exiting SuperWord");})
assert(false, "input in2 to vector operand was not created"); assert(false, "input in2 to vector operand was not created");
return false; //and reverse to backup IG C->record_failure(C2Compiler::retry_no_superword());
} return false; // bailout
ShouldNotReachHere();
} }
if (VectorNode::is_invariant_vector(in1) && (node_isa_reduction == false) && (n->is_Add() || n->is_Mul())) { if (VectorNode::is_invariant_vector(in1) && (node_isa_reduction == false) && (n->is_Add() || n->is_Mul())) {
// Move invariant vector input into second position to avoid register spilling. // Move invariant vector input into second position to avoid register spilling.
@ -2810,21 +2779,15 @@ bool SuperWord::output() {
vn = VectorNode::make(opc, in1, in2, in3, vlen, velt_basic_type(n)); vn = VectorNode::make(opc, in1, in2, in3, vlen, velt_basic_type(n));
vlen_in_bytes = vn->as_Vector()->length_in_bytes(); vlen_in_bytes = vn->as_Vector()->length_in_bytes();
} else { } else {
if (do_reserve_copy()) {
NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("VPointer::output: Unhandled scalar opcode (%s), ShouldNotReachHere, exiting SuperWord", NodeClassNames[opc]);})
assert(false, "Unhandled scalar opcode (%s)", NodeClassNames[opc]); assert(false, "Unhandled scalar opcode (%s)", NodeClassNames[opc]);
return false; //and reverse to backup IG C->record_failure(C2Compiler::retry_no_superword());
} return false; // bailout
ShouldNotReachHere();
} }
assert(vn != nullptr, "sanity");
if (vn == nullptr) { if (vn == nullptr) {
if (do_reserve_copy()){ assert(false, "got null node instead of vector node");
NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("VPointer::output: got null node, cannot proceed, exiting SuperWord");}) C->record_failure(C2Compiler::retry_no_superword());
return false; //and reverse to backup IG return false; // bailout
}
ShouldNotReachHere();
} }
_block.at_put(i, vn); _block.at_put(i, vn);
@ -2871,11 +2834,6 @@ bool SuperWord::output() {
} }
} }
if (do_reserve_copy()) {
make_reversable.use_new();
}
NOT_PRODUCT(if(is_trace_loop_reverse()) {tty->print_cr("\n Final loop after SuperWord"); print_loop(true);})
return true; return true;
} }
@ -2906,9 +2864,8 @@ Node* SuperWord::vector_opd(Node_List* p, int opd_idx) {
if (have_same_inputs) { if (have_same_inputs) {
if (opd->is_Vector() || opd->is_LoadVector()) { if (opd->is_Vector() || opd->is_LoadVector()) {
assert(((opd_idx != 2) || !VectorNode::is_shift(p0)), "shift's count can't be vector");
if (opd_idx == 2 && VectorNode::is_shift(p0)) { if (opd_idx == 2 && VectorNode::is_shift(p0)) {
NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("shift's count can't be vector");}) assert(false, "shift's count can't be vector");
return nullptr; return nullptr;
} }
return opd; // input is matching vector return opd; // input is matching vector
@ -2932,9 +2889,8 @@ Node* SuperWord::vector_opd(Node_List* p, int opd_idx) {
_igvn.register_new_node_with_optimizer(cnt); _igvn.register_new_node_with_optimizer(cnt);
_phase->set_ctrl(cnt, _phase->get_ctrl(opd)); _phase->set_ctrl(cnt, _phase->get_ctrl(opd));
} }
assert(opd->bottom_type()->isa_int(), "int type only");
if (!opd->bottom_type()->isa_int()) { if (!opd->bottom_type()->isa_int()) {
NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("Should be int type only");}) assert(false, "int type only");
return nullptr; return nullptr;
} }
} }
@ -2944,9 +2900,8 @@ Node* SuperWord::vector_opd(Node_List* p, int opd_idx) {
_phase->set_ctrl(cnt, _phase->get_ctrl(opd)); _phase->set_ctrl(cnt, _phase->get_ctrl(opd));
return cnt; return cnt;
} }
assert(!opd->is_StoreVector(), "such vector is not expected here");
if (opd->is_StoreVector()) { if (opd->is_StoreVector()) {
NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("StoreVector is not expected here");}) assert(false, "StoreVector is not expected here");
return nullptr; return nullptr;
} }
// Convert scalar input to vector with the same number of elements as // Convert scalar input to vector with the same number of elements as
@ -2983,18 +2938,16 @@ Node* SuperWord::vector_opd(Node_List* p, int opd_idx) {
for (uint i = 1; i < vlen; i++) { for (uint i = 1; i < vlen; i++) {
Node* pi = p->at(i); Node* pi = p->at(i);
Node* in = pi->in(opd_idx); Node* in = pi->in(opd_idx);
assert(my_pack(in) == nullptr, "Should already have been unpacked");
if (my_pack(in) != nullptr) { if (my_pack(in) != nullptr) {
NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("Should already have been unpacked");}) assert(false, "Should already have been unpacked");
return nullptr; return nullptr;
} }
assert(opd_bt == in->bottom_type()->basic_type(), "all same type"); assert(opd_bt == in->bottom_type()->basic_type(), "all same type");
pk->add_opd(in); pk->add_opd(in);
if (VectorNode::is_muladds2i(pi)) { if (VectorNode::is_muladds2i(pi)) {
Node* in2 = pi->in(opd_idx + 2); Node* in2 = pi->in(opd_idx + 2);
assert(my_pack(in2) == nullptr, "Should already have been unpacked");
if (my_pack(in2) != nullptr) { if (my_pack(in2) != nullptr) {
NOT_PRODUCT(if (is_trace_loop_reverse() || TraceLoopOpts) { tty->print_cr("Should already have been unpacked"); }) assert(false, "Should already have been unpacked");
return nullptr; return nullptr;
} }
assert(opd_bt == in2->bottom_type()->basic_type(), "all same type"); assert(opd_bt == in2->bottom_type()->basic_type(), "all same type");

View File

@ -281,10 +281,8 @@ class SuperWord : public ResourceObj {
bool is_trace_loop() { return (_vector_loop_debug & 8) > 0; } bool is_trace_loop() { return (_vector_loop_debug & 8) > 0; }
bool is_trace_adjacent() { return (_vector_loop_debug & 16) > 0; } bool is_trace_adjacent() { return (_vector_loop_debug & 16) > 0; }
bool is_trace_cmov() { return (_vector_loop_debug & 32) > 0; } bool is_trace_cmov() { return (_vector_loop_debug & 32) > 0; }
bool is_trace_loop_reverse() { return (_vector_loop_debug & 64) > 0; }
#endif #endif
bool do_vector_loop() { return _do_vector_loop; } bool do_vector_loop() { return _do_vector_loop; }
bool do_reserve_copy() { return _do_reserve_copy; }
const GrowableArray<Node_List*>& packset() const { return _packset; } const GrowableArray<Node_List*>& packset() const { return _packset; }
const GrowableArray<Node*>& block() const { return _block; } const GrowableArray<Node*>& block() const { return _block; }
@ -298,7 +296,6 @@ class SuperWord : public ResourceObj {
bool _race_possible; // In cases where SDMU is true bool _race_possible; // In cases where SDMU is true
bool _early_return; // True if we do not initialize bool _early_return; // True if we do not initialize
bool _do_vector_loop; // whether to do vectorization/simd style bool _do_vector_loop; // whether to do vectorization/simd style
bool _do_reserve_copy; // do reserve copy of the graph(loop) before final modification in output
int _num_work_vecs; // Number of non memory vector operations int _num_work_vecs; // Number of non memory vector operations
int _num_reductions; // Number of reduction expressions applied int _num_reductions; // Number of reduction expressions applied
#ifndef PRODUCT #ifndef PRODUCT
@ -447,7 +444,6 @@ private:
// Tracing support // Tracing support
#ifndef PRODUCT #ifndef PRODUCT
void find_adjacent_refs_trace_1(Node* best_align_to_mem_ref, int best_iv_adjustment); void find_adjacent_refs_trace_1(Node* best_align_to_mem_ref, int best_iv_adjustment);
void print_loop(bool whole);
#endif #endif
// If strict memory alignment is required (vectors_should_be_aligned), then check if // If strict memory alignment is required (vectors_should_be_aligned), then check if
// mem_ref is aligned with best_align_to_mem_ref. // mem_ref is aligned with best_align_to_mem_ref.

View File

@ -524,6 +524,7 @@ static SpecialFlag const special_jvm_flags[] = {
{ "G1ConcRSHotCardLimit", JDK_Version::undefined(), JDK_Version::jdk(21), JDK_Version::undefined() }, { "G1ConcRSHotCardLimit", JDK_Version::undefined(), JDK_Version::jdk(21), JDK_Version::undefined() },
{ "RefDiscoveryPolicy", JDK_Version::undefined(), JDK_Version::jdk(21), JDK_Version::undefined() }, { "RefDiscoveryPolicy", JDK_Version::undefined(), JDK_Version::jdk(21), JDK_Version::undefined() },
{ "MetaspaceReclaimPolicy", JDK_Version::undefined(), JDK_Version::jdk(21), JDK_Version::undefined() }, { "MetaspaceReclaimPolicy", JDK_Version::undefined(), JDK_Version::jdk(21), JDK_Version::undefined() },
{ "DoReserveCopyInSuperWord", JDK_Version::undefined(), JDK_Version::jdk(22), JDK_Version::jdk(23) },
#ifdef ASSERT #ifdef ASSERT
{ "DummyObsoleteTestFlag", JDK_Version::undefined(), JDK_Version::jdk(18), JDK_Version::undefined() }, { "DummyObsoleteTestFlag", JDK_Version::undefined(), JDK_Version::jdk(18), JDK_Version::undefined() },