6810845: Performance regression in mpegaudio on x64
Used the outer loop frequency in the frequency checks in the register allocator (RA).
Reviewed-by: never, twisti
commit 68cf08d2c3
parent 3b786f8edc
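
The gist of the change: the register allocator's notion of a "high frequency" live range becomes relative to the method being compiled. PhaseCFG now records the frequency of the outermost loop after Estimate_Block_Frequency(), the PhaseChaitin constructor caps the fixed OPTO_LRG_HIGH_FREQ cutoff with that value, and the coalescing code compares lrg._maxfreq against the capped threshold instead of the raw constant. Below is a minimal standalone sketch of that capping, not HotSpot code; the Loop struct, the value used for the constant, and the sample frequencies are illustrative assumptions.

    // Standalone sketch of the new threshold computation (not HotSpot code).
    // The Loop type, the value used for OPTO_LRG_HIGH_FREQ and the sample
    // frequencies are assumptions; only the MIN2-style capping mirrors the diff.
    #include <algorithm>
    #include <cstdio>

    struct Loop {
        float freq;    // frequency after CFGLoop::scale_freq()
        Loop* child;   // outermost nested loop, or nullptr
        float outer_loop_freq() const {   // shape of CFGLoop::outer_loop_freq()
            return child != nullptr ? child->freq : freq;
        }
    };

    int main() {
        const float OPTO_LRG_HIGH_FREQ = 13.0f;  // stand-in for the fixed cutoff
        Loop method_loop = {6.0f, nullptr};      // outermost real loop runs ~6x per call
        Loop root        = {1.0f, &method_loop}; // pseudo-loop wrapping the method

        // Old behaviour: the threshold was always OPTO_LRG_HIGH_FREQ.
        // New behaviour: it can never exceed the outermost loop frequency.
        float high_frequency_lrg = std::min(OPTO_LRG_HIGH_FREQ, root.outer_loop_freq());

        float lrg_maxfreq = 6.0f;                // live range used on every outer iteration
        bool is_high_freq = lrg_maxfreq >= high_frequency_lrg;  // true only after the fix
        std::printf("threshold=%.1f high_freq=%s\n",
                    high_frequency_lrg, is_high_freq ? "yes" : "no");
        return 0;
    }
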
@@ -371,6 +371,7 @@ class PhaseCFG : public Phase {
   Block *_broot;                // Basic block of root
   uint _rpo_ctr;
   CFGLoop* _root_loop;
+  float _outer_loop_freq;       // Outmost loop frequency
 
   // Per node latency estimation, valid only during GCM
   GrowableArray<uint> _node_latency;
@@ -537,6 +538,7 @@ class CFGLoop : public CFGElement {
   void compute_loop_depth(int depth);
   void compute_freq(); // compute frequency with loop assuming head freq 1.0f
   void scale_freq();   // scale frequency by loop trip count (including outer loops)
+  float outer_loop_freq() const; // frequency of outer loop
   bool in_loop_nest(Block* b);
   float trip_count() const { return 1.0f / _exit_prob; }
   virtual bool is_loop() { return true; }
@@ -391,7 +391,7 @@
   product(intx, EliminateAllocationArraySizeLimit, 64,                      \
           "Array size (number of elements) limit for scalar replacement")   \
                                                                             \
   product(bool, UseOptoBiasInlining, true,                                  \
           "Generate biased locking code in C2 ideal graph")                 \
                                                                             \
   product(intx, ValueSearchLimit, 1000,                                     \
@@ -410,7 +410,7 @@
           "Miniumum %% of a successor (predecessor) for which block layout "\
           "a will allow a fork (join) in a single chain")                   \
                                                                             \
-  product(bool, BlockLayoutRotateLoops, false,                              \
+  product(bool, BlockLayoutRotateLoops, true,                               \
           "Allow back branches to be fall throughs in the block layour")    \
 
 C2_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_NOTPRODUCT_FLAG)
@@ -149,6 +149,9 @@ PhaseChaitin::PhaseChaitin(uint unique, PhaseCFG &cfg, Matcher &matcher)
 #endif
 {
   NOT_PRODUCT( Compile::TracePhase t3("ctorChaitin", &_t_ctorChaitin, TimeCompiler); )
+
+  _high_frequency_lrg = MIN2(float(OPTO_LRG_HIGH_FREQ), _cfg._outer_loop_freq);
+
   uint i,j;
   // Build a list of basic blocks, sorted by frequency
   _blks = NEW_RESOURCE_ARRAY( Block *, _cfg._num_blocks );
@@ -338,6 +338,8 @@ class PhaseChaitin : public PhaseRegAlloc {
 
   Block **_blks;                // Array of blocks sorted by frequency for coalescing
 
+  float _high_frequency_lrg;    // Frequency at which LRG will be spilled for debug info
+
 #ifndef PRODUCT
   bool _trace_spilling;
 #endif
@@ -360,6 +362,8 @@ public:
 
   uint n2lidx( const Node *n ) const { return _names[n->_idx]; }
 
+  float high_frequency_lrg() const { return _high_frequency_lrg; }
+
 #ifndef PRODUCT
   bool trace_spilling() const { return _trace_spilling; }
 #endif
@@ -473,7 +473,7 @@ void PhaseAggressiveCoalesce::insert_copies( Matcher &matcher ) {
       } // End of is two-adr
 
       // Insert a copy at a debug use for a lrg which has high frequency
-      if( (b->_freq < OPTO_DEBUG_SPLIT_FREQ) && n->is_MachSafePoint() ) {
+      if( b->_freq < OPTO_DEBUG_SPLIT_FREQ || b->is_uncommon(_phc._cfg._bbs) ) {
         // Walk the debug inputs to the node and check for lrg freq
         JVMState* jvms = n->jvms();
         uint debug_start = jvms ? jvms->debug_start() : 999999;
@@ -487,7 +487,7 @@ void PhaseAggressiveCoalesce::insert_copies( Matcher &matcher ) {
           LRG &lrg = lrgs(nidx);
 
           // If this lrg has a high frequency use/def
-          if( lrg._maxfreq >= OPTO_LRG_HIGH_FREQ ) {
+          if( lrg._maxfreq >= _phc.high_frequency_lrg() ) {
             // If the live range is also live out of this block (like it
             // would be for a fast/slow idiom), the normal spill mechanism
             // does an excellent job.  If it is not live out of this block
@@ -1374,6 +1374,9 @@ void PhaseCFG::Estimate_Block_Frequency() {
   _root_loop->_freq = 1.0;
   _root_loop->scale_freq();
 
+  // Save outmost loop frequency for LRG frequency threshold
+  _outer_loop_freq = _root_loop->outer_loop_freq();
+
   // force paths ending at uncommon traps to be infrequent
   if (!C->do_freq_based_layout()) {
     Block_List worklist;
@@ -1898,6 +1901,7 @@ bool CFGLoop::in_loop_nest(Block* b) {
 // Do a top down traversal of loop tree (visit outer loops first.)
 void CFGLoop::scale_freq() {
   float loop_freq = _freq * trip_count();
+  _freq = loop_freq;
   for (int i = 0; i < _members.length(); i++) {
     CFGElement* s = _members.at(i);
     float block_freq = s->_freq * loop_freq;
@@ -1912,6 +1916,14 @@ void CFGLoop::scale_freq() {
   }
 }
 
+// Frequency of outer loop
+float CFGLoop::outer_loop_freq() const {
+  if (_child != NULL) {
+    return _child->_freq;
+  }
+  return _freq;
+}
+
 #ifndef PRODUCT
 //------------------------------dump_tree--------------------------------------
 void CFGLoop::dump_tree() const {
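
For reference, here is a minimal standalone sketch of how the added "_freq = loop_freq;" in scale_freq() and the new outer_loop_freq() fit together: once every loop records its trip-count-scaled frequency, the root loop can report the frequency of its outermost real loop, which becomes the cap used in the PhaseChaitin constructor above. The Loop struct, its single-child recursion and the trip counts are simplified assumptions, not the actual CFGLoop code.

    // Simplified stand-in for CFGLoop (illustration only): freq starts as the
    // frequency computed assuming the loop head runs once, and scale_freq()
    // folds in trip counts top-down, recording the scaled value as the patch does.
    #include <cstdio>

    struct Loop {
        float freq;        // relative frequency (head assumed to run once)
        float trip_count;  // estimated iterations per entry
        Loop* child;       // outermost nested loop, or nullptr

        void scale_freq() {
            freq = freq * trip_count;     // mirrors the added "_freq = loop_freq"
            if (child != nullptr) {
                child->freq *= freq;      // child was computed relative to this loop
                child->scale_freq();
            }
        }
        // Shape of the new CFGLoop::outer_loop_freq(): report the outermost
        // real loop's frequency, or our own if there is no nested loop.
        float outer_loop_freq() const { return child != nullptr ? child->freq : freq; }
    };

    int main() {
        Loop inner = {1.0f, 5.0f, nullptr};  // ~5 iterations per outer iteration
        Loop outer = {1.0f, 10.0f, &inner};  // ~10 iterations per method call
        Loop root  = {1.0f, 1.0f, &outer};   // pseudo-loop around the whole method

        root.scale_freq();
        // outer runs ~10x and inner ~50x; the RA threshold gets capped at 10 here.
        std::printf("outer=%.1f inner=%.1f outer_loop_freq=%.1f\n",
                    outer.freq, inner.freq, root.outer_loop_freq());
        return 0;
    }
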
@@ -340,6 +340,10 @@ const class TypePtr *MachNode::adr_type() const {
   if (base == NodeSentinel)  return TypePtr::BOTTOM;
 
   const Type* t = base->bottom_type();
+  if (UseCompressedOops && Universe::narrow_oop_shift() == 0) {
+    // 32-bit unscaled narrow oop can be the base of any address expression
+    t = t->make_ptr();
+  }
   if (t->isa_intptr_t() && offset != 0 && offset != Type::OffsetBot) {
     // We cannot assert that the offset does not look oop-ish here.
     // Depending on the heap layout the cardmark base could land
@@ -353,6 +357,7 @@ const class TypePtr *MachNode::adr_type() const {
 
   // be conservative if we do not recognize the type
   if (tp == NULL) {
+    assert(false, "this path may produce not optimal code");
     return TypePtr::BOTTOM;
   }
   assert(tp->base() != Type::AnyPtr, "not a bare pointer");