8023003: Cleanup the public interface to PhaseCFG
Public methods that don't need to be public should be private. Reviewed-by: kvn, twisti
This commit is contained in:
parent
5467bc74f8
commit
a235ecb344
@ -35,10 +35,6 @@
|
||||
#include "opto/rootnode.hpp"
|
||||
#include "utilities/copy.hpp"
|
||||
|
||||
// Optimization - Graph Style
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
void Block_Array::grow( uint i ) {
|
||||
assert(i >= Max(), "must be an overflow");
|
||||
debug_only(_limit = i+1);
|
||||
@ -54,7 +50,6 @@ void Block_Array::grow( uint i ) {
|
||||
Copy::zero_to_bytes( &_blocks[old], (_size-old)*sizeof(Block*) );
|
||||
}
|
||||
|
||||
//=============================================================================
|
||||
void Block_List::remove(uint i) {
|
||||
assert(i < _cnt, "index out of bounds");
|
||||
Copy::conjoint_words_to_lower((HeapWord*)&_blocks[i+1], (HeapWord*)&_blocks[i], ((_cnt-i-1)*sizeof(Block*)));
|
||||
@ -76,8 +71,6 @@ void Block_List::print() {
|
||||
}
|
||||
#endif
|
||||
|
||||
//=============================================================================
|
||||
|
||||
uint Block::code_alignment() {
|
||||
// Check for Root block
|
||||
if (_pre_order == 0) return CodeEntryAlignment;
|
||||
@ -113,7 +106,6 @@ uint Block::compute_loop_alignment() {
|
||||
return unit_sz; // no particular alignment
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Compute the size of first 'inst_cnt' instructions in this block.
|
||||
// Return the number of instructions left to compute if the block has
|
||||
// less then 'inst_cnt' instructions. Stop, and return 0 if sum_size
|
||||
@ -138,7 +130,6 @@ uint Block::compute_first_inst_size(uint& sum_size, uint inst_cnt,
|
||||
return inst_cnt;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
uint Block::find_node( const Node *n ) const {
|
||||
for( uint i = 0; i < _nodes.size(); i++ ) {
|
||||
if( _nodes[i] == n )
|
||||
@ -153,7 +144,6 @@ void Block::find_remove( const Node *n ) {
|
||||
_nodes.remove(find_node(n));
|
||||
}
|
||||
|
||||
//------------------------------is_Empty---------------------------------------
|
||||
// Return empty status of a block. Empty blocks contain only the head, other
|
||||
// ideal nodes, and an optional trailing goto.
|
||||
int Block::is_Empty() const {
|
||||
@ -192,7 +182,6 @@ int Block::is_Empty() const {
|
||||
return not_empty;
|
||||
}
|
||||
|
||||
//------------------------------has_uncommon_code------------------------------
|
||||
// Return true if the block's code implies that it is likely to be
|
||||
// executed infrequently. Check to see if the block ends in a Halt or
|
||||
// a low probability call.
|
||||
@ -218,7 +207,6 @@ bool Block::has_uncommon_code() const {
|
||||
return op == Op_Halt;
|
||||
}
|
||||
|
||||
//------------------------------is_uncommon------------------------------------
|
||||
// True if block is low enough frequency or guarded by a test which
|
||||
// mostly does not go here.
|
||||
bool Block::is_uncommon(PhaseCFG* cfg) const {
|
||||
@ -271,7 +259,6 @@ bool Block::is_uncommon(PhaseCFG* cfg) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
//------------------------------dump-------------------------------------------
|
||||
#ifndef PRODUCT
|
||||
void Block::dump_bidx(const Block* orig, outputStream* st) const {
|
||||
if (_pre_order) st->print("B%d",_pre_order);
|
||||
@ -364,13 +351,12 @@ void Block::dump(const PhaseCFG* cfg) const {
|
||||
}
|
||||
#endif
|
||||
|
||||
//=============================================================================
|
||||
//------------------------------PhaseCFG---------------------------------------
|
||||
PhaseCFG::PhaseCFG(Arena* arena, RootNode* root, Matcher& matcher)
|
||||
: Phase(CFG)
|
||||
, _block_arena(arena)
|
||||
, _node_to_block_mapping(arena)
|
||||
, _root(root)
|
||||
, _matcher(matcher)
|
||||
, _node_to_block_mapping(arena)
|
||||
, _node_latency(NULL)
|
||||
#ifndef PRODUCT
|
||||
, _trace_opto_pipelining(TraceOptoPipelining || C->method_has_option("TraceOptoPipelining"))
|
||||
@ -390,11 +376,10 @@ PhaseCFG::PhaseCFG(Arena* arena, RootNode* root, Matcher& matcher)
|
||||
_goto->set_req(0,_goto);
|
||||
|
||||
// Build the CFG in Reverse Post Order
|
||||
_num_blocks = build_cfg();
|
||||
_broot = get_block_for_node(_root);
|
||||
_number_of_blocks = build_cfg();
|
||||
_root_block = get_block_for_node(_root);
|
||||
}
|
||||
|
||||
//------------------------------build_cfg--------------------------------------
|
||||
// Build a proper looking CFG. Make every block begin with either a StartNode
|
||||
// or a RegionNode. Make every block end with either a Goto, If or Return.
|
||||
// The RootNode both starts and ends it's own block. Do this with a recursive
|
||||
@ -496,13 +481,12 @@ uint PhaseCFG::build_cfg() {
|
||||
return sum;
|
||||
}
|
||||
|
||||
//------------------------------insert_goto_at---------------------------------
|
||||
// Inserts a goto & corresponding basic block between
|
||||
// block[block_no] and its succ_no'th successor block
|
||||
void PhaseCFG::insert_goto_at(uint block_no, uint succ_no) {
|
||||
// get block with block_no
|
||||
assert(block_no < _num_blocks, "illegal block number");
|
||||
Block* in = _blocks[block_no];
|
||||
assert(block_no < number_of_blocks(), "illegal block number");
|
||||
Block* in = get_block(block_no);
|
||||
// get successor block succ_no
|
||||
assert(succ_no < in->_num_succs, "illegal successor number");
|
||||
Block* out = in->_succs[succ_no];
|
||||
@ -537,11 +521,9 @@ void PhaseCFG::insert_goto_at(uint block_no, uint succ_no) {
|
||||
// Set the frequency of the new block
|
||||
block->_freq = freq;
|
||||
// add new basic block to basic block list
|
||||
_blocks.insert(block_no + 1, block);
|
||||
_num_blocks++;
|
||||
add_block_at(block_no + 1, block);
|
||||
}
|
||||
|
||||
//------------------------------no_flip_branch---------------------------------
|
||||
// Does this block end in a multiway branch that cannot have the default case
|
||||
// flipped for another case?
|
||||
static bool no_flip_branch( Block *b ) {
|
||||
@ -560,7 +542,6 @@ static bool no_flip_branch( Block *b ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
//------------------------------convert_NeverBranch_to_Goto--------------------
|
||||
// Check for NeverBranch at block end. This needs to become a GOTO to the
|
||||
// true target. NeverBranch are treated as a conditional branch that always
|
||||
// goes the same direction for most of the optimizer and are used to give a
|
||||
@ -598,7 +579,6 @@ void PhaseCFG::convert_NeverBranch_to_Goto(Block *b) {
|
||||
dead->_nodes[k]->del_req(j);
|
||||
}
|
||||
|
||||
//------------------------------move_to_next-----------------------------------
|
||||
// Helper function to move block bx to the slot following b_index. Return
|
||||
// true if the move is successful, otherwise false
|
||||
bool PhaseCFG::move_to_next(Block* bx, uint b_index) {
|
||||
@ -606,20 +586,22 @@ bool PhaseCFG::move_to_next(Block* bx, uint b_index) {
|
||||
|
||||
// Return false if bx is already scheduled.
|
||||
uint bx_index = bx->_pre_order;
|
||||
if ((bx_index <= b_index) && (_blocks[bx_index] == bx)) {
|
||||
if ((bx_index <= b_index) && (get_block(bx_index) == bx)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Find the current index of block bx on the block list
|
||||
bx_index = b_index + 1;
|
||||
while( bx_index < _num_blocks && _blocks[bx_index] != bx ) bx_index++;
|
||||
assert(_blocks[bx_index] == bx, "block not found");
|
||||
while (bx_index < number_of_blocks() && get_block(bx_index) != bx) {
|
||||
bx_index++;
|
||||
}
|
||||
assert(get_block(bx_index) == bx, "block not found");
|
||||
|
||||
// If the previous block conditionally falls into bx, return false,
|
||||
// because moving bx will create an extra jump.
|
||||
for(uint k = 1; k < bx->num_preds(); k++ ) {
|
||||
Block* pred = get_block_for_node(bx->pred(k));
|
||||
if (pred == _blocks[bx_index-1]) {
|
||||
if (pred == get_block(bx_index - 1)) {
|
||||
if (pred->_num_succs != 1) {
|
||||
return false;
|
||||
}
|
||||
@ -632,7 +614,6 @@ bool PhaseCFG::move_to_next(Block* bx, uint b_index) {
|
||||
return true;
|
||||
}
|
||||
|
||||
//------------------------------move_to_end------------------------------------
|
||||
// Move empty and uncommon blocks to the end.
|
||||
void PhaseCFG::move_to_end(Block *b, uint i) {
|
||||
int e = b->is_Empty();
|
||||
@ -650,31 +631,31 @@ void PhaseCFG::move_to_end(Block *b, uint i) {
|
||||
_blocks.push(b);
|
||||
}
|
||||
|
||||
//---------------------------set_loop_alignment--------------------------------
|
||||
// Set loop alignment for every block
|
||||
void PhaseCFG::set_loop_alignment() {
|
||||
uint last = _num_blocks;
|
||||
assert( _blocks[0] == _broot, "" );
|
||||
uint last = number_of_blocks();
|
||||
assert(get_block(0) == get_root_block(), "");
|
||||
|
||||
for (uint i = 1; i < last; i++ ) {
|
||||
Block *b = _blocks[i];
|
||||
if (b->head()->is_Loop()) {
|
||||
b->set_loop_alignment(b);
|
||||
for (uint i = 1; i < last; i++) {
|
||||
Block* block = get_block(i);
|
||||
if (block->head()->is_Loop()) {
|
||||
block->set_loop_alignment(block);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//-----------------------------remove_empty------------------------------------
|
||||
// Make empty basic blocks to be "connector" blocks, Move uncommon blocks
|
||||
// to the end.
|
||||
void PhaseCFG::remove_empty() {
|
||||
void PhaseCFG::remove_empty_blocks() {
|
||||
// Move uncommon blocks to the end
|
||||
uint last = _num_blocks;
|
||||
assert( _blocks[0] == _broot, "" );
|
||||
uint last = number_of_blocks();
|
||||
assert(get_block(0) == get_root_block(), "");
|
||||
|
||||
for (uint i = 1; i < last; i++) {
|
||||
Block *b = _blocks[i];
|
||||
if (b->is_connector()) break;
|
||||
Block* block = get_block(i);
|
||||
if (block->is_connector()) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Check for NeverBranch at block end. This needs to become a GOTO to the
|
||||
// true target. NeverBranch are treated as a conditional branch that
|
||||
@ -682,124 +663,127 @@ void PhaseCFG::remove_empty() {
|
||||
// to give a fake exit path to infinite loops. At this late stage they
|
||||
// need to turn into Goto's so that when you enter the infinite loop you
|
||||
// indeed hang.
|
||||
if( b->_nodes[b->end_idx()]->Opcode() == Op_NeverBranch )
|
||||
convert_NeverBranch_to_Goto(b);
|
||||
if (block->_nodes[block->end_idx()]->Opcode() == Op_NeverBranch) {
|
||||
convert_NeverBranch_to_Goto(block);
|
||||
}
|
||||
|
||||
// Look for uncommon blocks and move to end.
|
||||
if (!C->do_freq_based_layout()) {
|
||||
if (b->is_uncommon(this)) {
|
||||
move_to_end(b, i);
|
||||
if (block->is_uncommon(this)) {
|
||||
move_to_end(block, i);
|
||||
last--; // No longer check for being uncommon!
|
||||
if( no_flip_branch(b) ) { // Fall-thru case must follow?
|
||||
b = _blocks[i]; // Find the fall-thru block
|
||||
move_to_end(b, i);
|
||||
if (no_flip_branch(block)) { // Fall-thru case must follow?
|
||||
// Find the fall-thru block
|
||||
block = get_block(i);
|
||||
move_to_end(block, i);
|
||||
last--;
|
||||
}
|
||||
i--; // backup block counter post-increment
|
||||
// backup block counter post-increment
|
||||
i--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Move empty blocks to the end
|
||||
last = _num_blocks;
|
||||
last = number_of_blocks();
|
||||
for (uint i = 1; i < last; i++) {
|
||||
Block *b = _blocks[i];
|
||||
if (b->is_Empty() != Block::not_empty) {
|
||||
move_to_end(b, i);
|
||||
Block* block = get_block(i);
|
||||
if (block->is_Empty() != Block::not_empty) {
|
||||
move_to_end(block, i);
|
||||
last--;
|
||||
i--;
|
||||
}
|
||||
} // End of for all blocks
|
||||
}
|
||||
|
||||
//-----------------------------fixup_flow--------------------------------------
|
||||
// Fix up the final control flow for basic blocks.
|
||||
void PhaseCFG::fixup_flow() {
|
||||
// Fixup final control flow for the blocks. Remove jump-to-next
|
||||
// block. If neither arm of a IF follows the conditional branch, we
|
||||
// have to add a second jump after the conditional. We place the
|
||||
// TRUE branch target in succs[0] for both GOTOs and IFs.
|
||||
for (uint i=0; i < _num_blocks; i++) {
|
||||
Block *b = _blocks[i];
|
||||
b->_pre_order = i; // turn pre-order into block-index
|
||||
for (uint i = 0; i < number_of_blocks(); i++) {
|
||||
Block* block = get_block(i);
|
||||
block->_pre_order = i; // turn pre-order into block-index
|
||||
|
||||
// Connector blocks need no further processing.
|
||||
if (b->is_connector()) {
|
||||
assert((i+1) == _num_blocks || _blocks[i+1]->is_connector(),
|
||||
"All connector blocks should sink to the end");
|
||||
if (block->is_connector()) {
|
||||
assert((i+1) == number_of_blocks() || get_block(i + 1)->is_connector(), "All connector blocks should sink to the end");
|
||||
continue;
|
||||
}
|
||||
assert(b->is_Empty() != Block::completely_empty,
|
||||
"Empty blocks should be connectors");
|
||||
assert(block->is_Empty() != Block::completely_empty, "Empty blocks should be connectors");
|
||||
|
||||
Block *bnext = (i < _num_blocks-1) ? _blocks[i+1] : NULL;
|
||||
Block *bs0 = b->non_connector_successor(0);
|
||||
Block* bnext = (i < number_of_blocks() - 1) ? get_block(i + 1) : NULL;
|
||||
Block* bs0 = block->non_connector_successor(0);
|
||||
|
||||
// Check for multi-way branches where I cannot negate the test to
|
||||
// exchange the true and false targets.
|
||||
if( no_flip_branch( b ) ) {
|
||||
if (no_flip_branch(block)) {
|
||||
// Find fall through case - if must fall into its target
|
||||
int branch_idx = b->_nodes.size() - b->_num_succs;
|
||||
for (uint j2 = 0; j2 < b->_num_succs; j2++) {
|
||||
const ProjNode* p = b->_nodes[branch_idx + j2]->as_Proj();
|
||||
int branch_idx = block->_nodes.size() - block->_num_succs;
|
||||
for (uint j2 = 0; j2 < block->_num_succs; j2++) {
|
||||
const ProjNode* p = block->_nodes[branch_idx + j2]->as_Proj();
|
||||
if (p->_con == 0) {
|
||||
// successor j2 is fall through case
|
||||
if (b->non_connector_successor(j2) != bnext) {
|
||||
if (block->non_connector_successor(j2) != bnext) {
|
||||
// but it is not the next block => insert a goto
|
||||
insert_goto_at(i, j2);
|
||||
}
|
||||
// Put taken branch in slot 0
|
||||
if( j2 == 0 && b->_num_succs == 2) {
|
||||
if (j2 == 0 && block->_num_succs == 2) {
|
||||
// Flip targets in succs map
|
||||
Block *tbs0 = b->_succs[0];
|
||||
Block *tbs1 = b->_succs[1];
|
||||
b->_succs.map( 0, tbs1 );
|
||||
b->_succs.map( 1, tbs0 );
|
||||
Block *tbs0 = block->_succs[0];
|
||||
Block *tbs1 = block->_succs[1];
|
||||
block->_succs.map(0, tbs1);
|
||||
block->_succs.map(1, tbs0);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Remove all CatchProjs
|
||||
for (uint j1 = 0; j1 < b->_num_succs; j1++) b->_nodes.pop();
|
||||
|
||||
} else if (b->_num_succs == 1) {
|
||||
// Remove all CatchProjs
|
||||
for (uint j = 0; j < block->_num_succs; j++) {
|
||||
block->_nodes.pop();
|
||||
}
|
||||
|
||||
} else if (block->_num_succs == 1) {
|
||||
// Block ends in a Goto?
|
||||
if (bnext == bs0) {
|
||||
// We fall into next block; remove the Goto
|
||||
b->_nodes.pop();
|
||||
block->_nodes.pop();
|
||||
}
|
||||
|
||||
} else if( b->_num_succs == 2 ) { // Block ends in a If?
|
||||
} else if(block->_num_succs == 2) { // Block ends in a If?
|
||||
// Get opcode of 1st projection (matches _succs[0])
|
||||
// Note: Since this basic block has 2 exits, the last 2 nodes must
|
||||
// be projections (in any order), the 3rd last node must be
|
||||
// the IfNode (we have excluded other 2-way exits such as
|
||||
// CatchNodes already).
|
||||
MachNode *iff = b->_nodes[b->_nodes.size()-3]->as_Mach();
|
||||
ProjNode *proj0 = b->_nodes[b->_nodes.size()-2]->as_Proj();
|
||||
ProjNode *proj1 = b->_nodes[b->_nodes.size()-1]->as_Proj();
|
||||
MachNode* iff = block->_nodes[block->_nodes.size() - 3]->as_Mach();
|
||||
ProjNode* proj0 = block->_nodes[block->_nodes.size() - 2]->as_Proj();
|
||||
ProjNode* proj1 = block->_nodes[block->_nodes.size() - 1]->as_Proj();
|
||||
|
||||
// Assert that proj0 and succs[0] match up. Similarly for proj1 and succs[1].
|
||||
assert(proj0->raw_out(0) == b->_succs[0]->head(), "Mismatch successor 0");
|
||||
assert(proj1->raw_out(0) == b->_succs[1]->head(), "Mismatch successor 1");
|
||||
assert(proj0->raw_out(0) == block->_succs[0]->head(), "Mismatch successor 0");
|
||||
assert(proj1->raw_out(0) == block->_succs[1]->head(), "Mismatch successor 1");
|
||||
|
||||
Block *bs1 = b->non_connector_successor(1);
|
||||
Block* bs1 = block->non_connector_successor(1);
|
||||
|
||||
// Check for neither successor block following the current
|
||||
// block ending in a conditional. If so, move one of the
|
||||
// successors after the current one, provided that the
|
||||
// successor was previously unscheduled, but moveable
|
||||
// (i.e., all paths to it involve a branch).
|
||||
if( !C->do_freq_based_layout() && bnext != bs0 && bnext != bs1 ) {
|
||||
if (!C->do_freq_based_layout() && bnext != bs0 && bnext != bs1) {
|
||||
// Choose the more common successor based on the probability
|
||||
// of the conditional branch.
|
||||
Block *bx = bs0;
|
||||
Block *by = bs1;
|
||||
Block* bx = bs0;
|
||||
Block* by = bs1;
|
||||
|
||||
// _prob is the probability of taking the true path. Make
|
||||
// p the probability of taking successor #1.
|
||||
float p = iff->as_MachIf()->_prob;
|
||||
if( proj0->Opcode() == Op_IfTrue ) {
|
||||
if (proj0->Opcode() == Op_IfTrue) {
|
||||
p = 1.0 - p;
|
||||
}
|
||||
|
||||
@ -826,14 +810,16 @@ void PhaseCFG::fixup_flow() {
|
||||
// succs[1].
|
||||
if (bnext == bs0) {
|
||||
// Fall-thru case in succs[0], so flip targets in succs map
|
||||
Block *tbs0 = b->_succs[0];
|
||||
Block *tbs1 = b->_succs[1];
|
||||
b->_succs.map( 0, tbs1 );
|
||||
b->_succs.map( 1, tbs0 );
|
||||
Block* tbs0 = block->_succs[0];
|
||||
Block* tbs1 = block->_succs[1];
|
||||
block->_succs.map(0, tbs1);
|
||||
block->_succs.map(1, tbs0);
|
||||
// Flip projection for each target
|
||||
{ ProjNode *tmp = proj0; proj0 = proj1; proj1 = tmp; }
|
||||
ProjNode* tmp = proj0;
|
||||
proj0 = proj1;
|
||||
proj1 = tmp;
|
||||
|
||||
} else if( bnext != bs1 ) {
|
||||
} else if(bnext != bs1) {
|
||||
// Need a double-branch
|
||||
// The existing conditional branch need not change.
|
||||
// Add a unconditional branch to the false target.
|
||||
@ -843,12 +829,12 @@ void PhaseCFG::fixup_flow() {
|
||||
}
|
||||
|
||||
// Make sure we TRUE branch to the target
|
||||
if( proj0->Opcode() == Op_IfFalse ) {
|
||||
if (proj0->Opcode() == Op_IfFalse) {
|
||||
iff->as_MachIf()->negate();
|
||||
}
|
||||
|
||||
b->_nodes.pop(); // Remove IfFalse & IfTrue projections
|
||||
b->_nodes.pop();
|
||||
block->_nodes.pop(); // Remove IfFalse & IfTrue projections
|
||||
block->_nodes.pop();
|
||||
|
||||
} else {
|
||||
// Multi-exit block, e.g. a switch statement
|
||||
@ -858,7 +844,6 @@ void PhaseCFG::fixup_flow() {
|
||||
}
|
||||
|
||||
|
||||
//------------------------------dump-------------------------------------------
|
||||
#ifndef PRODUCT
|
||||
void PhaseCFG::_dump_cfg( const Node *end, VectorSet &visited ) const {
|
||||
const Node *x = end->is_block_proj();
|
||||
@ -884,10 +869,11 @@ void PhaseCFG::_dump_cfg( const Node *end, VectorSet &visited ) const {
|
||||
}
|
||||
|
||||
void PhaseCFG::dump( ) const {
|
||||
tty->print("\n--- CFG --- %d BBs\n",_num_blocks);
|
||||
tty->print("\n--- CFG --- %d BBs\n", number_of_blocks());
|
||||
if (_blocks.size()) { // Did we do basic-block layout?
|
||||
for (uint i = 0; i < _num_blocks; i++) {
|
||||
_blocks[i]->dump(this);
|
||||
for (uint i = 0; i < number_of_blocks(); i++) {
|
||||
const Block* block = get_block(i);
|
||||
block->dump(this);
|
||||
}
|
||||
} else { // Else do it with a DFS
|
||||
VectorSet visited(_block_arena);
|
||||
@ -896,27 +882,26 @@ void PhaseCFG::dump( ) const {
|
||||
}
|
||||
|
||||
void PhaseCFG::dump_headers() {
|
||||
for( uint i = 0; i < _num_blocks; i++ ) {
|
||||
if (_blocks[i]) {
|
||||
_blocks[i]->dump_head(this);
|
||||
for (uint i = 0; i < number_of_blocks(); i++) {
|
||||
Block* block = get_block(i);
|
||||
if (block != NULL) {
|
||||
block->dump_head(this);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void PhaseCFG::verify( ) const {
|
||||
void PhaseCFG::verify() const {
|
||||
#ifdef ASSERT
|
||||
// Verify sane CFG
|
||||
for (uint i = 0; i < _num_blocks; i++) {
|
||||
Block *b = _blocks[i];
|
||||
uint cnt = b->_nodes.size();
|
||||
for (uint i = 0; i < number_of_blocks(); i++) {
|
||||
Block* block = get_block(i);
|
||||
uint cnt = block->_nodes.size();
|
||||
uint j;
|
||||
for (j = 0; j < cnt; j++) {
|
||||
Node *n = b->_nodes[j];
|
||||
assert(get_block_for_node(n) == b, "");
|
||||
if (j >= 1 && n->is_Mach() &&
|
||||
n->as_Mach()->ideal_Opcode() == Op_CreateEx) {
|
||||
assert(j == 1 || b->_nodes[j-1]->is_Phi(),
|
||||
"CreateEx must be first instruction in block");
|
||||
Node *n = block->_nodes[j];
|
||||
assert(get_block_for_node(n) == block, "");
|
||||
if (j >= 1 && n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_CreateEx) {
|
||||
assert(j == 1 || block->_nodes[j-1]->is_Phi(), "CreateEx must be first instruction in block");
|
||||
}
|
||||
for (uint k = 0; k < n->req(); k++) {
|
||||
Node *def = n->in(k);
|
||||
@ -926,8 +911,7 @@ void PhaseCFG::verify( ) const {
|
||||
// Uses must follow their definition if they are at the same block.
|
||||
// Mostly done to check that MachSpillCopy nodes are placed correctly
|
||||
// when CreateEx node is moved in build_ifg_physical().
|
||||
if (get_block_for_node(def) == b &&
|
||||
!(b->head()->is_Loop() && n->is_Phi()) &&
|
||||
if (get_block_for_node(def) == block && !(block->head()->is_Loop() && n->is_Phi()) &&
|
||||
// See (+++) comment in reg_split.cpp
|
||||
!(n->jvms() != NULL && n->jvms()->is_monitor_use(k))) {
|
||||
bool is_loop = false;
|
||||
@ -939,29 +923,29 @@ void PhaseCFG::verify( ) const {
|
||||
}
|
||||
}
|
||||
}
|
||||
assert(is_loop || b->find_node(def) < j, "uses must follow definitions");
|
||||
assert(is_loop || block->find_node(def) < j, "uses must follow definitions");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
j = b->end_idx();
|
||||
Node *bp = (Node*)b->_nodes[b->_nodes.size()-1]->is_block_proj();
|
||||
assert( bp, "last instruction must be a block proj" );
|
||||
assert( bp == b->_nodes[j], "wrong number of successors for this block" );
|
||||
j = block->end_idx();
|
||||
Node* bp = (Node*)block->_nodes[block->_nodes.size() - 1]->is_block_proj();
|
||||
assert(bp, "last instruction must be a block proj");
|
||||
assert(bp == block->_nodes[j], "wrong number of successors for this block");
|
||||
if (bp->is_Catch()) {
|
||||
while (b->_nodes[--j]->is_MachProj()) ;
|
||||
assert(b->_nodes[j]->is_MachCall(), "CatchProj must follow call");
|
||||
while (block->_nodes[--j]->is_MachProj()) {
|
||||
;
|
||||
}
|
||||
assert(block->_nodes[j]->is_MachCall(), "CatchProj must follow call");
|
||||
} else if (bp->is_Mach() && bp->as_Mach()->ideal_Opcode() == Op_If) {
|
||||
assert(b->_num_succs == 2, "Conditional branch must have two targets");
|
||||
assert(block->_num_succs == 2, "Conditional branch must have two targets");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
//=============================================================================
|
||||
//------------------------------UnionFind--------------------------------------
|
||||
UnionFind::UnionFind( uint max ) : _cnt(max), _max(max), _indices(NEW_RESOURCE_ARRAY(uint,max)) {
|
||||
Copy::zero_to_bytes( _indices, sizeof(uint)*max );
|
||||
}
|
||||
@ -986,7 +970,6 @@ void UnionFind::reset( uint max ) {
|
||||
for( uint i=0; i<max; i++ ) map(i,i);
|
||||
}
|
||||
|
||||
//------------------------------Find_compress----------------------------------
|
||||
// Straight out of Tarjan's union-find algorithm
|
||||
uint UnionFind::Find_compress( uint idx ) {
|
||||
uint cur = idx;
|
||||
@ -1006,7 +989,6 @@ uint UnionFind::Find_compress( uint idx ) {
|
||||
return idx;
|
||||
}
|
||||
|
||||
//------------------------------Find_const-------------------------------------
|
||||
// Like Find above, but no path compress, so bad asymptotic behavior
|
||||
uint UnionFind::Find_const( uint idx ) const {
|
||||
if( idx == 0 ) return idx; // Ignore the zero idx
|
||||
@ -1021,7 +1003,6 @@ uint UnionFind::Find_const( uint idx ) const {
|
||||
return next;
|
||||
}
|
||||
|
||||
//------------------------------Union------------------------------------------
|
||||
// union 2 sets together.
|
||||
void UnionFind::Union( uint idx1, uint idx2 ) {
|
||||
uint src = Find(idx1);
|
||||
@ -1070,9 +1051,6 @@ void CFGEdge::dump( ) const {
|
||||
}
|
||||
#endif
|
||||
|
||||
//=============================================================================
|
||||
|
||||
//------------------------------edge_order-------------------------------------
|
||||
// Comparison function for edges
|
||||
static int edge_order(CFGEdge **e0, CFGEdge **e1) {
|
||||
float freq0 = (*e0)->freq();
|
||||
@ -1087,7 +1065,6 @@ static int edge_order(CFGEdge **e0, CFGEdge **e1) {
|
||||
return dist1 - dist0;
|
||||
}
|
||||
|
||||
//------------------------------trace_frequency_order--------------------------
|
||||
// Comparison function for edges
|
||||
extern "C" int trace_frequency_order(const void *p0, const void *p1) {
|
||||
Trace *tr0 = *(Trace **) p0;
|
||||
@ -1113,17 +1090,15 @@ extern "C" int trace_frequency_order(const void *p0, const void *p1) {
|
||||
return diff;
|
||||
}
|
||||
|
||||
//------------------------------find_edges-------------------------------------
|
||||
// Find edges of interest, i.e, those which can fall through. Presumes that
|
||||
// edges which don't fall through are of low frequency and can be generally
|
||||
// ignored. Initialize the list of traces.
|
||||
void PhaseBlockLayout::find_edges()
|
||||
{
|
||||
void PhaseBlockLayout::find_edges() {
|
||||
// Walk the blocks, creating edges and Traces
|
||||
uint i;
|
||||
Trace *tr = NULL;
|
||||
for (i = 0; i < _cfg._num_blocks; i++) {
|
||||
Block *b = _cfg._blocks[i];
|
||||
for (i = 0; i < _cfg.number_of_blocks(); i++) {
|
||||
Block* b = _cfg.get_block(i);
|
||||
tr = new Trace(b, next, prev);
|
||||
traces[tr->id()] = tr;
|
||||
|
||||
@ -1147,7 +1122,7 @@ void PhaseBlockLayout::find_edges()
|
||||
if (n->num_preds() != 1) break;
|
||||
|
||||
i++;
|
||||
assert(n = _cfg._blocks[i], "expecting next block");
|
||||
assert(n = _cfg.get_block(i), "expecting next block");
|
||||
tr->append(n);
|
||||
uf->map(n->_pre_order, tr->id());
|
||||
traces[n->_pre_order] = NULL;
|
||||
@ -1171,8 +1146,8 @@ void PhaseBlockLayout::find_edges()
|
||||
}
|
||||
|
||||
// Group connector blocks into one trace
|
||||
for (i++; i < _cfg._num_blocks; i++) {
|
||||
Block *b = _cfg._blocks[i];
|
||||
for (i++; i < _cfg.number_of_blocks(); i++) {
|
||||
Block *b = _cfg.get_block(i);
|
||||
assert(b->is_connector(), "connector blocks at the end");
|
||||
tr->append(b);
|
||||
uf->map(b->_pre_order, tr->id());
|
||||
@ -1180,10 +1155,8 @@ void PhaseBlockLayout::find_edges()
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------union_traces----------------------------------
|
||||
// Union two traces together in uf, and null out the trace in the list
|
||||
void PhaseBlockLayout::union_traces(Trace* updated_trace, Trace* old_trace)
|
||||
{
|
||||
void PhaseBlockLayout::union_traces(Trace* updated_trace, Trace* old_trace) {
|
||||
uint old_id = old_trace->id();
|
||||
uint updated_id = updated_trace->id();
|
||||
|
||||
@ -1207,10 +1180,8 @@ void PhaseBlockLayout::union_traces(Trace* updated_trace, Trace* old_trace)
|
||||
traces[hi_id] = NULL;
|
||||
}
|
||||
|
||||
//------------------------------grow_traces-------------------------------------
|
||||
// Append traces together via the most frequently executed edges
|
||||
void PhaseBlockLayout::grow_traces()
|
||||
{
|
||||
void PhaseBlockLayout::grow_traces() {
|
||||
// Order the edges, and drive the growth of Traces via the most
|
||||
// frequently executed edges.
|
||||
edges->sort(edge_order);
|
||||
@ -1252,11 +1223,9 @@ void PhaseBlockLayout::grow_traces()
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------merge_traces-----------------------------------
|
||||
// Embed one trace into another, if the fork or join points are sufficiently
|
||||
// balanced.
|
||||
void PhaseBlockLayout::merge_traces(bool fall_thru_only)
|
||||
{
|
||||
void PhaseBlockLayout::merge_traces(bool fall_thru_only) {
|
||||
// Walk the edge list a another time, looking at unprocessed edges.
|
||||
// Fold in diamonds
|
||||
for (int i = 0; i < edges->length(); i++) {
|
||||
@ -1310,7 +1279,7 @@ void PhaseBlockLayout::merge_traces(bool fall_thru_only)
|
||||
src_trace->insert_after(src_block, targ_trace);
|
||||
union_traces(src_trace, targ_trace);
|
||||
} else if (src_at_tail) {
|
||||
if (src_trace != trace(_cfg._broot)) {
|
||||
if (src_trace != trace(_cfg.get_root_block())) {
|
||||
e->set_state(CFGEdge::connected);
|
||||
targ_trace->insert_before(targ_block, src_trace);
|
||||
union_traces(targ_trace, src_trace);
|
||||
@ -1319,7 +1288,7 @@ void PhaseBlockLayout::merge_traces(bool fall_thru_only)
|
||||
} else if (e->state() == CFGEdge::open) {
|
||||
// Append traces, even without a fall-thru connection.
|
||||
// But leave root entry at the beginning of the block list.
|
||||
if (targ_trace != trace(_cfg._broot)) {
|
||||
if (targ_trace != trace(_cfg.get_root_block())) {
|
||||
e->set_state(CFGEdge::connected);
|
||||
src_trace->append(targ_trace);
|
||||
union_traces(src_trace, targ_trace);
|
||||
@ -1328,11 +1297,9 @@ void PhaseBlockLayout::merge_traces(bool fall_thru_only)
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------reorder_traces-----------------------------------
|
||||
// Order the sequence of the traces in some desirable way, and fixup the
|
||||
// jumps at the end of each block.
|
||||
void PhaseBlockLayout::reorder_traces(int count)
|
||||
{
|
||||
void PhaseBlockLayout::reorder_traces(int count) {
|
||||
ResourceArea *area = Thread::current()->resource_area();
|
||||
Trace ** new_traces = NEW_ARENA_ARRAY(area, Trace *, count);
|
||||
Block_List worklist;
|
||||
@ -1347,15 +1314,14 @@ void PhaseBlockLayout::reorder_traces(int count)
|
||||
}
|
||||
|
||||
// The entry block should be first on the new trace list.
|
||||
Trace *tr = trace(_cfg._broot);
|
||||
Trace *tr = trace(_cfg.get_root_block());
|
||||
assert(tr == new_traces[0], "entry trace misplaced");
|
||||
|
||||
// Sort the new trace list by frequency
|
||||
qsort(new_traces + 1, new_count - 1, sizeof(new_traces[0]), trace_frequency_order);
|
||||
|
||||
// Patch up the successor blocks
|
||||
_cfg._blocks.reset();
|
||||
_cfg._num_blocks = 0;
|
||||
_cfg.clear_blocks();
|
||||
for (int i = 0; i < new_count; i++) {
|
||||
Trace *tr = new_traces[i];
|
||||
if (tr != NULL) {
|
||||
@ -1364,17 +1330,15 @@ void PhaseBlockLayout::reorder_traces(int count)
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------PhaseBlockLayout-------------------------------
|
||||
// Order basic blocks based on frequency
|
||||
PhaseBlockLayout::PhaseBlockLayout(PhaseCFG &cfg) :
|
||||
Phase(BlockLayout),
|
||||
_cfg(cfg)
|
||||
{
|
||||
PhaseBlockLayout::PhaseBlockLayout(PhaseCFG &cfg)
|
||||
: Phase(BlockLayout)
|
||||
, _cfg(cfg) {
|
||||
ResourceMark rm;
|
||||
ResourceArea *area = Thread::current()->resource_area();
|
||||
|
||||
// List of traces
|
||||
int size = _cfg._num_blocks + 1;
|
||||
int size = _cfg.number_of_blocks() + 1;
|
||||
traces = NEW_ARENA_ARRAY(area, Trace *, size);
|
||||
memset(traces, 0, size*sizeof(Trace*));
|
||||
next = NEW_ARENA_ARRAY(area, Block *, size);
|
||||
@ -1407,11 +1371,10 @@ PhaseBlockLayout::PhaseBlockLayout(PhaseCFG &cfg) :
|
||||
// Re-order all the remaining traces by frequency
|
||||
reorder_traces(size);
|
||||
|
||||
assert(_cfg._num_blocks >= (uint) (size - 1), "number of blocks can not shrink");
|
||||
assert(_cfg.number_of_blocks() >= (uint) (size - 1), "number of blocks can not shrink");
|
||||
}
|
||||
|
||||
|
||||
//------------------------------backedge---------------------------------------
|
||||
// Edge e completes a loop in a trace. If the target block is head of the
|
||||
// loop, rotate the loop block so that the loop ends in a conditional branch.
|
||||
bool Trace::backedge(CFGEdge *e) {
|
||||
@ -1463,14 +1426,12 @@ bool Trace::backedge(CFGEdge *e) {
|
||||
return loop_rotated;
|
||||
}
|
||||
|
||||
//------------------------------fixup_blocks-----------------------------------
|
||||
// push blocks onto the CFG list
|
||||
// ensure that blocks have the correct two-way branch sense
|
||||
void Trace::fixup_blocks(PhaseCFG &cfg) {
|
||||
Block *last = last_block();
|
||||
for (Block *b = first_block(); b != NULL; b = next(b)) {
|
||||
cfg._blocks.push(b);
|
||||
cfg._num_blocks++;
|
||||
cfg.add_block(b);
|
||||
if (!b->is_connector()) {
|
||||
int nfallthru = b->num_fall_throughs();
|
||||
if (b != last) {
|
||||
|
@ -348,20 +348,77 @@ class Block : public CFGElement {
|
||||
class PhaseCFG : public Phase {
|
||||
friend class VMStructs;
|
||||
private:
|
||||
|
||||
// Root of whole program
|
||||
RootNode* _root;
|
||||
|
||||
// The block containing the root node
|
||||
Block* _root_block;
|
||||
|
||||
// List of basic blocks that are created during CFG creation
|
||||
Block_List _blocks;
|
||||
|
||||
// Count of basic blocks
|
||||
uint _number_of_blocks;
|
||||
|
||||
// Arena for the blocks to be stored in
|
||||
Arena* _block_arena;
|
||||
|
||||
// The matcher for this compilation
|
||||
Matcher& _matcher;
|
||||
|
||||
// Map nodes to owning basic block
|
||||
Block_Array _node_to_block_mapping;
|
||||
|
||||
// Loop from the root
|
||||
CFGLoop* _root_loop;
|
||||
|
||||
// Outmost loop frequency
|
||||
float _outer_loop_frequency;
|
||||
|
||||
// Per node latency estimation, valid only during GCM
|
||||
GrowableArray<uint>* _node_latency;
|
||||
|
||||
// Build a proper looking cfg. Return count of basic blocks
|
||||
uint build_cfg();
|
||||
|
||||
// Perform DFS search.
|
||||
// Build the dominator tree so that we know where we can move instructions
|
||||
void build_dominator_tree();
|
||||
|
||||
// Estimate block frequencies based on IfNode probabilities, so that we know where we want to move instructions
|
||||
void estimate_block_frequency();
|
||||
|
||||
// Global Code Motion. See Click's PLDI95 paper. Place Nodes in specific
|
||||
// basic blocks; i.e. _node_to_block_mapping now maps _idx for all Nodes to some Block.
|
||||
// Move nodes to ensure correctness from GVN and also try to move nodes out of loops.
|
||||
void global_code_motion();
|
||||
|
||||
// Schedule Nodes early in their basic blocks.
|
||||
bool schedule_early(VectorSet &visited, Node_List &roots);
|
||||
|
||||
// For each node, find the latest block it can be scheduled into
|
||||
// and then select the cheapest block between the latest and earliest
|
||||
// block to place the node.
|
||||
void schedule_late(VectorSet &visited, Node_List &stack);
|
||||
|
||||
// Compute the (backwards) latency of a node from a single use
|
||||
int latency_from_use(Node *n, const Node *def, Node *use);
|
||||
|
||||
// Compute the (backwards) latency of a node from the uses of this instruction
|
||||
void partial_latency_of_defs(Node *n);
|
||||
|
||||
// Compute the instruction global latency with a backwards walk
|
||||
void compute_latencies_backwards(VectorSet &visited, Node_List &stack);
|
||||
|
||||
// Pick a block between early and late that is a cheaper alternative
|
||||
// to late. Helper for schedule_late.
|
||||
Block* hoist_to_cheaper_block(Block* LCA, Block* early, Node* self);
|
||||
|
||||
// Perform a Depth First Search (DFS).
|
||||
// Setup 'vertex' as DFS to vertex mapping.
|
||||
// Setup 'semi' as vertex to DFS mapping.
|
||||
// Set 'parent' to DFS parent.
|
||||
uint DFS( Tarjan *tarjan );
|
||||
uint do_DFS(Tarjan* tarjan, uint rpo_counter);
|
||||
|
||||
// Helper function to insert a node into a block
|
||||
void schedule_node_into_block( Node *n, Block *b );
|
||||
@ -372,7 +429,8 @@ class PhaseCFG : public Phase {
|
||||
void schedule_pinned_nodes( VectorSet &visited );
|
||||
|
||||
// I'll need a few machine-specific GotoNodes. Clone from this one.
|
||||
MachNode *_goto;
|
||||
// Used when building the CFG and creating end nodes for blocks.
|
||||
MachNode* _goto;
|
||||
|
||||
Block* insert_anti_dependences(Block* LCA, Node* load, bool verify = false);
|
||||
void verify_anti_dependences(Block* LCA, Node* load) {
|
||||
@ -380,17 +438,77 @@ class PhaseCFG : public Phase {
|
||||
insert_anti_dependences(LCA, load, true);
|
||||
}
|
||||
|
||||
bool move_to_next(Block* bx, uint b_index);
|
||||
void move_to_end(Block* bx, uint b_index);
|
||||
|
||||
void insert_goto_at(uint block_no, uint succ_no);
|
||||
|
||||
// Check for NeverBranch at block end. This needs to become a GOTO to the
|
||||
// true target. NeverBranch are treated as a conditional branch that always
|
||||
// goes the same direction for most of the optimizer and are used to give a
|
||||
// fake exit path to infinite loops. At this late stage they need to turn
|
||||
// into Goto's so that when you enter the infinite loop you indeed hang.
|
||||
void convert_NeverBranch_to_Goto(Block *b);
|
||||
|
||||
CFGLoop* create_loop_tree();
|
||||
|
||||
#ifndef PRODUCT
|
||||
bool _trace_opto_pipelining; // tracing flag
|
||||
#endif
|
||||
|
||||
public:
|
||||
PhaseCFG(Arena* arena, RootNode* root, Matcher& matcher);
|
||||
|
||||
uint _num_blocks; // Count of basic blocks
|
||||
Block_List _blocks; // List of basic blocks
|
||||
RootNode *_root; // Root of whole program
|
||||
Block *_broot; // Basic block of root
|
||||
uint _rpo_ctr;
|
||||
CFGLoop* _root_loop;
|
||||
float _outer_loop_freq; // Outmost loop frequency
|
||||
void set_latency_for_node(Node* node, int latency) {
|
||||
_node_latency->at_put_grow(node->_idx, latency);
|
||||
}
|
||||
|
||||
uint get_latency_for_node(Node* node) {
|
||||
return _node_latency->at_grow(node->_idx);
|
||||
}
|
||||
|
||||
// Get the outer most frequency
|
||||
float get_outer_loop_frequency() const {
|
||||
return _outer_loop_frequency;
|
||||
}
|
||||
|
||||
// Get the root node of the CFG
|
||||
RootNode* get_root_node() const {
|
||||
return _root;
|
||||
}
|
||||
|
||||
// Get the block of the root node
|
||||
Block* get_root_block() const {
|
||||
return _root_block;
|
||||
}
|
||||
|
||||
// Add a block at a position and moves the later ones one step
|
||||
void add_block_at(uint pos, Block* block) {
|
||||
_blocks.insert(pos, block);
|
||||
_number_of_blocks++;
|
||||
}
|
||||
|
||||
// Adds a block to the top of the block list
|
||||
void add_block(Block* block) {
|
||||
_blocks.push(block);
|
||||
_number_of_blocks++;
|
||||
}
|
||||
|
||||
// Clear the list of blocks
|
||||
void clear_blocks() {
|
||||
_blocks.reset();
|
||||
_number_of_blocks = 0;
|
||||
}
|
||||
|
||||
// Get the block at position pos in _blocks
|
||||
Block* get_block(uint pos) const {
|
||||
return _blocks[pos];
|
||||
}
|
||||
|
||||
// Number of blocks
|
||||
uint number_of_blocks() const {
|
||||
return _number_of_blocks;
|
||||
}
|
||||
|
||||
// set which block this node should reside in
|
||||
void map_node_to_block(const Node* node, Block* block) {
|
||||
@ -412,72 +530,26 @@ class PhaseCFG : public Phase {
|
||||
return (_node_to_block_mapping.lookup(node->_idx) != NULL);
|
||||
}
|
||||
|
||||
// Per node latency estimation, valid only during GCM
|
||||
GrowableArray<uint> *_node_latency;
|
||||
|
||||
#ifndef PRODUCT
|
||||
bool _trace_opto_pipelining; // tracing flag
|
||||
#endif
|
||||
|
||||
#ifdef ASSERT
|
||||
Unique_Node_List _raw_oops;
|
||||
#endif
|
||||
|
||||
// Build dominators
|
||||
void Dominators();
|
||||
|
||||
// Estimate block frequencies based on IfNode probabilities
|
||||
void Estimate_Block_Frequency();
|
||||
|
||||
// Global Code Motion. See Click's PLDI95 paper. Place Nodes in specific
|
||||
// basic blocks; i.e. _node_to_block_mapping now maps _idx for all Nodes to some Block.
|
||||
void GlobalCodeMotion( Matcher &m, uint unique, Node_List &proj_list );
|
||||
// Do global code motion by first building dominator tree and estimate block frequency
|
||||
// Returns true on success
|
||||
bool do_global_code_motion();
|
||||
|
||||
// Compute the (backwards) latency of a node from the uses
|
||||
void latency_from_uses(Node *n);
|
||||
|
||||
// Compute the (backwards) latency of a node from a single use
|
||||
int latency_from_use(Node *n, const Node *def, Node *use);
|
||||
|
||||
// Compute the (backwards) latency of a node from the uses of this instruction
|
||||
void partial_latency_of_defs(Node *n);
|
||||
|
||||
// Schedule Nodes early in their basic blocks.
|
||||
bool schedule_early(VectorSet &visited, Node_List &roots);
|
||||
|
||||
// For each node, find the latest block it can be scheduled into
|
||||
// and then select the cheapest block between the latest and earliest
|
||||
// block to place the node.
|
||||
void schedule_late(VectorSet &visited, Node_List &stack);
|
||||
|
||||
// Pick a block between early and late that is a cheaper alternative
|
||||
// to late. Helper for schedule_late.
|
||||
Block* hoist_to_cheaper_block(Block* LCA, Block* early, Node* self);
|
||||
|
||||
// Compute the instruction global latency with a backwards walk
|
||||
void ComputeLatenciesBackwards(VectorSet &visited, Node_List &stack);
|
||||
|
||||
// Set loop alignment
|
||||
void set_loop_alignment();
|
||||
|
||||
// Remove empty basic blocks
|
||||
void remove_empty();
|
||||
void remove_empty_blocks();
|
||||
void fixup_flow();
|
||||
bool move_to_next(Block* bx, uint b_index);
|
||||
void move_to_end(Block* bx, uint b_index);
|
||||
void insert_goto_at(uint block_no, uint succ_no);
|
||||
|
||||
// Check for NeverBranch at block end. This needs to become a GOTO to the
|
||||
// true target. NeverBranch are treated as a conditional branch that always
|
||||
// goes the same direction for most of the optimizer and are used to give a
|
||||
// fake exit path to infinite loops. At this late stage they need to turn
|
||||
// into Goto's so that when you enter the infinite loop you indeed hang.
|
||||
void convert_NeverBranch_to_Goto(Block *b);
|
||||
|
||||
CFGLoop* create_loop_tree();
|
||||
|
||||
// Insert a node into a block, and update the _bbs
|
||||
void insert( Block *b, uint idx, Node *n ) {
|
||||
// Insert a node into a block at index and map the node to the block
|
||||
void insert(Block *b, uint idx, Node *n) {
|
||||
b->_nodes.insert( idx, n );
|
||||
map_node_to_block(n, b);
|
||||
}
|
||||
|
@ -87,7 +87,6 @@
|
||||
// OptoReg::Bad for not-callee-saved.
|
||||
|
||||
|
||||
//------------------------------OopFlow----------------------------------------
|
||||
// Structure to pass around
|
||||
struct OopFlow : public ResourceObj {
|
||||
short *_callees; // Array mapping register to callee-saved
|
||||
@ -119,7 +118,6 @@ struct OopFlow : public ResourceObj {
|
||||
OopMap *build_oop_map( Node *n, int max_reg, PhaseRegAlloc *regalloc, int* live );
|
||||
};
|
||||
|
||||
//------------------------------compute_reach----------------------------------
|
||||
// Given reaching-defs for this block start, compute it for this block end
|
||||
void OopFlow::compute_reach( PhaseRegAlloc *regalloc, int max_reg, Dict *safehash ) {
|
||||
|
||||
@ -177,7 +175,6 @@ void OopFlow::compute_reach( PhaseRegAlloc *regalloc, int max_reg, Dict *safehas
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------merge------------------------------------------
|
||||
// Merge the given flow into the 'this' flow
|
||||
void OopFlow::merge( OopFlow *flow, int max_reg ) {
|
||||
assert( _b == NULL, "merging into a happy flow" );
|
||||
@ -197,14 +194,12 @@ void OopFlow::merge( OopFlow *flow, int max_reg ) {
|
||||
|
||||
}
|
||||
|
||||
//------------------------------clone------------------------------------------
|
||||
void OopFlow::clone( OopFlow *flow, int max_size ) {
|
||||
_b = flow->_b;
|
||||
memcpy( _callees, flow->_callees, sizeof(short)*max_size);
|
||||
memcpy( _defs , flow->_defs , sizeof(Node*)*max_size);
|
||||
}
|
||||
|
||||
//------------------------------make-------------------------------------------
|
||||
OopFlow *OopFlow::make( Arena *A, int max_size, Compile* C ) {
|
||||
short *callees = NEW_ARENA_ARRAY(A,short,max_size+1);
|
||||
Node **defs = NEW_ARENA_ARRAY(A,Node*,max_size+1);
|
||||
@ -215,7 +210,6 @@ OopFlow *OopFlow::make( Arena *A, int max_size, Compile* C ) {
|
||||
return flow;
|
||||
}
|
||||
|
||||
//------------------------------bit twiddlers----------------------------------
|
||||
static int get_live_bit( int *live, int reg ) {
|
||||
return live[reg>>LogBitsPerInt] & (1<<(reg&(BitsPerInt-1))); }
|
||||
static void set_live_bit( int *live, int reg ) {
|
||||
@ -223,7 +217,6 @@ static void set_live_bit( int *live, int reg ) {
|
||||
static void clr_live_bit( int *live, int reg ) {
|
||||
live[reg>>LogBitsPerInt] &= ~(1<<(reg&(BitsPerInt-1))); }
|
||||
|
||||
//------------------------------build_oop_map----------------------------------
|
||||
// Build an oopmap from the current flow info
|
||||
OopMap *OopFlow::build_oop_map( Node *n, int max_reg, PhaseRegAlloc *regalloc, int* live ) {
|
||||
int framesize = regalloc->_framesize;
|
||||
@ -412,19 +405,18 @@ OopMap *OopFlow::build_oop_map( Node *n, int max_reg, PhaseRegAlloc *regalloc, i
|
||||
return omap;
|
||||
}
|
||||
|
||||
//------------------------------do_liveness------------------------------------
|
||||
// Compute backwards liveness on registers
|
||||
static void do_liveness( PhaseRegAlloc *regalloc, PhaseCFG *cfg, Block_List *worklist, int max_reg_ints, Arena *A, Dict *safehash ) {
|
||||
int *live = NEW_ARENA_ARRAY(A, int, (cfg->_num_blocks+1) * max_reg_ints);
|
||||
int *tmp_live = &live[cfg->_num_blocks * max_reg_ints];
|
||||
Node *root = cfg->C->root();
|
||||
static void do_liveness(PhaseRegAlloc* regalloc, PhaseCFG* cfg, Block_List* worklist, int max_reg_ints, Arena* A, Dict* safehash) {
|
||||
int* live = NEW_ARENA_ARRAY(A, int, (cfg->number_of_blocks() + 1) * max_reg_ints);
|
||||
int* tmp_live = &live[cfg->number_of_blocks() * max_reg_ints];
|
||||
Node* root = cfg->get_root_node();
|
||||
// On CISC platforms, get the node representing the stack pointer that regalloc
|
||||
// used for spills
|
||||
Node *fp = NodeSentinel;
|
||||
if (UseCISCSpill && root->req() > 1) {
|
||||
fp = root->in(1)->in(TypeFunc::FramePtr);
|
||||
}
|
||||
memset( live, 0, cfg->_num_blocks * (max_reg_ints<<LogBytesPerInt) );
|
||||
memset(live, 0, cfg->number_of_blocks() * (max_reg_ints << LogBytesPerInt));
|
||||
// Push preds onto worklist
|
||||
for (uint i = 1; i < root->req(); i++) {
|
||||
Block* block = cfg->get_block_for_node(root->in(i));
|
||||
@ -549,29 +541,32 @@ static void do_liveness( PhaseRegAlloc *regalloc, PhaseCFG *cfg, Block_List *wor
|
||||
// Scan for any missing safepoints. Happens to infinite loops
|
||||
// ala ZKM.jar
|
||||
uint i;
|
||||
for( i=1; i<cfg->_num_blocks; i++ ) {
|
||||
Block *b = cfg->_blocks[i];
|
||||
for (i = 1; i < cfg->number_of_blocks(); i++) {
|
||||
Block* block = cfg->get_block(i);
|
||||
uint j;
|
||||
for( j=1; j<b->_nodes.size(); j++ )
|
||||
if( b->_nodes[j]->jvms() &&
|
||||
(*safehash)[b->_nodes[j]] == NULL )
|
||||
for (j = 1; j < block->_nodes.size(); j++) {
|
||||
if (block->_nodes[j]->jvms() && (*safehash)[block->_nodes[j]] == NULL) {
|
||||
break;
|
||||
if( j<b->_nodes.size() ) break;
|
||||
}
|
||||
}
|
||||
if (j < block->_nodes.size()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if( i == cfg->_num_blocks )
|
||||
if (i == cfg->number_of_blocks()) {
|
||||
break; // Got 'em all
|
||||
}
|
||||
#ifndef PRODUCT
|
||||
if( PrintOpto && Verbose )
|
||||
tty->print_cr("retripping live calc");
|
||||
#endif
|
||||
// Force the issue (expensively): recheck everybody
|
||||
for( i=1; i<cfg->_num_blocks; i++ )
|
||||
worklist->push(cfg->_blocks[i]);
|
||||
for (i = 1; i < cfg->number_of_blocks(); i++) {
|
||||
worklist->push(cfg->get_block(i));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//------------------------------BuildOopMaps-----------------------------------
|
||||
// Collect GC mask info - where are all the OOPs?
|
||||
void Compile::BuildOopMaps() {
|
||||
NOT_PRODUCT( TracePhase t3("bldOopMaps", &_t_buildOopMaps, TimeCompiler); )
|
||||
@ -592,12 +587,12 @@ void Compile::BuildOopMaps() {
|
||||
OopFlow *free_list = NULL; // Free, unused
|
||||
|
||||
// Array mapping blocks to completed oopflows
|
||||
OopFlow **flows = NEW_ARENA_ARRAY(A, OopFlow*, _cfg->_num_blocks);
|
||||
memset( flows, 0, _cfg->_num_blocks*sizeof(OopFlow*) );
|
||||
OopFlow **flows = NEW_ARENA_ARRAY(A, OopFlow*, _cfg->number_of_blocks());
|
||||
memset( flows, 0, _cfg->number_of_blocks() * sizeof(OopFlow*) );
|
||||
|
||||
|
||||
// Do the first block 'by hand' to prime the worklist
|
||||
Block *entry = _cfg->_blocks[1];
|
||||
Block *entry = _cfg->get_block(1);
|
||||
OopFlow *rootflow = OopFlow::make(A,max_reg,this);
|
||||
// Initialize to 'bottom' (not 'top')
|
||||
memset( rootflow->_callees, OptoReg::Bad, max_reg*sizeof(short) );
|
||||
@ -623,7 +618,9 @@ void Compile::BuildOopMaps() {
|
||||
|
||||
Block *b = worklist.pop();
|
||||
// Ignore root block
|
||||
if( b == _cfg->_broot ) continue;
|
||||
if (b == _cfg->get_root_block()) {
|
||||
continue;
|
||||
}
|
||||
// Block is already done? Happens if block has several predecessors,
|
||||
// he can get on the worklist more than once.
|
||||
if( flows[b->_pre_order] ) continue;
|
||||
|
@ -40,10 +40,8 @@
|
||||
#include "opto/opcodes.hpp"
|
||||
#include "opto/rootnode.hpp"
|
||||
|
||||
//=============================================================================
|
||||
|
||||
#ifndef PRODUCT
|
||||
void LRG::dump( ) const {
|
||||
void LRG::dump() const {
|
||||
ttyLocker ttyl;
|
||||
tty->print("%d ",num_regs());
|
||||
_mask.dump();
|
||||
@ -94,7 +92,6 @@ void LRG::dump( ) const {
|
||||
}
|
||||
#endif
|
||||
|
||||
//------------------------------score------------------------------------------
|
||||
// Compute score from cost and area. Low score is best to spill.
|
||||
static double raw_score( double cost, double area ) {
|
||||
return cost - (area*RegisterCostAreaRatio) * 1.52588e-5;
|
||||
@ -125,7 +122,6 @@ double LRG::score() const {
|
||||
return score;
|
||||
}
|
||||
|
||||
//------------------------------LRG_List---------------------------------------
|
||||
LRG_List::LRG_List( uint max ) : _cnt(max), _max(max), _lidxs(NEW_RESOURCE_ARRAY(uint,max)) {
|
||||
memset( _lidxs, 0, sizeof(uint)*max );
|
||||
}
|
||||
@ -211,7 +207,6 @@ uint LiveRangeMap::find_const(uint lrg) const {
|
||||
return next;
|
||||
}
|
||||
|
||||
//------------------------------Chaitin----------------------------------------
|
||||
PhaseChaitin::PhaseChaitin(uint unique, PhaseCFG &cfg, Matcher &matcher)
|
||||
: PhaseRegAlloc(unique, cfg, matcher,
|
||||
#ifndef PRODUCT
|
||||
@ -232,31 +227,31 @@ PhaseChaitin::PhaseChaitin(uint unique, PhaseCFG &cfg, Matcher &matcher)
|
||||
{
|
||||
NOT_PRODUCT( Compile::TracePhase t3("ctorChaitin", &_t_ctorChaitin, TimeCompiler); )
|
||||
|
||||
_high_frequency_lrg = MIN2(float(OPTO_LRG_HIGH_FREQ), _cfg._outer_loop_freq);
|
||||
_high_frequency_lrg = MIN2(float(OPTO_LRG_HIGH_FREQ), _cfg.get_outer_loop_frequency());
|
||||
|
||||
// Build a list of basic blocks, sorted by frequency
|
||||
_blks = NEW_RESOURCE_ARRAY( Block *, _cfg._num_blocks );
|
||||
_blks = NEW_RESOURCE_ARRAY(Block *, _cfg.number_of_blocks());
|
||||
// Experiment with sorting strategies to speed compilation
|
||||
double cutoff = BLOCK_FREQUENCY(1.0); // Cutoff for high frequency bucket
|
||||
Block **buckets[NUMBUCKS]; // Array of buckets
|
||||
uint buckcnt[NUMBUCKS]; // Array of bucket counters
|
||||
double buckval[NUMBUCKS]; // Array of bucket value cutoffs
|
||||
for (uint i = 0; i < NUMBUCKS; i++) {
|
||||
buckets[i] = NEW_RESOURCE_ARRAY(Block *, _cfg._num_blocks);
|
||||
buckets[i] = NEW_RESOURCE_ARRAY(Block *, _cfg.number_of_blocks());
|
||||
buckcnt[i] = 0;
|
||||
// Bump by three orders of magnitude each time
|
||||
cutoff *= 0.001;
|
||||
buckval[i] = cutoff;
|
||||
for (uint j = 0; j < _cfg._num_blocks; j++) {
|
||||
for (uint j = 0; j < _cfg.number_of_blocks(); j++) {
|
||||
buckets[i][j] = NULL;
|
||||
}
|
||||
}
|
||||
// Sort blocks into buckets
|
||||
for (uint i = 0; i < _cfg._num_blocks; i++) {
|
||||
for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
|
||||
for (uint j = 0; j < NUMBUCKS; j++) {
|
||||
if ((j == NUMBUCKS - 1) || (_cfg._blocks[i]->_freq > buckval[j])) {
|
||||
if ((j == NUMBUCKS - 1) || (_cfg.get_block(i)->_freq > buckval[j])) {
|
||||
// Assign block to end of list for appropriate bucket
|
||||
buckets[j][buckcnt[j]++] = _cfg._blocks[i];
|
||||
buckets[j][buckcnt[j]++] = _cfg.get_block(i);
|
||||
break; // kick out of inner loop
|
||||
}
|
||||
}
|
||||
@ -269,10 +264,9 @@ PhaseChaitin::PhaseChaitin(uint unique, PhaseCFG &cfg, Matcher &matcher)
|
||||
}
|
||||
}
|
||||
|
||||
assert(blkcnt == _cfg._num_blocks, "Block array not totally filled");
|
||||
assert(blkcnt == _cfg.number_of_blocks(), "Block array not totally filled");
|
||||
}
|
||||
|
||||
//------------------------------Union------------------------------------------
|
||||
// union 2 sets together.
|
||||
void PhaseChaitin::Union( const Node *src_n, const Node *dst_n ) {
|
||||
uint src = _lrg_map.find(src_n);
|
||||
@ -285,7 +279,6 @@ void PhaseChaitin::Union( const Node *src_n, const Node *dst_n ) {
|
||||
_lrg_map.uf_map(dst, src);
|
||||
}
|
||||
|
||||
//------------------------------new_lrg----------------------------------------
|
||||
void PhaseChaitin::new_lrg(const Node *x, uint lrg) {
|
||||
// Make the Node->LRG mapping
|
||||
_lrg_map.extend(x->_idx,lrg);
|
||||
@ -311,7 +304,6 @@ bool PhaseChaitin::clone_projs_shared(Block *b, uint idx, Node *con, Node *copy,
|
||||
return true;
|
||||
}
|
||||
|
||||
//------------------------------compact----------------------------------------
|
||||
// Renumber the live ranges to compact them. Makes the IFG smaller.
|
||||
void PhaseChaitin::compact() {
|
||||
// Current the _uf_map contains a series of short chains which are headed
|
||||
@ -677,20 +669,19 @@ void PhaseChaitin::Register_Allocate() {
|
||||
C->set_indexSet_arena(NULL); // ResourceArea is at end of scope
|
||||
}
|
||||
|
||||
//------------------------------de_ssa-----------------------------------------
|
||||
void PhaseChaitin::de_ssa() {
|
||||
// Set initial Names for all Nodes. Most Nodes get the virtual register
|
||||
// number. A few get the ZERO live range number. These do not
|
||||
// get allocated, but instead rely on correct scheduling to ensure that
|
||||
// only one instance is simultaneously live at a time.
|
||||
uint lr_counter = 1;
|
||||
for( uint i = 0; i < _cfg._num_blocks; i++ ) {
|
||||
Block *b = _cfg._blocks[i];
|
||||
uint cnt = b->_nodes.size();
|
||||
for( uint i = 0; i < _cfg.number_of_blocks(); i++ ) {
|
||||
Block* block = _cfg.get_block(i);
|
||||
uint cnt = block->_nodes.size();
|
||||
|
||||
// Handle all the normal Nodes in the block
|
||||
for( uint j = 0; j < cnt; j++ ) {
|
||||
Node *n = b->_nodes[j];
|
||||
Node *n = block->_nodes[j];
|
||||
// Pre-color to the zero live range, or pick virtual register
|
||||
const RegMask &rm = n->out_RegMask();
|
||||
_lrg_map.map(n->_idx, rm.is_NotEmpty() ? lr_counter++ : 0);
|
||||
@ -701,52 +692,55 @@ void PhaseChaitin::de_ssa() {
|
||||
}
|
||||
|
||||
|
||||
//------------------------------gather_lrg_masks-------------------------------
|
||||
// Gather LiveRanGe information, including register masks. Modification of
|
||||
// cisc spillable in_RegMasks should not be done before AggressiveCoalesce.
|
||||
void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
|
||||
|
||||
// Nail down the frame pointer live range
|
||||
uint fp_lrg = _lrg_map.live_range_id(_cfg._root->in(1)->in(TypeFunc::FramePtr));
|
||||
uint fp_lrg = _lrg_map.live_range_id(_cfg.get_root_node()->in(1)->in(TypeFunc::FramePtr));
|
||||
lrgs(fp_lrg)._cost += 1e12; // Cost is infinite
|
||||
|
||||
// For all blocks
|
||||
for( uint i = 0; i < _cfg._num_blocks; i++ ) {
|
||||
Block *b = _cfg._blocks[i];
|
||||
for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
|
||||
Block* block = _cfg.get_block(i);
|
||||
|
||||
// For all instructions
|
||||
for( uint j = 1; j < b->_nodes.size(); j++ ) {
|
||||
Node *n = b->_nodes[j];
|
||||
for (uint j = 1; j < block->_nodes.size(); j++) {
|
||||
Node* n = block->_nodes[j];
|
||||
uint input_edge_start =1; // Skip control most nodes
|
||||
if( n->is_Mach() ) input_edge_start = n->as_Mach()->oper_input_base();
|
||||
if (n->is_Mach()) {
|
||||
input_edge_start = n->as_Mach()->oper_input_base();
|
||||
}
|
||||
uint idx = n->is_Copy();
|
||||
|
||||
// Get virtual register number, same as LiveRanGe index
|
||||
uint vreg = _lrg_map.live_range_id(n);
|
||||
LRG &lrg = lrgs(vreg);
|
||||
if( vreg ) { // No vreg means un-allocable (e.g. memory)
|
||||
LRG& lrg = lrgs(vreg);
|
||||
if (vreg) { // No vreg means un-allocable (e.g. memory)
|
||||
|
||||
// Collect has-copy bit
|
||||
if( idx ) {
|
||||
if (idx) {
|
||||
lrg._has_copy = 1;
|
||||
uint clidx = _lrg_map.live_range_id(n->in(idx));
|
||||
LRG ©_src = lrgs(clidx);
|
||||
LRG& copy_src = lrgs(clidx);
|
||||
copy_src._has_copy = 1;
|
||||
}
|
||||
|
||||
// Check for float-vs-int live range (used in register-pressure
|
||||
// calculations)
|
||||
const Type *n_type = n->bottom_type();
|
||||
if (n_type->is_floatingpoint())
|
||||
if (n_type->is_floatingpoint()) {
|
||||
lrg._is_float = 1;
|
||||
}
|
||||
|
||||
// Check for twice prior spilling. Once prior spilling might have
|
||||
// spilled 'soft', 2nd prior spill should have spilled 'hard' and
|
||||
// further spilling is unlikely to make progress.
|
||||
if( _spilled_once.test(n->_idx) ) {
|
||||
if (_spilled_once.test(n->_idx)) {
|
||||
lrg._was_spilled1 = 1;
|
||||
if( _spilled_twice.test(n->_idx) )
|
||||
if (_spilled_twice.test(n->_idx)) {
|
||||
lrg._was_spilled2 = 1;
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
@ -783,16 +777,18 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
|
||||
|
||||
// Check for bound register masks
|
||||
const RegMask &lrgmask = lrg.mask();
|
||||
if (lrgmask.is_bound(ireg))
|
||||
if (lrgmask.is_bound(ireg)) {
|
||||
lrg._is_bound = 1;
|
||||
}
|
||||
|
||||
// Check for maximum frequency value
|
||||
if (lrg._maxfreq < b->_freq)
|
||||
lrg._maxfreq = b->_freq;
|
||||
if (lrg._maxfreq < block->_freq) {
|
||||
lrg._maxfreq = block->_freq;
|
||||
}
|
||||
|
||||
// Check for oop-iness, or long/double
|
||||
// Check for multi-kill projection
|
||||
switch( ireg ) {
|
||||
switch (ireg) {
|
||||
case MachProjNode::fat_proj:
|
||||
// Fat projections have size equal to number of registers killed
|
||||
lrg.set_num_regs(rm.Size());
|
||||
@ -962,7 +958,7 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
|
||||
// AggressiveCoalesce. This effectively pre-virtual-splits
|
||||
// around uncommon uses of common defs.
|
||||
const RegMask &rm = n->in_RegMask(k);
|
||||
if (!after_aggressive && _cfg.get_block_for_node(n->in(k))->_freq > 1000 * b->_freq) {
|
||||
if (!after_aggressive && _cfg.get_block_for_node(n->in(k))->_freq > 1000 * block->_freq) {
|
||||
// Since we are BEFORE aggressive coalesce, leave the register
|
||||
// mask untrimmed by the call. This encourages more coalescing.
|
||||
// Later, AFTER aggressive, this live range will have to spill
|
||||
@ -1006,8 +1002,9 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
|
||||
}
|
||||
|
||||
// Check for maximum frequency value
|
||||
if( lrg._maxfreq < b->_freq )
|
||||
lrg._maxfreq = b->_freq;
|
||||
if (lrg._maxfreq < block->_freq) {
|
||||
lrg._maxfreq = block->_freq;
|
||||
}
|
||||
|
||||
} // End for all allocated inputs
|
||||
} // end for all instructions
|
||||
@ -1029,7 +1026,6 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------set_was_low------------------------------------
|
||||
// Set the was-lo-degree bit. Conservative coalescing should not change the
|
||||
// colorability of the graph. If any live range was of low-degree before
|
||||
// coalescing, it should Simplify. This call sets the was-lo-degree bit.
|
||||
@ -1066,7 +1062,6 @@ void PhaseChaitin::set_was_low() {
|
||||
|
||||
#define REGISTER_CONSTRAINED 16
|
||||
|
||||
//------------------------------cache_lrg_info---------------------------------
|
||||
// Compute cost/area ratio, in case we spill. Build the lo-degree list.
|
||||
void PhaseChaitin::cache_lrg_info( ) {
|
||||
|
||||
@ -1100,7 +1095,6 @@ void PhaseChaitin::cache_lrg_info( ) {
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------Pre-Simplify-----------------------------------
|
||||
// Simplify the IFG by removing LRGs of low degree that have NO copies
|
||||
void PhaseChaitin::Pre_Simplify( ) {
|
||||
|
||||
@ -1151,7 +1145,6 @@ void PhaseChaitin::Pre_Simplify( ) {
|
||||
// No more lo-degree no-copy live ranges to simplify
|
||||
}
|
||||
|
||||
//------------------------------Simplify---------------------------------------
|
||||
// Simplify the IFG by removing LRGs of low degree.
|
||||
void PhaseChaitin::Simplify( ) {
|
||||
|
||||
@ -1288,7 +1281,6 @@ void PhaseChaitin::Simplify( ) {
|
||||
|
||||
}
|
||||
|
||||
//------------------------------is_legal_reg-----------------------------------
|
||||
// Is 'reg' register legal for 'lrg'?
|
||||
static bool is_legal_reg(LRG &lrg, OptoReg::Name reg, int chunk) {
|
||||
if (reg >= chunk && reg < (chunk + RegMask::CHUNK_SIZE) &&
|
||||
@ -1315,7 +1307,6 @@ static bool is_legal_reg(LRG &lrg, OptoReg::Name reg, int chunk) {
|
||||
return false;
|
||||
}
|
||||
|
||||
//------------------------------bias_color-------------------------------------
|
||||
// Choose a color using the biasing heuristic
|
||||
OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) {
|
||||
|
||||
@ -1377,7 +1368,6 @@ OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) {
|
||||
return OptoReg::add( reg, chunk );
|
||||
}
|
||||
|
||||
//------------------------------choose_color-----------------------------------
|
||||
// Choose a color in the current chunk
|
||||
OptoReg::Name PhaseChaitin::choose_color( LRG &lrg, int chunk ) {
|
||||
assert( C->in_preserve_stack_slots() == 0 || chunk != 0 || lrg._is_bound || lrg.mask().is_bound1() || !lrg.mask().Member(OptoReg::Name(_matcher._old_SP-1)), "must not allocate stack0 (inside preserve area)");
|
||||
@ -1399,7 +1389,6 @@ OptoReg::Name PhaseChaitin::choose_color( LRG &lrg, int chunk ) {
|
||||
return lrg.mask().find_last_elem();
|
||||
}
|
||||
|
||||
//------------------------------Select-----------------------------------------
|
||||
// Select colors by re-inserting LRGs back into the IFG. LRGs are re-inserted
|
||||
// in reverse order of removal. As long as nothing of hi-degree was yanked,
|
||||
// everything going back is guaranteed a color. Select that color. If some
|
||||
@ -1574,8 +1563,6 @@ uint PhaseChaitin::Select( ) {
|
||||
return spill_reg-LRG::SPILL_REG; // Return number of spills
|
||||
}
|
||||
|
||||
|
||||
//------------------------------copy_was_spilled-------------------------------
|
||||
// Copy 'was_spilled'-edness from the source Node to the dst Node.
|
||||
void PhaseChaitin::copy_was_spilled( Node *src, Node *dst ) {
|
||||
if( _spilled_once.test(src->_idx) ) {
|
||||
@ -1588,14 +1575,12 @@ void PhaseChaitin::copy_was_spilled( Node *src, Node *dst ) {
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------set_was_spilled--------------------------------
|
||||
// Set the 'spilled_once' or 'spilled_twice' flag on a node.
|
||||
void PhaseChaitin::set_was_spilled( Node *n ) {
|
||||
if( _spilled_once.test_set(n->_idx) )
|
||||
_spilled_twice.set(n->_idx);
|
||||
}
|
||||
|
||||
//------------------------------fixup_spills-----------------------------------
|
||||
// Convert Ideal spill instructions into proper FramePtr + offset Loads and
|
||||
// Stores. Use-def chains are NOT preserved, but Node->LRG->reg maps are.
|
||||
void PhaseChaitin::fixup_spills() {
|
||||
@ -1605,16 +1590,16 @@ void PhaseChaitin::fixup_spills() {
|
||||
NOT_PRODUCT( Compile::TracePhase t3("fixupSpills", &_t_fixupSpills, TimeCompiler); )
|
||||
|
||||
// Grab the Frame Pointer
|
||||
Node *fp = _cfg._broot->head()->in(1)->in(TypeFunc::FramePtr);
|
||||
Node *fp = _cfg.get_root_block()->head()->in(1)->in(TypeFunc::FramePtr);
|
||||
|
||||
// For all blocks
|
||||
for( uint i = 0; i < _cfg._num_blocks; i++ ) {
|
||||
Block *b = _cfg._blocks[i];
|
||||
for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
|
||||
Block* block = _cfg.get_block(i);
|
||||
|
||||
// For all instructions in block
|
||||
uint last_inst = b->end_idx();
|
||||
for( uint j = 1; j <= last_inst; j++ ) {
|
||||
Node *n = b->_nodes[j];
|
||||
uint last_inst = block->end_idx();
|
||||
for (uint j = 1; j <= last_inst; j++) {
|
||||
Node* n = block->_nodes[j];
|
||||
|
||||
// Dead instruction???
|
||||
assert( n->outcnt() != 0 ||// Nothing dead after post alloc
|
||||
@ -1651,7 +1636,7 @@ void PhaseChaitin::fixup_spills() {
|
||||
assert( cisc->oper_input_base() == 2, "Only adding one edge");
|
||||
cisc->ins_req(1,src); // Requires a memory edge
|
||||
}
|
||||
b->_nodes.map(j,cisc); // Insert into basic block
|
||||
block->_nodes.map(j,cisc); // Insert into basic block
|
||||
n->subsume_by(cisc, C); // Correct graph
|
||||
//
|
||||
++_used_cisc_instructions;
|
||||
@ -1677,7 +1662,6 @@ void PhaseChaitin::fixup_spills() {
|
||||
} // End of for all blocks
|
||||
}
|
||||
|
||||
//------------------------------find_base_for_derived--------------------------
|
||||
// Helper to stretch above; recursively discover the base Node for a
|
||||
// given derived Node. Easy for AddP-related machine nodes, but needs
|
||||
// to be recursive for derived Phis.
|
||||
@ -1707,7 +1691,7 @@ Node *PhaseChaitin::find_base_for_derived( Node **derived_base_map, Node *derive
|
||||
// Initialize it once and make it shared:
|
||||
// set control to _root and place it into Start block
|
||||
// (where top() node is placed).
|
||||
base->init_req(0, _cfg._root);
|
||||
base->init_req(0, _cfg.get_root_node());
|
||||
Block *startb = _cfg.get_block_for_node(C->top());
|
||||
startb->_nodes.insert(startb->find_node(C->top()), base );
|
||||
_cfg.map_node_to_block(base, startb);
|
||||
@ -1716,7 +1700,7 @@ Node *PhaseChaitin::find_base_for_derived( Node **derived_base_map, Node *derive
|
||||
if (_lrg_map.live_range_id(base) == 0) {
|
||||
new_lrg(base, maxlrg++);
|
||||
}
|
||||
assert(base->in(0) == _cfg._root && _cfg.get_block_for_node(base) == _cfg.get_block_for_node(C->top()), "base NULL should be shared");
|
||||
assert(base->in(0) == _cfg.get_root_node() && _cfg.get_block_for_node(base) == _cfg.get_block_for_node(C->top()), "base NULL should be shared");
|
||||
derived_base_map[derived->_idx] = base;
|
||||
return base;
|
||||
}
|
||||
@ -1779,8 +1763,6 @@ Node *PhaseChaitin::find_base_for_derived( Node **derived_base_map, Node *derive
|
||||
return base;
|
||||
}
|
||||
|
||||
|
||||
//------------------------------stretch_base_pointer_live_ranges---------------
|
||||
// At each Safepoint, insert extra debug edges for each pair of derived value/
|
||||
// base pointer that is live across the Safepoint for oopmap building. The
|
||||
// edge pairs get added in after sfpt->jvmtail()->oopoff(), but are in the
|
||||
@ -1792,14 +1774,14 @@ bool PhaseChaitin::stretch_base_pointer_live_ranges(ResourceArea *a) {
|
||||
memset( derived_base_map, 0, sizeof(Node*)*C->unique() );
|
||||
|
||||
// For all blocks in RPO do...
|
||||
for( uint i=0; i<_cfg._num_blocks; i++ ) {
|
||||
Block *b = _cfg._blocks[i];
|
||||
for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
|
||||
Block* block = _cfg.get_block(i);
|
||||
// Note use of deep-copy constructor. I cannot hammer the original
|
||||
// liveout bits, because they are needed by the following coalesce pass.
|
||||
IndexSet liveout(_live->live(b));
|
||||
IndexSet liveout(_live->live(block));
|
||||
|
||||
for( uint j = b->end_idx() + 1; j > 1; j-- ) {
|
||||
Node *n = b->_nodes[j-1];
|
||||
for (uint j = block->end_idx() + 1; j > 1; j--) {
|
||||
Node* n = block->_nodes[j - 1];
|
||||
|
||||
// Pre-split compares of loop-phis. Loop-phis form a cycle we would
|
||||
// like to see in the same register. Compare uses the loop-phi and so
|
||||
@ -1814,7 +1796,7 @@ bool PhaseChaitin::stretch_base_pointer_live_ranges(ResourceArea *a) {
|
||||
Node *phi = n->in(1);
|
||||
if( phi->is_Phi() && phi->as_Phi()->region()->is_Loop() ) {
|
||||
Block *phi_block = _cfg.get_block_for_node(phi);
|
||||
if (_cfg.get_block_for_node(phi_block->pred(2)) == b) {
|
||||
if (_cfg.get_block_for_node(phi_block->pred(2)) == block) {
|
||||
const RegMask *mask = C->matcher()->idealreg2spillmask[Op_RegI];
|
||||
Node *spill = new (C) MachSpillCopyNode( phi, *mask, *mask );
|
||||
insert_proj( phi_block, 1, spill, maxlrg++ );
|
||||
@ -1868,7 +1850,7 @@ bool PhaseChaitin::stretch_base_pointer_live_ranges(ResourceArea *a) {
|
||||
if ((_lrg_map.live_range_id(base) >= _lrg_map.max_lrg_id() || // (Brand new base (hence not live) or
|
||||
!liveout.member(_lrg_map.live_range_id(base))) && // not live) AND
|
||||
(_lrg_map.live_range_id(base) > 0) && // not a constant
|
||||
_cfg.get_block_for_node(base) != b) { // base not def'd in blk)
|
||||
_cfg.get_block_for_node(base) != block) { // base not def'd in blk)
|
||||
// Base pointer is not currently live. Since I stretched
|
||||
// the base pointer to here and it crosses basic-block
|
||||
// boundaries, the global live info is now incorrect.
|
||||
@ -1903,15 +1885,12 @@ bool PhaseChaitin::stretch_base_pointer_live_ranges(ResourceArea *a) {
|
||||
return must_recompute_live != 0;
|
||||
}
|
||||
|
||||
|
||||
//------------------------------add_reference----------------------------------
|
||||
// Extend the node to LRG mapping
|
||||
|
||||
void PhaseChaitin::add_reference(const Node *node, const Node *old_node) {
|
||||
_lrg_map.extend(node->_idx, _lrg_map.live_range_id(old_node));
|
||||
}
|
||||
|
||||
//------------------------------dump-------------------------------------------
|
||||
#ifndef PRODUCT
|
||||
void PhaseChaitin::dump(const Node *n) const {
|
||||
uint r = (n->_idx < _lrg_map.size()) ? _lrg_map.find_const(n) : 0;
|
||||
@ -2017,8 +1996,9 @@ void PhaseChaitin::dump() const {
|
||||
_matcher._new_SP, _framesize );
|
||||
|
||||
// For all blocks
|
||||
for( uint i = 0; i < _cfg._num_blocks; i++ )
|
||||
dump(_cfg._blocks[i]);
|
||||
for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
|
||||
dump(_cfg.get_block(i));
|
||||
}
|
||||
// End of per-block dump
|
||||
tty->print("\n");
|
||||
|
||||
@ -2059,7 +2039,6 @@ void PhaseChaitin::dump() const {
|
||||
tty->print_cr("");
|
||||
}
|
||||
|
||||
//------------------------------dump_degree_lists------------------------------
|
||||
void PhaseChaitin::dump_degree_lists() const {
|
||||
// Dump lo-degree list
|
||||
tty->print("Lo degree: ");
|
||||
@ -2080,7 +2059,6 @@ void PhaseChaitin::dump_degree_lists() const {
|
||||
tty->print_cr("");
|
||||
}
|
||||
|
||||
//------------------------------dump_simplified--------------------------------
|
||||
void PhaseChaitin::dump_simplified() const {
|
||||
tty->print("Simplified: ");
|
||||
for( uint i = _simplified; i; i = lrgs(i)._next )
|
||||
@ -2099,7 +2077,6 @@ static char *print_reg( OptoReg::Name reg, const PhaseChaitin *pc, char *buf ) {
|
||||
return buf+strlen(buf);
|
||||
}
|
||||
|
||||
//------------------------------dump_register----------------------------------
|
||||
// Dump a register name into a buffer. Be intelligent if we get called
|
||||
// before allocation is complete.
|
||||
char *PhaseChaitin::dump_register( const Node *n, char *buf ) const {
|
||||
@ -2133,7 +2110,6 @@ char *PhaseChaitin::dump_register( const Node *n, char *buf ) const {
|
||||
return buf+strlen(buf);
|
||||
}
|
||||
|
||||
//----------------------dump_for_spill_split_recycle--------------------------
|
||||
void PhaseChaitin::dump_for_spill_split_recycle() const {
|
||||
if( WizardMode && (PrintCompilation || PrintOpto) ) {
|
||||
// Display which live ranges need to be split and the allocator's state
|
||||
@ -2149,7 +2125,6 @@ void PhaseChaitin::dump_for_spill_split_recycle() const {
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------dump_frame------------------------------------
|
||||
void PhaseChaitin::dump_frame() const {
|
||||
const char *fp = OptoReg::regname(OptoReg::c_frame_pointer);
|
||||
const TypeTuple *domain = C->tf()->domain();
|
||||
@ -2255,17 +2230,16 @@ void PhaseChaitin::dump_frame() const {
|
||||
tty->print_cr("#");
|
||||
}
|
||||
|
||||
//------------------------------dump_bb----------------------------------------
|
||||
void PhaseChaitin::dump_bb( uint pre_order ) const {
|
||||
tty->print_cr("---dump of B%d---",pre_order);
|
||||
for( uint i = 0; i < _cfg._num_blocks; i++ ) {
|
||||
Block *b = _cfg._blocks[i];
|
||||
if( b->_pre_order == pre_order )
|
||||
dump(b);
|
||||
for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
|
||||
Block* block = _cfg.get_block(i);
|
||||
if (block->_pre_order == pre_order) {
|
||||
dump(block);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------dump_lrg---------------------------------------
|
||||
void PhaseChaitin::dump_lrg( uint lidx, bool defs_only ) const {
|
||||
tty->print_cr("---dump of L%d---",lidx);
|
||||
|
||||
@ -2287,17 +2261,17 @@ void PhaseChaitin::dump_lrg( uint lidx, bool defs_only ) const {
|
||||
tty->cr();
|
||||
}
|
||||
// For all blocks
|
||||
for( uint i = 0; i < _cfg._num_blocks; i++ ) {
|
||||
Block *b = _cfg._blocks[i];
|
||||
for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
|
||||
Block* block = _cfg.get_block(i);
|
||||
int dump_once = 0;
|
||||
|
||||
// For all instructions
|
||||
for( uint j = 0; j < b->_nodes.size(); j++ ) {
|
||||
Node *n = b->_nodes[j];
|
||||
for( uint j = 0; j < block->_nodes.size(); j++ ) {
|
||||
Node *n = block->_nodes[j];
|
||||
if (_lrg_map.find_const(n) == lidx) {
|
||||
if (!dump_once++) {
|
||||
tty->cr();
|
||||
b->dump_head(&_cfg);
|
||||
block->dump_head(&_cfg);
|
||||
}
|
||||
dump(n);
|
||||
continue;
|
||||
@ -2312,7 +2286,7 @@ void PhaseChaitin::dump_lrg( uint lidx, bool defs_only ) const {
|
||||
if (_lrg_map.find_const(m) == lidx) {
|
||||
if (!dump_once++) {
|
||||
tty->cr();
|
||||
b->dump_head(&_cfg);
|
||||
block->dump_head(&_cfg);
|
||||
}
|
||||
dump(n);
|
||||
}
|
||||
@ -2324,7 +2298,6 @@ void PhaseChaitin::dump_lrg( uint lidx, bool defs_only ) const {
|
||||
}
|
||||
#endif // not PRODUCT
|
||||
|
||||
//------------------------------print_chaitin_statistics-------------------------------
|
||||
int PhaseChaitin::_final_loads = 0;
|
||||
int PhaseChaitin::_final_stores = 0;
|
||||
int PhaseChaitin::_final_memoves= 0;
|
||||
|
@ -34,8 +34,6 @@
|
||||
#include "opto/matcher.hpp"
|
||||
#include "opto/regmask.hpp"
|
||||
|
||||
//=============================================================================
|
||||
//------------------------------Dump-------------------------------------------
|
||||
#ifndef PRODUCT
|
||||
void PhaseCoalesce::dump(Node *n) const {
|
||||
// Being a const function means I cannot use 'Find'
|
||||
@ -43,12 +41,11 @@ void PhaseCoalesce::dump(Node *n) const {
|
||||
tty->print("L%d/N%d ",r,n->_idx);
|
||||
}
|
||||
|
||||
//------------------------------dump-------------------------------------------
|
||||
void PhaseCoalesce::dump() const {
|
||||
// I know I have a block layout now, so I can print blocks in a loop
|
||||
for( uint i=0; i<_phc._cfg._num_blocks; i++ ) {
|
||||
for( uint i=0; i<_phc._cfg.number_of_blocks(); i++ ) {
|
||||
uint j;
|
||||
Block *b = _phc._cfg._blocks[i];
|
||||
Block* b = _phc._cfg.get_block(i);
|
||||
// Print a nice block header
|
||||
tty->print("B%d: ",b->_pre_order);
|
||||
for( j=1; j<b->num_preds(); j++ )
|
||||
@ -85,7 +82,6 @@ void PhaseCoalesce::dump() const {
|
||||
}
|
||||
#endif
|
||||
|
||||
//------------------------------combine_these_two------------------------------
|
||||
// Combine the live ranges def'd by these 2 Nodes. N2 is an input to N1.
|
||||
void PhaseCoalesce::combine_these_two(Node *n1, Node *n2) {
|
||||
uint lr1 = _phc._lrg_map.find(n1);
|
||||
@ -127,18 +123,15 @@ void PhaseCoalesce::combine_these_two(Node *n1, Node *n2) {
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------coalesce_driver--------------------------------
|
||||
// Copy coalescing
|
||||
void PhaseCoalesce::coalesce_driver( ) {
|
||||
|
||||
void PhaseCoalesce::coalesce_driver() {
|
||||
verify();
|
||||
// Coalesce from high frequency to low
|
||||
for( uint i=0; i<_phc._cfg._num_blocks; i++ )
|
||||
coalesce( _phc._blks[i] );
|
||||
|
||||
for (uint i = 0; i < _phc._cfg.number_of_blocks(); i++) {
|
||||
coalesce(_phc._blks[i]);
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------insert_copy_with_overlap-----------------------
|
||||
// I am inserting copies to come out of SSA form. In the general case, I am
|
||||
// doing a parallel renaming. I'm in the Named world now, so I can't do a
|
||||
// general parallel renaming. All the copies now use "names" (live-ranges)
|
||||
@ -216,7 +209,6 @@ void PhaseAggressiveCoalesce::insert_copy_with_overlap( Block *b, Node *copy, ui
|
||||
b->_nodes.insert(last_use_idx+1,copy);
|
||||
}
|
||||
|
||||
//------------------------------insert_copies----------------------------------
|
||||
void PhaseAggressiveCoalesce::insert_copies( Matcher &matcher ) {
|
||||
// We do LRGs compressing and fix a liveout data only here since the other
|
||||
// place in Split() is guarded by the assert which we never hit.
|
||||
@ -225,8 +217,8 @@ void PhaseAggressiveCoalesce::insert_copies( Matcher &matcher ) {
|
||||
for (uint lrg = 1; lrg < _phc._lrg_map.max_lrg_id(); lrg++) {
|
||||
uint compressed_lrg = _phc._lrg_map.find(lrg);
|
||||
if (lrg != compressed_lrg) {
|
||||
for (uint bidx = 0; bidx < _phc._cfg._num_blocks; bidx++) {
|
||||
IndexSet *liveout = _phc._live->live(_phc._cfg._blocks[bidx]);
|
||||
for (uint bidx = 0; bidx < _phc._cfg.number_of_blocks(); bidx++) {
|
||||
IndexSet *liveout = _phc._live->live(_phc._cfg.get_block(bidx));
|
||||
if (liveout->member(lrg)) {
|
||||
liveout->remove(lrg);
|
||||
liveout->insert(compressed_lrg);
|
||||
@ -239,10 +231,10 @@ void PhaseAggressiveCoalesce::insert_copies( Matcher &matcher ) {
|
||||
// Nodes with index less than '_unique' are original, non-virtual Nodes.
|
||||
_unique = C->unique();
|
||||
|
||||
for( uint i=0; i<_phc._cfg._num_blocks; i++ ) {
|
||||
for (uint i = 0; i < _phc._cfg.number_of_blocks(); i++) {
|
||||
C->check_node_count(NodeLimitFudgeFactor, "out of nodes in coalesce");
|
||||
if (C->failing()) return;
|
||||
Block *b = _phc._cfg._blocks[i];
|
||||
Block *b = _phc._cfg.get_block(i);
|
||||
uint cnt = b->num_preds(); // Number of inputs to the Phi
|
||||
|
||||
for( uint l = 1; l<b->_nodes.size(); l++ ) {
|
||||
@ -403,8 +395,7 @@ void PhaseAggressiveCoalesce::insert_copies( Matcher &matcher ) {
|
||||
} // End of for all blocks
|
||||
}
|
||||
|
||||
//=============================================================================
|
||||
//------------------------------coalesce---------------------------------------
|
||||
|
||||
// Aggressive (but pessimistic) copy coalescing of a single block
|
||||
|
||||
// The following coalesce pass represents a single round of aggressive
|
||||
@ -464,20 +455,16 @@ void PhaseAggressiveCoalesce::coalesce( Block *b ) {
|
||||
} // End of for all instructions in block
|
||||
}
|
||||
|
||||
//=============================================================================
|
||||
//------------------------------PhaseConservativeCoalesce----------------------
|
||||
PhaseConservativeCoalesce::PhaseConservativeCoalesce(PhaseChaitin &chaitin) : PhaseCoalesce(chaitin) {
|
||||
_ulr.initialize(_phc._lrg_map.max_lrg_id());
|
||||
}
|
||||
|
||||
//------------------------------verify-----------------------------------------
|
||||
void PhaseConservativeCoalesce::verify() {
|
||||
#ifdef ASSERT
|
||||
_phc.set_was_low();
|
||||
#endif
|
||||
}
|
||||
|
||||
//------------------------------union_helper-----------------------------------
|
||||
void PhaseConservativeCoalesce::union_helper( Node *lr1_node, Node *lr2_node, uint lr1, uint lr2, Node *src_def, Node *dst_copy, Node *src_copy, Block *b, uint bindex ) {
|
||||
// Join live ranges. Merge larger into smaller. Union lr2 into lr1 in the
|
||||
// union-find tree
|
||||
@ -520,7 +507,6 @@ void PhaseConservativeCoalesce::union_helper( Node *lr1_node, Node *lr2_node, ui
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------compute_separating_interferences---------------
|
||||
// Factored code from copy_copy that computes extra interferences from
|
||||
// lengthening a live range by double-coalescing.
|
||||
uint PhaseConservativeCoalesce::compute_separating_interferences(Node *dst_copy, Node *src_copy, Block *b, uint bindex, RegMask &rm, uint reg_degree, uint rm_size, uint lr1, uint lr2 ) {
|
||||
@ -586,7 +572,6 @@ uint PhaseConservativeCoalesce::compute_separating_interferences(Node *dst_copy,
|
||||
return reg_degree;
|
||||
}
|
||||
|
||||
//------------------------------update_ifg-------------------------------------
|
||||
void PhaseConservativeCoalesce::update_ifg(uint lr1, uint lr2, IndexSet *n_lr1, IndexSet *n_lr2) {
|
||||
// Some original neighbors of lr1 might have gone away
|
||||
// because the constrained register mask prevented them.
|
||||
@ -616,7 +601,6 @@ void PhaseConservativeCoalesce::update_ifg(uint lr1, uint lr2, IndexSet *n_lr1,
|
||||
lrgs(neighbor).inc_degree( lrg1.compute_degree(lrgs(neighbor)) );
|
||||
}
|
||||
|
||||
//------------------------------record_bias------------------------------------
|
||||
static void record_bias( const PhaseIFG *ifg, int lr1, int lr2 ) {
|
||||
// Tag copy bias here
|
||||
if( !ifg->lrgs(lr1)._copy_bias )
|
||||
@ -625,7 +609,6 @@ static void record_bias( const PhaseIFG *ifg, int lr1, int lr2 ) {
|
||||
ifg->lrgs(lr2)._copy_bias = lr1;
|
||||
}
|
||||
|
||||
//------------------------------copy_copy--------------------------------------
|
||||
// See if I can coalesce a series of multiple copies together. I need the
|
||||
// final dest copy and the original src copy. They can be the same Node.
|
||||
// Compute the compatible register masks.
|
||||
@ -785,7 +768,6 @@ bool PhaseConservativeCoalesce::copy_copy(Node *dst_copy, Node *src_copy, Block
|
||||
return true;
|
||||
}
|
||||
|
||||
//------------------------------coalesce---------------------------------------
|
||||
// Conservative (but pessimistic) copy coalescing of a single block
|
||||
void PhaseConservativeCoalesce::coalesce( Block *b ) {
|
||||
// Bail out on infrequent blocks
|
||||
|
@ -2136,7 +2136,9 @@ void Compile::Optimize() {
|
||||
//------------------------------Code_Gen---------------------------------------
|
||||
// Given a graph, generate code for it
|
||||
void Compile::Code_Gen() {
|
||||
if (failing()) return;
|
||||
if (failing()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Perform instruction selection. You might think we could reclaim Matcher
|
||||
// memory PDQ, but actually the Matcher is used in generating spill code.
|
||||
@ -2148,12 +2150,11 @@ void Compile::Code_Gen() {
|
||||
// nodes. Mapping is only valid at the root of each matched subtree.
|
||||
NOT_PRODUCT( verify_graph_edges(); )
|
||||
|
||||
Node_List proj_list;
|
||||
Matcher m(proj_list);
|
||||
_matcher = &m;
|
||||
Matcher matcher;
|
||||
_matcher = &matcher;
|
||||
{
|
||||
TracePhase t2("matcher", &_t_matcher, true);
|
||||
m.match();
|
||||
matcher.match();
|
||||
}
|
||||
// In debug mode can dump m._nodes.dump() for mapping of ideal to machine
|
||||
// nodes. Mapping is only valid at the root of each matched subtree.
|
||||
@ -2161,31 +2162,26 @@ void Compile::Code_Gen() {
|
||||
|
||||
// If you have too many nodes, or if matching has failed, bail out
|
||||
check_node_count(0, "out of nodes matching instructions");
|
||||
if (failing()) return;
|
||||
if (failing()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Build a proper-looking CFG
|
||||
PhaseCFG cfg(node_arena(), root(), m);
|
||||
PhaseCFG cfg(node_arena(), root(), matcher);
|
||||
_cfg = &cfg;
|
||||
{
|
||||
NOT_PRODUCT( TracePhase t2("scheduler", &_t_scheduler, TimeCompiler); )
|
||||
cfg.Dominators();
|
||||
if (failing()) return;
|
||||
|
||||
NOT_PRODUCT( verify_graph_edges(); )
|
||||
|
||||
cfg.Estimate_Block_Frequency();
|
||||
cfg.GlobalCodeMotion(m,unique(),proj_list);
|
||||
if (failing()) return;
|
||||
bool success = cfg.do_global_code_motion();
|
||||
if (!success) {
|
||||
return;
|
||||
}
|
||||
|
||||
print_method(PHASE_GLOBAL_CODE_MOTION, 2);
|
||||
|
||||
NOT_PRODUCT( verify_graph_edges(); )
|
||||
|
||||
debug_only( cfg.verify(); )
|
||||
}
|
||||
NOT_PRODUCT( verify_graph_edges(); )
|
||||
|
||||
PhaseChaitin regalloc(unique(), cfg, m);
|
||||
PhaseChaitin regalloc(unique(), cfg, matcher);
|
||||
_regalloc = ®alloc;
|
||||
{
|
||||
TracePhase t2("regalloc", &_t_registerAllocation, true);
|
||||
@ -2206,7 +2202,7 @@ void Compile::Code_Gen() {
|
||||
// can now safely remove it.
|
||||
{
|
||||
NOT_PRODUCT( TracePhase t2("blockOrdering", &_t_blockOrdering, TimeCompiler); )
|
||||
cfg.remove_empty();
|
||||
cfg.remove_empty_blocks();
|
||||
if (do_freq_based_layout()) {
|
||||
PhaseBlockLayout layout(cfg);
|
||||
} else {
|
||||
@ -2253,38 +2249,50 @@ void Compile::dump_asm(int *pcs, uint pc_limit) {
|
||||
_regalloc->dump_frame();
|
||||
|
||||
Node *n = NULL;
|
||||
for( uint i=0; i<_cfg->_num_blocks; i++ ) {
|
||||
if (VMThread::should_terminate()) { cut_short = true; break; }
|
||||
Block *b = _cfg->_blocks[i];
|
||||
if (b->is_connector() && !Verbose) continue;
|
||||
n = b->_nodes[0];
|
||||
if (pcs && n->_idx < pc_limit)
|
||||
for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
|
||||
if (VMThread::should_terminate()) {
|
||||
cut_short = true;
|
||||
break;
|
||||
}
|
||||
Block* block = _cfg->get_block(i);
|
||||
if (block->is_connector() && !Verbose) {
|
||||
continue;
|
||||
}
|
||||
n = block->_nodes[0];
|
||||
if (pcs && n->_idx < pc_limit) {
|
||||
tty->print("%3.3x ", pcs[n->_idx]);
|
||||
else
|
||||
} else {
|
||||
tty->print(" ");
|
||||
b->dump_head(_cfg);
|
||||
if (b->is_connector()) {
|
||||
}
|
||||
block->dump_head(_cfg);
|
||||
if (block->is_connector()) {
|
||||
tty->print_cr(" # Empty connector block");
|
||||
} else if (b->num_preds() == 2 && b->pred(1)->is_CatchProj() && b->pred(1)->as_CatchProj()->_con == CatchProjNode::fall_through_index) {
|
||||
} else if (block->num_preds() == 2 && block->pred(1)->is_CatchProj() && block->pred(1)->as_CatchProj()->_con == CatchProjNode::fall_through_index) {
|
||||
tty->print_cr(" # Block is sole successor of call");
|
||||
}
|
||||
|
||||
// For all instructions
|
||||
Node *delay = NULL;
|
||||
for( uint j = 0; j<b->_nodes.size(); j++ ) {
|
||||
if (VMThread::should_terminate()) { cut_short = true; break; }
|
||||
n = b->_nodes[j];
|
||||
for (uint j = 0; j < block->_nodes.size(); j++) {
|
||||
if (VMThread::should_terminate()) {
|
||||
cut_short = true;
|
||||
break;
|
||||
}
|
||||
n = block->_nodes[j];
|
||||
if (valid_bundle_info(n)) {
|
||||
Bundle *bundle = node_bundling(n);
|
||||
Bundle* bundle = node_bundling(n);
|
||||
if (bundle->used_in_unconditional_delay()) {
|
||||
delay = n;
|
||||
continue;
|
||||
}
|
||||
if (bundle->starts_bundle())
|
||||
if (bundle->starts_bundle()) {
|
||||
starts_bundle = '+';
|
||||
}
|
||||
}
|
||||
|
||||
if (WizardMode) n->dump();
|
||||
if (WizardMode) {
|
||||
n->dump();
|
||||
}
|
||||
|
||||
if( !n->is_Region() && // Dont print in the Assembly
|
||||
!n->is_Phi() && // a few noisely useless nodes
|
||||
|
@ -32,9 +32,6 @@
|
||||
|
||||
// Portions of code courtesy of Clifford Click
|
||||
|
||||
// Optimization - Graph Style
|
||||
|
||||
//------------------------------Tarjan-----------------------------------------
|
||||
// A data structure that holds all the information needed to find dominators.
|
||||
struct Tarjan {
|
||||
Block *_block; // Basic block for this info
|
||||
@ -60,23 +57,21 @@ struct Tarjan {
|
||||
|
||||
};
|
||||
|
||||
//------------------------------Dominator--------------------------------------
|
||||
// Compute the dominator tree of the CFG. The CFG must already have been
|
||||
// constructed. This is the Lengauer & Tarjan O(E-alpha(E,V)) algorithm.
|
||||
void PhaseCFG::Dominators( ) {
|
||||
void PhaseCFG::build_dominator_tree() {
|
||||
// Pre-grow the blocks array, prior to the ResourceMark kicking in
|
||||
_blocks.map(_num_blocks,0);
|
||||
_blocks.map(number_of_blocks(), 0);
|
||||
|
||||
ResourceMark rm;
|
||||
// Setup mappings from my Graph to Tarjan's stuff and back
|
||||
// Note: Tarjan uses 1-based arrays
|
||||
Tarjan *tarjan = NEW_RESOURCE_ARRAY(Tarjan,_num_blocks+1);
|
||||
Tarjan* tarjan = NEW_RESOURCE_ARRAY(Tarjan, number_of_blocks() + 1);
|
||||
|
||||
// Tarjan's algorithm, almost verbatim:
|
||||
// Step 1:
|
||||
_rpo_ctr = _num_blocks;
|
||||
uint dfsnum = DFS( tarjan );
|
||||
if( dfsnum-1 != _num_blocks ) {// Check for unreachable loops!
|
||||
uint dfsnum = do_DFS(tarjan, number_of_blocks());
|
||||
if (dfsnum - 1 != number_of_blocks()) { // Check for unreachable loops!
|
||||
// If the returned dfsnum does not match the number of blocks, then we
|
||||
// must have some unreachable loops. These can be made at any time by
|
||||
// IterGVN. They are cleaned up by CCP or the loop opts, but the last
|
||||
@ -93,14 +88,13 @@ void PhaseCFG::Dominators( ) {
|
||||
C->record_method_not_compilable("unreachable loop");
|
||||
return;
|
||||
}
|
||||
_blocks._cnt = _num_blocks;
|
||||
_blocks._cnt = number_of_blocks();
|
||||
|
||||
// Tarjan is using 1-based arrays, so these are some initialize flags
|
||||
tarjan[0]._size = tarjan[0]._semi = 0;
|
||||
tarjan[0]._label = &tarjan[0];
|
||||
|
||||
uint i;
|
||||
for( i=_num_blocks; i>=2; i-- ) { // For all vertices in DFS order
|
||||
for (uint i = number_of_blocks(); i >= 2; i--) { // For all vertices in DFS order
|
||||
Tarjan *w = &tarjan[i]; // Get vertex from DFS
|
||||
|
||||
// Step 2:
|
||||
@ -130,19 +124,19 @@ void PhaseCFG::Dominators( ) {
|
||||
}
|
||||
|
||||
// Step 4:
|
||||
for( i=2; i <= _num_blocks; i++ ) {
|
||||
for (uint i = 2; i <= number_of_blocks(); i++) {
|
||||
Tarjan *w = &tarjan[i];
|
||||
if( w->_dom != &tarjan[w->_semi] )
|
||||
w->_dom = w->_dom->_dom;
|
||||
w->_dom_next = w->_dom_child = NULL; // Initialize for building tree later
|
||||
}
|
||||
// No immediate dominator for the root
|
||||
Tarjan *w = &tarjan[_broot->_pre_order];
|
||||
Tarjan *w = &tarjan[get_root_block()->_pre_order];
|
||||
w->_dom = NULL;
|
||||
w->_dom_next = w->_dom_child = NULL; // Initialize for building tree later
|
||||
|
||||
// Convert the dominator tree array into my kind of graph
|
||||
for( i=1; i<=_num_blocks;i++){// For all Tarjan vertices
|
||||
for(uint i = 1; i <= number_of_blocks(); i++){ // For all Tarjan vertices
|
||||
Tarjan *t = &tarjan[i]; // Handy access
|
||||
Tarjan *tdom = t->_dom; // Handy access to immediate dominator
|
||||
if( tdom ) { // Root has no immediate dominator
|
||||
@ -152,11 +146,10 @@ void PhaseCFG::Dominators( ) {
|
||||
} else
|
||||
t->_block->_idom = NULL; // Root
|
||||
}
|
||||
w->setdepth( _num_blocks+1 ); // Set depth in dominator tree
|
||||
w->setdepth(number_of_blocks() + 1); // Set depth in dominator tree
|
||||
|
||||
}
|
||||
|
||||
//----------------------------Block_Stack--------------------------------------
|
||||
class Block_Stack {
|
||||
private:
|
||||
struct Block_Descr {
|
||||
@ -214,7 +207,6 @@ class Block_Stack {
|
||||
}
|
||||
};
|
||||
|
||||
//-------------------------most_frequent_successor-----------------------------
|
||||
// Find the index into the b->succs[] array of the most frequent successor.
|
||||
uint Block_Stack::most_frequent_successor( Block *b ) {
|
||||
uint freq_idx = 0;
|
||||
@ -258,40 +250,38 @@ uint Block_Stack::most_frequent_successor( Block *b ) {
|
||||
return freq_idx;
|
||||
}
|
||||
|
||||
//------------------------------DFS--------------------------------------------
|
||||
// Perform DFS search. Setup 'vertex' as DFS to vertex mapping. Setup
|
||||
// 'semi' as vertex to DFS mapping. Set 'parent' to DFS parent.
|
||||
uint PhaseCFG::DFS( Tarjan *tarjan ) {
|
||||
Block *b = _broot;
|
||||
uint PhaseCFG::do_DFS(Tarjan *tarjan, uint rpo_counter) {
|
||||
Block* root_block = get_root_block();
|
||||
uint pre_order = 1;
|
||||
// Allocate stack of size _num_blocks+1 to avoid frequent realloc
|
||||
Block_Stack bstack(tarjan, _num_blocks+1);
|
||||
// Allocate stack of size number_of_blocks() + 1 to avoid frequent realloc
|
||||
Block_Stack bstack(tarjan, number_of_blocks() + 1);
|
||||
|
||||
// Push on stack the state for the first block
|
||||
bstack.push(pre_order, b);
|
||||
bstack.push(pre_order, root_block);
|
||||
++pre_order;
|
||||
|
||||
while (bstack.is_nonempty()) {
|
||||
if (!bstack.last_successor()) {
|
||||
// Walk over all successors in pre-order (DFS).
|
||||
Block *s = bstack.next_successor();
|
||||
if (s->_pre_order == 0) { // Check for no-pre-order, not-visited
|
||||
Block* next_block = bstack.next_successor();
|
||||
if (next_block->_pre_order == 0) { // Check for no-pre-order, not-visited
|
||||
// Push on stack the state of successor
|
||||
bstack.push(pre_order, s);
|
||||
bstack.push(pre_order, next_block);
|
||||
++pre_order;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Build a reverse post-order in the CFG _blocks array
|
||||
Block *stack_top = bstack.pop();
|
||||
stack_top->_rpo = --_rpo_ctr;
|
||||
stack_top->_rpo = --rpo_counter;
|
||||
_blocks.map(stack_top->_rpo, stack_top);
|
||||
}
|
||||
}
|
||||
return pre_order;
|
||||
}
|
||||
|
||||
//------------------------------COMPRESS---------------------------------------
|
||||
void Tarjan::COMPRESS()
|
||||
{
|
||||
assert( _ancestor != 0, "" );
|
||||
@ -303,14 +293,12 @@ void Tarjan::COMPRESS()
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------EVAL-------------------------------------------
|
||||
Tarjan *Tarjan::EVAL() {
|
||||
if( !_ancestor ) return _label;
|
||||
COMPRESS();
|
||||
return (_ancestor->_label->_semi >= _label->_semi) ? _label : _ancestor->_label;
|
||||
}
|
||||
|
||||
//------------------------------LINK-------------------------------------------
|
||||
void Tarjan::LINK( Tarjan *w, Tarjan *tarjan0 ) {
|
||||
Tarjan *s = w;
|
||||
while( w->_label->_semi < s->_child->_label->_semi ) {
|
||||
@ -333,7 +321,6 @@ void Tarjan::LINK( Tarjan *w, Tarjan *tarjan0 ) {
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------setdepth---------------------------------------
|
||||
void Tarjan::setdepth( uint stack_size ) {
|
||||
Tarjan **top = NEW_RESOURCE_ARRAY(Tarjan*, stack_size);
|
||||
Tarjan **next = top;
|
||||
@ -362,8 +349,7 @@ void Tarjan::setdepth( uint stack_size ) {
|
||||
} while (last < top);
|
||||
}
|
||||
|
||||
//*********************** DOMINATORS ON THE SEA OF NODES***********************
|
||||
//------------------------------NTarjan----------------------------------------
|
||||
// Compute dominators on the Sea of Nodes form
|
||||
// A data structure that holds all the information needed to find dominators.
|
||||
struct NTarjan {
|
||||
Node *_control; // Control node associated with this info
|
||||
@ -396,7 +382,6 @@ struct NTarjan {
|
||||
#endif
|
||||
};
|
||||
|
||||
//------------------------------Dominator--------------------------------------
|
||||
// Compute the dominator tree of the sea of nodes. This version walks all CFG
|
||||
// nodes (using the is_CFG() call) and places them in a dominator tree. Thus,
|
||||
// it needs a count of the CFG nodes for the mapping table. This is the
|
||||
@ -517,7 +502,6 @@ void PhaseIdealLoop::Dominators() {
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------DFS--------------------------------------------
|
||||
// Perform DFS search. Setup 'vertex' as DFS to vertex mapping. Setup
|
||||
// 'semi' as vertex to DFS mapping. Set 'parent' to DFS parent.
|
||||
int NTarjan::DFS( NTarjan *ntarjan, VectorSet &visited, PhaseIdealLoop *pil, uint *dfsorder) {
|
||||
@ -560,7 +544,6 @@ int NTarjan::DFS( NTarjan *ntarjan, VectorSet &visited, PhaseIdealLoop *pil, uin
|
||||
return dfsnum;
|
||||
}
|
||||
|
||||
//------------------------------COMPRESS---------------------------------------
|
||||
void NTarjan::COMPRESS()
|
||||
{
|
||||
assert( _ancestor != 0, "" );
|
||||
@ -572,14 +555,12 @@ void NTarjan::COMPRESS()
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------EVAL-------------------------------------------
|
||||
NTarjan *NTarjan::EVAL() {
|
||||
if( !_ancestor ) return _label;
|
||||
COMPRESS();
|
||||
return (_ancestor->_label->_semi >= _label->_semi) ? _label : _ancestor->_label;
|
||||
}
|
||||
|
||||
//------------------------------LINK-------------------------------------------
|
||||
void NTarjan::LINK( NTarjan *w, NTarjan *ntarjan0 ) {
|
||||
NTarjan *s = w;
|
||||
while( w->_label->_semi < s->_child->_label->_semi ) {
|
||||
@ -602,7 +583,6 @@ void NTarjan::LINK( NTarjan *w, NTarjan *ntarjan0 ) {
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------setdepth---------------------------------------
|
||||
void NTarjan::setdepth( uint stack_size, uint *dom_depth ) {
|
||||
NTarjan **top = NEW_RESOURCE_ARRAY(NTarjan*, stack_size);
|
||||
NTarjan **next = top;
|
||||
@ -631,7 +611,6 @@ void NTarjan::setdepth( uint stack_size, uint *dom_depth ) {
|
||||
} while (last < top);
|
||||
}
|
||||
|
||||
//------------------------------dump-------------------------------------------
|
||||
#ifndef PRODUCT
|
||||
void NTarjan::dump(int offset) const {
|
||||
// Dump the data from this node
|
||||
|
@ -121,27 +121,30 @@ void PhaseCFG::replace_block_proj_ctrl( Node *n ) {
|
||||
|
||||
//------------------------------schedule_pinned_nodes--------------------------
|
||||
// Set the basic block for Nodes pinned into blocks
|
||||
void PhaseCFG::schedule_pinned_nodes( VectorSet &visited ) {
|
||||
void PhaseCFG::schedule_pinned_nodes(VectorSet &visited) {
|
||||
// Allocate node stack of size C->unique()+8 to avoid frequent realloc
|
||||
GrowableArray <Node *> spstack(C->unique()+8);
|
||||
GrowableArray <Node *> spstack(C->unique() + 8);
|
||||
spstack.push(_root);
|
||||
while ( spstack.is_nonempty() ) {
|
||||
Node *n = spstack.pop();
|
||||
if( !visited.test_set(n->_idx) ) { // Test node and flag it as visited
|
||||
if( n->pinned() && !has_block(n)) { // Pinned? Nail it down!
|
||||
assert( n->in(0), "pinned Node must have Control" );
|
||||
while (spstack.is_nonempty()) {
|
||||
Node* node = spstack.pop();
|
||||
if (!visited.test_set(node->_idx)) { // Test node and flag it as visited
|
||||
if (node->pinned() && !has_block(node)) { // Pinned? Nail it down!
|
||||
assert(node->in(0), "pinned Node must have Control");
|
||||
// Before setting block replace block_proj control edge
|
||||
replace_block_proj_ctrl(n);
|
||||
Node *input = n->in(0);
|
||||
replace_block_proj_ctrl(node);
|
||||
Node* input = node->in(0);
|
||||
while (!input->is_block_start()) {
|
||||
input = input->in(0);
|
||||
}
|
||||
Block *b = get_block_for_node(input); // Basic block of controlling input
|
||||
schedule_node_into_block(n, b);
|
||||
Block* block = get_block_for_node(input); // Basic block of controlling input
|
||||
schedule_node_into_block(node, block);
|
||||
}
|
||||
for( int i = n->req() - 1; i >= 0; --i ) { // For all inputs
|
||||
if( n->in(i) != NULL )
|
||||
spstack.push(n->in(i));
|
||||
|
||||
// process all inputs that are non NULL
|
||||
for (int i = node->req() - 1; i >= 0; --i) {
|
||||
if (node->in(i) != NULL) {
|
||||
spstack.push(node->in(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -205,32 +208,29 @@ static Block* find_deepest_input(Node* n, const PhaseCFG* cfg) {
|
||||
// which all their inputs occur.
|
||||
bool PhaseCFG::schedule_early(VectorSet &visited, Node_List &roots) {
|
||||
// Allocate stack with enough space to avoid frequent realloc
|
||||
Node_Stack nstack(roots.Size() + 8); // (unique >> 1) + 24 from Java2D stats
|
||||
// roots.push(_root); _root will be processed among C->top() inputs
|
||||
Node_Stack nstack(roots.Size() + 8);
|
||||
// _root will be processed among C->top() inputs
|
||||
roots.push(C->top());
|
||||
visited.set(C->top()->_idx);
|
||||
|
||||
while (roots.size() != 0) {
|
||||
// Use local variables nstack_top_n & nstack_top_i to cache values
|
||||
// on stack's top.
|
||||
Node *nstack_top_n = roots.pop();
|
||||
uint nstack_top_i = 0;
|
||||
//while_nstack_nonempty:
|
||||
while (true) {
|
||||
// Get parent node and next input's index from stack's top.
|
||||
Node *n = nstack_top_n;
|
||||
uint i = nstack_top_i;
|
||||
Node* parent_node = roots.pop();
|
||||
uint input_index = 0;
|
||||
|
||||
if (i == 0) {
|
||||
while (true) {
|
||||
if (input_index == 0) {
|
||||
// Fixup some control. Constants without control get attached
|
||||
// to root and nodes that use is_block_proj() nodes should be attached
|
||||
// to the region that starts their block.
|
||||
const Node *in0 = n->in(0);
|
||||
if (in0 != NULL) { // Control-dependent?
|
||||
replace_block_proj_ctrl(n);
|
||||
} else { // n->in(0) == NULL
|
||||
if (n->req() == 1) { // This guy is a constant with NO inputs?
|
||||
n->set_req(0, _root);
|
||||
const Node* control_input = parent_node->in(0);
|
||||
if (control_input != NULL) {
|
||||
replace_block_proj_ctrl(parent_node);
|
||||
} else {
|
||||
// Is a constant with NO inputs?
|
||||
if (parent_node->req() == 1) {
|
||||
parent_node->set_req(0, _root);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -239,37 +239,47 @@ bool PhaseCFG::schedule_early(VectorSet &visited, Node_List &roots) {
|
||||
// input is already in a block we quit following inputs (to avoid
|
||||
// cycles). Instead we put that Node on a worklist to be handled
|
||||
// later (since IT'S inputs may not have a block yet).
|
||||
bool done = true; // Assume all n's inputs will be processed
|
||||
while (i < n->len()) { // For all inputs
|
||||
Node *in = n->in(i); // Get input
|
||||
++i;
|
||||
if (in == NULL) continue; // Ignore NULL, missing inputs
|
||||
|
||||
// Assume all n's inputs will be processed
|
||||
bool done = true;
|
||||
|
||||
while (input_index < parent_node->len()) {
|
||||
Node* in = parent_node->in(input_index++);
|
||||
if (in == NULL) {
|
||||
continue;
|
||||
}
|
||||
|
||||
int is_visited = visited.test_set(in->_idx);
|
||||
if (!has_block(in)) { // Missing block selection?
|
||||
if (!has_block(in)) {
|
||||
if (is_visited) {
|
||||
// assert( !visited.test(in->_idx), "did not schedule early" );
|
||||
return false;
|
||||
}
|
||||
nstack.push(n, i); // Save parent node and next input's index.
|
||||
nstack_top_n = in; // Process current input now.
|
||||
nstack_top_i = 0;
|
||||
done = false; // Not all n's inputs processed.
|
||||
break; // continue while_nstack_nonempty;
|
||||
} else if (!is_visited) { // Input not yet visited?
|
||||
roots.push(in); // Visit this guy later, using worklist
|
||||
// Save parent node and next input's index.
|
||||
nstack.push(parent_node, input_index);
|
||||
// Process current input now.
|
||||
parent_node = in;
|
||||
input_index = 0;
|
||||
// Not all n's inputs processed.
|
||||
done = false;
|
||||
break;
|
||||
} else if (!is_visited) {
|
||||
// Visit this guy later, using worklist
|
||||
roots.push(in);
|
||||
}
|
||||
}
|
||||
|
||||
if (done) {
|
||||
// All of n's inputs have been processed, complete post-processing.
|
||||
|
||||
// Some instructions are pinned into a block. These include Region,
|
||||
// Phi, Start, Return, and other control-dependent instructions and
|
||||
// any projections which depend on them.
|
||||
if (!n->pinned()) {
|
||||
if (!parent_node->pinned()) {
|
||||
// Set earliest legal block.
|
||||
map_node_to_block(n, find_deepest_input(n, this));
|
||||
Block* earliest_block = find_deepest_input(parent_node, this);
|
||||
map_node_to_block(parent_node, earliest_block);
|
||||
} else {
|
||||
assert(get_block_for_node(n) == get_block_for_node(n->in(0)), "Pinned Node should be at the same block as its control edge");
|
||||
assert(get_block_for_node(parent_node) == get_block_for_node(parent_node->in(0)), "Pinned Node should be at the same block as its control edge");
|
||||
}
|
||||
|
||||
if (nstack.is_empty()) {
|
||||
@ -278,12 +288,12 @@ bool PhaseCFG::schedule_early(VectorSet &visited, Node_List &roots) {
|
||||
break;
|
||||
}
|
||||
// Get saved parent node and next input's index.
|
||||
nstack_top_n = nstack.node();
|
||||
nstack_top_i = nstack.index();
|
||||
parent_node = nstack.node();
|
||||
input_index = nstack.index();
|
||||
nstack.pop();
|
||||
} // if (done)
|
||||
} // while (true)
|
||||
} // while (roots.size() != 0)
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -847,7 +857,7 @@ Node *Node_Backward_Iterator::next() {
|
||||
|
||||
//------------------------------ComputeLatenciesBackwards----------------------
|
||||
// Compute the latency of all the instructions.
|
||||
void PhaseCFG::ComputeLatenciesBackwards(VectorSet &visited, Node_List &stack) {
|
||||
void PhaseCFG::compute_latencies_backwards(VectorSet &visited, Node_List &stack) {
|
||||
#ifndef PRODUCT
|
||||
if (trace_opto_pipelining())
|
||||
tty->print("\n#---- ComputeLatenciesBackwards ----\n");
|
||||
@ -870,31 +880,34 @@ void PhaseCFG::partial_latency_of_defs(Node *n) {
|
||||
// Set the latency for this instruction
|
||||
#ifndef PRODUCT
|
||||
if (trace_opto_pipelining()) {
|
||||
tty->print("# latency_to_inputs: node_latency[%d] = %d for node",
|
||||
n->_idx, _node_latency->at_grow(n->_idx));
|
||||
tty->print("# latency_to_inputs: node_latency[%d] = %d for node", n->_idx, get_latency_for_node(n));
|
||||
dump();
|
||||
}
|
||||
#endif
|
||||
|
||||
if (n->is_Proj())
|
||||
if (n->is_Proj()) {
|
||||
n = n->in(0);
|
||||
}
|
||||
|
||||
if (n->is_Root())
|
||||
if (n->is_Root()) {
|
||||
return;
|
||||
}
|
||||
|
||||
uint nlen = n->len();
|
||||
uint use_latency = _node_latency->at_grow(n->_idx);
|
||||
uint use_latency = get_latency_for_node(n);
|
||||
uint use_pre_order = get_block_for_node(n)->_pre_order;
|
||||
|
||||
for ( uint j=0; j<nlen; j++ ) {
|
||||
for (uint j = 0; j < nlen; j++) {
|
||||
Node *def = n->in(j);
|
||||
|
||||
if (!def || def == n)
|
||||
if (!def || def == n) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Walk backwards thru projections
|
||||
if (def->is_Proj())
|
||||
if (def->is_Proj()) {
|
||||
def = def->in(0);
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
if (trace_opto_pipelining()) {
|
||||
@ -907,22 +920,20 @@ void PhaseCFG::partial_latency_of_defs(Node *n) {
|
||||
Block *def_block = get_block_for_node(def);
|
||||
uint def_pre_order = def_block ? def_block->_pre_order : 0;
|
||||
|
||||
if ( (use_pre_order < def_pre_order) ||
|
||||
(use_pre_order == def_pre_order && n->is_Phi()) )
|
||||
if ((use_pre_order < def_pre_order) || (use_pre_order == def_pre_order && n->is_Phi())) {
|
||||
continue;
|
||||
}
|
||||
|
||||
uint delta_latency = n->latency(j);
|
||||
uint current_latency = delta_latency + use_latency;
|
||||
|
||||
if (_node_latency->at_grow(def->_idx) < current_latency) {
|
||||
_node_latency->at_put_grow(def->_idx, current_latency);
|
||||
if (get_latency_for_node(def) < current_latency) {
|
||||
set_latency_for_node(def, current_latency);
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
if (trace_opto_pipelining()) {
|
||||
tty->print_cr("# %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d",
|
||||
use_latency, j, delta_latency, current_latency, def->_idx,
|
||||
_node_latency->at_grow(def->_idx));
|
||||
tty->print_cr("# %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d", use_latency, j, delta_latency, current_latency, def->_idx, get_latency_for_node(def));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
@ -957,7 +968,7 @@ int PhaseCFG::latency_from_use(Node *n, const Node *def, Node *use) {
|
||||
return 0;
|
||||
|
||||
uint nlen = use->len();
|
||||
uint nl = _node_latency->at_grow(use->_idx);
|
||||
uint nl = get_latency_for_node(use);
|
||||
|
||||
for ( uint j=0; j<nlen; j++ ) {
|
||||
if (use->in(j) == n) {
|
||||
@ -992,8 +1003,7 @@ void PhaseCFG::latency_from_uses(Node *n) {
|
||||
// Set the latency for this instruction
|
||||
#ifndef PRODUCT
|
||||
if (trace_opto_pipelining()) {
|
||||
tty->print("# latency_from_outputs: node_latency[%d] = %d for node",
|
||||
n->_idx, _node_latency->at_grow(n->_idx));
|
||||
tty->print("# latency_from_outputs: node_latency[%d] = %d for node", n->_idx, get_latency_for_node(n));
|
||||
dump();
|
||||
}
|
||||
#endif
|
||||
@ -1006,7 +1016,7 @@ void PhaseCFG::latency_from_uses(Node *n) {
|
||||
if (latency < l) latency = l;
|
||||
}
|
||||
|
||||
_node_latency->at_put_grow(n->_idx, latency);
|
||||
set_latency_for_node(n, latency);
|
||||
}
|
||||
|
||||
//------------------------------hoist_to_cheaper_block-------------------------
|
||||
@ -1016,9 +1026,9 @@ Block* PhaseCFG::hoist_to_cheaper_block(Block* LCA, Block* early, Node* self) {
|
||||
const double delta = 1+PROB_UNLIKELY_MAG(4);
|
||||
Block* least = LCA;
|
||||
double least_freq = least->_freq;
|
||||
uint target = _node_latency->at_grow(self->_idx);
|
||||
uint start_latency = _node_latency->at_grow(LCA->_nodes[0]->_idx);
|
||||
uint end_latency = _node_latency->at_grow(LCA->_nodes[LCA->end_idx()]->_idx);
|
||||
uint target = get_latency_for_node(self);
|
||||
uint start_latency = get_latency_for_node(LCA->_nodes[0]);
|
||||
uint end_latency = get_latency_for_node(LCA->_nodes[LCA->end_idx()]);
|
||||
bool in_latency = (target <= start_latency);
|
||||
const Block* root_block = get_block_for_node(_root);
|
||||
|
||||
@ -1035,8 +1045,7 @@ Block* PhaseCFG::hoist_to_cheaper_block(Block* LCA, Block* early, Node* self) {
|
||||
|
||||
#ifndef PRODUCT
|
||||
if (trace_opto_pipelining()) {
|
||||
tty->print("# Find cheaper block for latency %d: ",
|
||||
_node_latency->at_grow(self->_idx));
|
||||
tty->print("# Find cheaper block for latency %d: ", get_latency_for_node(self));
|
||||
self->dump();
|
||||
tty->print_cr("# B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g",
|
||||
LCA->_pre_order,
|
||||
@ -1065,9 +1074,9 @@ Block* PhaseCFG::hoist_to_cheaper_block(Block* LCA, Block* early, Node* self) {
|
||||
if (mach && LCA == root_block)
|
||||
break;
|
||||
|
||||
uint start_lat = _node_latency->at_grow(LCA->_nodes[0]->_idx);
|
||||
uint start_lat = get_latency_for_node(LCA->_nodes[0]);
|
||||
uint end_idx = LCA->end_idx();
|
||||
uint end_lat = _node_latency->at_grow(LCA->_nodes[end_idx]->_idx);
|
||||
uint end_lat = get_latency_for_node(LCA->_nodes[end_idx]);
|
||||
double LCA_freq = LCA->_freq;
|
||||
#ifndef PRODUCT
|
||||
if (trace_opto_pipelining()) {
|
||||
@ -1109,7 +1118,7 @@ Block* PhaseCFG::hoist_to_cheaper_block(Block* LCA, Block* early, Node* self) {
|
||||
tty->print_cr("# Change latency for [%4d] from %d to %d", self->_idx, target, end_latency);
|
||||
}
|
||||
#endif
|
||||
_node_latency->at_put_grow(self->_idx, end_latency);
|
||||
set_latency_for_node(self, end_latency);
|
||||
partial_latency_of_defs(self);
|
||||
}
|
||||
|
||||
@ -1255,7 +1264,7 @@ void PhaseCFG::schedule_late(VectorSet &visited, Node_List &stack) {
|
||||
} // end ScheduleLate
|
||||
|
||||
//------------------------------GlobalCodeMotion-------------------------------
|
||||
void PhaseCFG::GlobalCodeMotion( Matcher &matcher, uint unique, Node_List &proj_list ) {
|
||||
void PhaseCFG::global_code_motion() {
|
||||
ResourceMark rm;
|
||||
|
||||
#ifndef PRODUCT
|
||||
@ -1265,21 +1274,22 @@ void PhaseCFG::GlobalCodeMotion( Matcher &matcher, uint unique, Node_List &proj_
|
||||
#endif
|
||||
|
||||
// Initialize the node to block mapping for things on the proj_list
|
||||
for (uint i = 0; i < proj_list.size(); i++) {
|
||||
unmap_node_from_block(proj_list[i]);
|
||||
for (uint i = 0; i < _matcher.number_of_projections(); i++) {
|
||||
unmap_node_from_block(_matcher.get_projection(i));
|
||||
}
|
||||
|
||||
// Set the basic block for Nodes pinned into blocks
|
||||
Arena *a = Thread::current()->resource_area();
|
||||
VectorSet visited(a);
|
||||
schedule_pinned_nodes( visited );
|
||||
Arena* arena = Thread::current()->resource_area();
|
||||
VectorSet visited(arena);
|
||||
schedule_pinned_nodes(visited);
|
||||
|
||||
// Find the earliest Block any instruction can be placed in. Some
|
||||
// instructions are pinned into Blocks. Unpinned instructions can
|
||||
// appear in last block in which all their inputs occur.
|
||||
visited.Clear();
|
||||
Node_List stack(a);
|
||||
stack.map( (unique >> 1) + 16, NULL); // Pre-grow the list
|
||||
Node_List stack(arena);
|
||||
// Pre-grow the list
|
||||
stack.map((C->unique() >> 1) + 16, NULL);
|
||||
if (!schedule_early(visited, stack)) {
|
||||
// Bailout without retry
|
||||
C->record_method_not_compilable("early schedule failed");
|
||||
@ -1287,29 +1297,25 @@ void PhaseCFG::GlobalCodeMotion( Matcher &matcher, uint unique, Node_List &proj_
|
||||
}
|
||||
|
||||
// Build Def-Use edges.
|
||||
proj_list.push(_root); // Add real root as another root
|
||||
proj_list.pop();
|
||||
|
||||
// Compute the latency information (via backwards walk) for all the
|
||||
// instructions in the graph
|
||||
_node_latency = new GrowableArray<uint>(); // resource_area allocation
|
||||
|
||||
if( C->do_scheduling() )
|
||||
ComputeLatenciesBackwards(visited, stack);
|
||||
if (C->do_scheduling()) {
|
||||
compute_latencies_backwards(visited, stack);
|
||||
}
|
||||
|
||||
// Now schedule all codes as LATE as possible. This is the LCA in the
|
||||
// dominator tree of all USES of a value. Pick the block with the least
|
||||
// loop nesting depth that is lowest in the dominator tree.
|
||||
// ( visited.Clear() called in schedule_late()->Node_Backward_Iterator() )
|
||||
schedule_late(visited, stack);
|
||||
if( C->failing() ) {
|
||||
if (C->failing()) {
|
||||
// schedule_late fails only when graph is incorrect.
|
||||
assert(!VerifyGraphEdges, "verification should have failed");
|
||||
return;
|
||||
}
|
||||
|
||||
unique = C->unique();
|
||||
|
||||
#ifndef PRODUCT
|
||||
if (trace_opto_pipelining()) {
|
||||
tty->print("\n---- Detect implicit null checks ----\n");
|
||||
@ -1332,10 +1338,11 @@ void PhaseCFG::GlobalCodeMotion( Matcher &matcher, uint unique, Node_List &proj_
|
||||
// By reversing the loop direction we get a very minor gain on mpegaudio.
|
||||
// Feel free to revert to a forward loop for clarity.
|
||||
// for( int i=0; i < (int)matcher._null_check_tests.size(); i+=2 ) {
|
||||
for( int i= matcher._null_check_tests.size()-2; i>=0; i-=2 ) {
|
||||
Node *proj = matcher._null_check_tests[i ];
|
||||
Node *val = matcher._null_check_tests[i+1];
|
||||
get_block_for_node(proj)->implicit_null_check(this, proj, val, allowed_reasons);
|
||||
for (int i = _matcher._null_check_tests.size() - 2; i >= 0; i -= 2) {
|
||||
Node* proj = _matcher._null_check_tests[i];
|
||||
Node* val = _matcher._null_check_tests[i + 1];
|
||||
Block* block = get_block_for_node(proj);
|
||||
block->implicit_null_check(this, proj, val, allowed_reasons);
|
||||
// The implicit_null_check will only perform the transformation
|
||||
// if the null branch is truly uncommon, *and* it leads to an
|
||||
// uncommon trap. Combined with the too_many_traps guards
|
||||
@ -1352,11 +1359,11 @@ void PhaseCFG::GlobalCodeMotion( Matcher &matcher, uint unique, Node_List &proj_
|
||||
|
||||
// Schedule locally. Right now a simple topological sort.
|
||||
// Later, do a real latency aware scheduler.
|
||||
uint max_idx = C->unique();
|
||||
GrowableArray<int> ready_cnt(max_idx, max_idx, -1);
|
||||
GrowableArray<int> ready_cnt(C->unique(), C->unique(), -1);
|
||||
visited.Clear();
|
||||
for (uint i = 0; i < _num_blocks; i++) {
|
||||
if (!_blocks[i]->schedule_local(this, matcher, ready_cnt, visited)) {
|
||||
for (uint i = 0; i < number_of_blocks(); i++) {
|
||||
Block* block = get_block(i);
|
||||
if (!block->schedule_local(this, _matcher, ready_cnt, visited)) {
|
||||
if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) {
|
||||
C->record_method_not_compilable("local schedule failed");
|
||||
}
|
||||
@ -1366,15 +1373,17 @@ void PhaseCFG::GlobalCodeMotion( Matcher &matcher, uint unique, Node_List &proj_
|
||||
|
||||
// If we inserted any instructions between a Call and his CatchNode,
|
||||
// clone the instructions on all paths below the Catch.
|
||||
for (uint i = 0; i < _num_blocks; i++) {
|
||||
_blocks[i]->call_catch_cleanup(this, C);
|
||||
for (uint i = 0; i < number_of_blocks(); i++) {
|
||||
Block* block = get_block(i);
|
||||
block->call_catch_cleanup(this, C);
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
if (trace_opto_pipelining()) {
|
||||
tty->print("\n---- After GlobalCodeMotion ----\n");
|
||||
for (uint i = 0; i < _num_blocks; i++) {
|
||||
_blocks[i]->dump();
|
||||
for (uint i = 0; i < number_of_blocks(); i++) {
|
||||
Block* block = get_block(i);
|
||||
block->dump();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -1382,10 +1391,29 @@ void PhaseCFG::GlobalCodeMotion( Matcher &matcher, uint unique, Node_List &proj_
|
||||
_node_latency = (GrowableArray<uint> *)0xdeadbeef;
|
||||
}
|
||||
|
||||
bool PhaseCFG::do_global_code_motion() {
|
||||
|
||||
build_dominator_tree();
|
||||
if (C->failing()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
NOT_PRODUCT( C->verify_graph_edges(); )
|
||||
|
||||
estimate_block_frequency();
|
||||
|
||||
global_code_motion();
|
||||
|
||||
if (C->failing()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
//------------------------------Estimate_Block_Frequency-----------------------
|
||||
// Estimate block frequencies based on IfNode probabilities.
|
||||
void PhaseCFG::Estimate_Block_Frequency() {
|
||||
void PhaseCFG::estimate_block_frequency() {
|
||||
|
||||
// Force conditional branches leading to uncommon traps to be unlikely,
|
||||
// not because we get to the uncommon_trap with less relative frequency,
|
||||
@ -1393,7 +1421,7 @@ void PhaseCFG::Estimate_Block_Frequency() {
|
||||
// there once.
|
||||
if (C->do_freq_based_layout()) {
|
||||
Block_List worklist;
|
||||
Block* root_blk = _blocks[0];
|
||||
Block* root_blk = get_block(0);
|
||||
for (uint i = 1; i < root_blk->num_preds(); i++) {
|
||||
Block *pb = get_block_for_node(root_blk->pred(i));
|
||||
if (pb->has_uncommon_code()) {
|
||||
@ -1402,7 +1430,9 @@ void PhaseCFG::Estimate_Block_Frequency() {
|
||||
}
|
||||
while (worklist.size() > 0) {
|
||||
Block* uct = worklist.pop();
|
||||
if (uct == _broot) continue;
|
||||
if (uct == get_root_block()) {
|
||||
continue;
|
||||
}
|
||||
for (uint i = 1; i < uct->num_preds(); i++) {
|
||||
Block *pb = get_block_for_node(uct->pred(i));
|
||||
if (pb->_num_succs == 1) {
|
||||
@ -1426,12 +1456,12 @@ void PhaseCFG::Estimate_Block_Frequency() {
|
||||
_root_loop->scale_freq();
|
||||
|
||||
// Save outmost loop frequency for LRG frequency threshold
|
||||
_outer_loop_freq = _root_loop->outer_loop_freq();
|
||||
_outer_loop_frequency = _root_loop->outer_loop_freq();
|
||||
|
||||
// force paths ending at uncommon traps to be infrequent
|
||||
if (!C->do_freq_based_layout()) {
|
||||
Block_List worklist;
|
||||
Block* root_blk = _blocks[0];
|
||||
Block* root_blk = get_block(0);
|
||||
for (uint i = 1; i < root_blk->num_preds(); i++) {
|
||||
Block *pb = get_block_for_node(root_blk->pred(i));
|
||||
if (pb->has_uncommon_code()) {
|
||||
@ -1451,8 +1481,8 @@ void PhaseCFG::Estimate_Block_Frequency() {
|
||||
}
|
||||
|
||||
#ifdef ASSERT
|
||||
for (uint i = 0; i < _num_blocks; i++ ) {
|
||||
Block *b = _blocks[i];
|
||||
for (uint i = 0; i < number_of_blocks(); i++) {
|
||||
Block* b = get_block(i);
|
||||
assert(b->_freq >= MIN_BLOCK_FREQUENCY, "Register Allocator requires meaningful block frequency");
|
||||
}
|
||||
#endif
|
||||
@ -1476,16 +1506,16 @@ void PhaseCFG::Estimate_Block_Frequency() {
|
||||
CFGLoop* PhaseCFG::create_loop_tree() {
|
||||
|
||||
#ifdef ASSERT
|
||||
assert( _blocks[0] == _broot, "" );
|
||||
for (uint i = 0; i < _num_blocks; i++ ) {
|
||||
Block *b = _blocks[i];
|
||||
assert(get_block(0) == get_root_block(), "first block should be root block");
|
||||
for (uint i = 0; i < number_of_blocks(); i++) {
|
||||
Block* block = get_block(i);
|
||||
// Check that _loop field are clear...we could clear them if not.
|
||||
assert(b->_loop == NULL, "clear _loop expected");
|
||||
assert(block->_loop == NULL, "clear _loop expected");
|
||||
// Sanity check that the RPO numbering is reflected in the _blocks array.
|
||||
// It doesn't have to be for the loop tree to be built, but if it is not,
|
||||
// then the blocks have been reordered since dom graph building...which
|
||||
// may question the RPO numbering
|
||||
assert(b->_rpo == i, "unexpected reverse post order number");
|
||||
assert(block->_rpo == i, "unexpected reverse post order number");
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -1495,11 +1525,11 @@ CFGLoop* PhaseCFG::create_loop_tree() {
|
||||
Block_List worklist;
|
||||
|
||||
// Assign blocks to loops
|
||||
for(uint i = _num_blocks - 1; i > 0; i-- ) { // skip Root block
|
||||
Block *b = _blocks[i];
|
||||
for(uint i = number_of_blocks() - 1; i > 0; i-- ) { // skip Root block
|
||||
Block* block = get_block(i);
|
||||
|
||||
if (b->head()->is_Loop()) {
|
||||
Block* loop_head = b;
|
||||
if (block->head()->is_Loop()) {
|
||||
Block* loop_head = block;
|
||||
assert(loop_head->num_preds() - 1 == 2, "loop must have 2 predecessors");
|
||||
Node* tail_n = loop_head->pred(LoopNode::LoopBackControl);
|
||||
Block* tail = get_block_for_node(tail_n);
|
||||
@ -1533,23 +1563,23 @@ CFGLoop* PhaseCFG::create_loop_tree() {
|
||||
|
||||
// Create a member list for each loop consisting
|
||||
// of both blocks and (immediate child) loops.
|
||||
for (uint i = 0; i < _num_blocks; i++) {
|
||||
Block *b = _blocks[i];
|
||||
CFGLoop* lp = b->_loop;
|
||||
for (uint i = 0; i < number_of_blocks(); i++) {
|
||||
Block* block = get_block(i);
|
||||
CFGLoop* lp = block->_loop;
|
||||
if (lp == NULL) {
|
||||
// Not assigned to a loop. Add it to the method's pseudo loop.
|
||||
b->_loop = root_loop;
|
||||
block->_loop = root_loop;
|
||||
lp = root_loop;
|
||||
}
|
||||
if (lp == root_loop || b != lp->head()) { // loop heads are already members
|
||||
lp->add_member(b);
|
||||
if (lp == root_loop || block != lp->head()) { // loop heads are already members
|
||||
lp->add_member(block);
|
||||
}
|
||||
if (lp != root_loop) {
|
||||
if (lp->parent() == NULL) {
|
||||
// Not a nested loop. Make it a child of the method's pseudo loop.
|
||||
root_loop->add_nested_loop(lp);
|
||||
}
|
||||
if (b == lp->head()) {
|
||||
if (block == lp->head()) {
|
||||
// Add nested loop to member list of parent loop.
|
||||
lp->parent()->add_member(lp);
|
||||
}
|
||||
|
@ -416,7 +416,7 @@ void IdealGraphPrinter::visit_node(Node *n, bool edges, VectorSet* temp_set) {
|
||||
if (C->cfg() != NULL) {
|
||||
Block* block = C->cfg()->get_block_for_node(node);
|
||||
if (block == NULL) {
|
||||
print_prop("block", C->cfg()->_blocks[0]->_pre_order);
|
||||
print_prop("block", C->cfg()->get_block(0)->_pre_order);
|
||||
} else {
|
||||
print_prop("block", block->_pre_order);
|
||||
}
|
||||
@ -637,10 +637,10 @@ void IdealGraphPrinter::walk_nodes(Node *start, bool edges, VectorSet* temp_set)
|
||||
if (C->cfg() != NULL) {
|
||||
// once we have a CFG there are some nodes that aren't really
|
||||
// reachable but are in the CFG so add them here.
|
||||
for (uint i = 0; i < C->cfg()->_blocks.size(); i++) {
|
||||
Block *b = C->cfg()->_blocks[i];
|
||||
for (uint s = 0; s < b->_nodes.size(); s++) {
|
||||
nodeStack.push(b->_nodes[s]);
|
||||
for (uint i = 0; i < C->cfg()->number_of_blocks(); i++) {
|
||||
Block* block = C->cfg()->get_block(i);
|
||||
for (uint s = 0; s < block->_nodes.size(); s++) {
|
||||
nodeStack.push(block->_nodes[s]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -698,24 +698,24 @@ void IdealGraphPrinter::print(Compile* compile, const char *name, Node *node, in
|
||||
tail(EDGES_ELEMENT);
|
||||
if (C->cfg() != NULL) {
|
||||
head(CONTROL_FLOW_ELEMENT);
|
||||
for (uint i = 0; i < C->cfg()->_blocks.size(); i++) {
|
||||
Block *b = C->cfg()->_blocks[i];
|
||||
for (uint i = 0; i < C->cfg()->number_of_blocks(); i++) {
|
||||
Block* block = C->cfg()->get_block(i);
|
||||
begin_head(BLOCK_ELEMENT);
|
||||
print_attr(BLOCK_NAME_PROPERTY, b->_pre_order);
|
||||
print_attr(BLOCK_NAME_PROPERTY, block->_pre_order);
|
||||
end_head();
|
||||
|
||||
head(SUCCESSORS_ELEMENT);
|
||||
for (uint s = 0; s < b->_num_succs; s++) {
|
||||
for (uint s = 0; s < block->_num_succs; s++) {
|
||||
begin_elem(SUCCESSOR_ELEMENT);
|
||||
print_attr(BLOCK_NAME_PROPERTY, b->_succs[s]->_pre_order);
|
||||
print_attr(BLOCK_NAME_PROPERTY, block->_succs[s]->_pre_order);
|
||||
end_elem();
|
||||
}
|
||||
tail(SUCCESSORS_ELEMENT);
|
||||
|
||||
head(NODES_ELEMENT);
|
||||
for (uint s = 0; s < b->_nodes.size(); s++) {
|
||||
for (uint s = 0; s < block->_nodes.size(); s++) {
|
||||
begin_elem(NODE_ELEMENT);
|
||||
print_attr(NODE_ID_PROPERTY, get_node_id(b->_nodes[s]));
|
||||
print_attr(NODE_ID_PROPERTY, get_node_id(block->_nodes[s]));
|
||||
end_elem();
|
||||
}
|
||||
tail(NODES_ELEMENT);
|
||||
|
@ -37,12 +37,9 @@
|
||||
#include "opto/memnode.hpp"
|
||||
#include "opto/opcodes.hpp"
|
||||
|
||||
//=============================================================================
|
||||
//------------------------------IFG--------------------------------------------
|
||||
PhaseIFG::PhaseIFG( Arena *arena ) : Phase(Interference_Graph), _arena(arena) {
|
||||
}
|
||||
|
||||
//------------------------------init-------------------------------------------
|
||||
void PhaseIFG::init( uint maxlrg ) {
|
||||
_maxlrg = maxlrg;
|
||||
_yanked = new (_arena) VectorSet(_arena);
|
||||
@ -59,7 +56,6 @@ void PhaseIFG::init( uint maxlrg ) {
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------add--------------------------------------------
|
||||
// Add edge between vertices a & b. These are sorted (triangular matrix),
|
||||
// then the smaller number is inserted in the larger numbered array.
|
||||
int PhaseIFG::add_edge( uint a, uint b ) {
|
||||
@ -71,7 +67,6 @@ int PhaseIFG::add_edge( uint a, uint b ) {
|
||||
return _adjs[a].insert( b );
|
||||
}
|
||||
|
||||
//------------------------------add_vector-------------------------------------
|
||||
// Add an edge between 'a' and everything in the vector.
|
||||
void PhaseIFG::add_vector( uint a, IndexSet *vec ) {
|
||||
// IFG is triangular, so do the inserts where 'a' < 'b'.
|
||||
@ -86,7 +81,6 @@ void PhaseIFG::add_vector( uint a, IndexSet *vec ) {
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------test-------------------------------------------
|
||||
// Is there an edge between a and b?
|
||||
int PhaseIFG::test_edge( uint a, uint b ) const {
|
||||
// Sort a and b, so that a is larger
|
||||
@ -95,7 +89,6 @@ int PhaseIFG::test_edge( uint a, uint b ) const {
|
||||
return _adjs[a].member(b);
|
||||
}
|
||||
|
||||
//------------------------------SquareUp---------------------------------------
|
||||
// Convert triangular matrix to square matrix
|
||||
void PhaseIFG::SquareUp() {
|
||||
assert( !_is_square, "only on triangular" );
|
||||
@ -111,7 +104,6 @@ void PhaseIFG::SquareUp() {
|
||||
_is_square = true;
|
||||
}
|
||||
|
||||
//------------------------------Compute_Effective_Degree-----------------------
|
||||
// Compute effective degree in bulk
|
||||
void PhaseIFG::Compute_Effective_Degree() {
|
||||
assert( _is_square, "only on square" );
|
||||
@ -120,7 +112,6 @@ void PhaseIFG::Compute_Effective_Degree() {
|
||||
lrgs(i).set_degree(effective_degree(i));
|
||||
}
|
||||
|
||||
//------------------------------test_edge_sq-----------------------------------
|
||||
int PhaseIFG::test_edge_sq( uint a, uint b ) const {
|
||||
assert( _is_square, "only on square" );
|
||||
// Swap, so that 'a' has the lesser count. Then binary search is on
|
||||
@ -130,7 +121,6 @@ int PhaseIFG::test_edge_sq( uint a, uint b ) const {
|
||||
return _adjs[a].member(b);
|
||||
}
|
||||
|
||||
//------------------------------Union------------------------------------------
|
||||
// Union edges of B into A
|
||||
void PhaseIFG::Union( uint a, uint b ) {
|
||||
assert( _is_square, "only on square" );
|
||||
@ -146,7 +136,6 @@ void PhaseIFG::Union( uint a, uint b ) {
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------remove_node------------------------------------
|
||||
// Yank a Node and all connected edges from the IFG. Return a
|
||||
// list of neighbors (edges) yanked.
|
||||
IndexSet *PhaseIFG::remove_node( uint a ) {
|
||||
@ -165,7 +154,6 @@ IndexSet *PhaseIFG::remove_node( uint a ) {
|
||||
return neighbors(a);
|
||||
}
|
||||
|
||||
//------------------------------re_insert--------------------------------------
|
||||
// Re-insert a yanked Node.
|
||||
void PhaseIFG::re_insert( uint a ) {
|
||||
assert( _is_square, "only on square" );
|
||||
@ -180,7 +168,6 @@ void PhaseIFG::re_insert( uint a ) {
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------compute_degree---------------------------------
|
||||
// Compute the degree between 2 live ranges. If both live ranges are
|
||||
// aligned-adjacent powers-of-2 then we use the MAX size. If either is
|
||||
// mis-aligned (or for Fat-Projections, not-adjacent) then we have to
|
||||
@ -196,7 +183,6 @@ int LRG::compute_degree( LRG &l ) const {
|
||||
return tmp;
|
||||
}
|
||||
|
||||
//------------------------------effective_degree-------------------------------
|
||||
// Compute effective degree for this live range. If both live ranges are
|
||||
// aligned-adjacent powers-of-2 then we use the MAX size. If either is
|
||||
// mis-aligned (or for Fat-Projections, not-adjacent) then we have to
|
||||
@ -221,7 +207,6 @@ int PhaseIFG::effective_degree( uint lidx ) const {
|
||||
|
||||
|
||||
#ifndef PRODUCT
|
||||
//------------------------------dump-------------------------------------------
|
||||
void PhaseIFG::dump() const {
|
||||
tty->print_cr("-- Interference Graph --%s--",
|
||||
_is_square ? "square" : "triangular" );
|
||||
@ -260,7 +245,6 @@ void PhaseIFG::dump() const {
|
||||
tty->print("\n");
|
||||
}
|
||||
|
||||
//------------------------------stats------------------------------------------
|
||||
void PhaseIFG::stats() const {
|
||||
ResourceMark rm;
|
||||
int *h_cnt = NEW_RESOURCE_ARRAY(int,_maxlrg*2);
|
||||
@ -276,7 +260,6 @@ void PhaseIFG::stats() const {
|
||||
tty->print_cr("");
|
||||
}
|
||||
|
||||
//------------------------------verify-----------------------------------------
|
||||
void PhaseIFG::verify( const PhaseChaitin *pc ) const {
|
||||
// IFG is square, sorted and no need for Find
|
||||
for( uint i = 0; i < _maxlrg; i++ ) {
|
||||
@ -298,7 +281,6 @@ void PhaseIFG::verify( const PhaseChaitin *pc ) const {
|
||||
}
|
||||
#endif
|
||||
|
||||
//------------------------------interfere_with_live----------------------------
|
||||
// Interfere this register with everything currently live. Use the RegMasks
|
||||
// to trim the set of possible interferences. Return a count of register-only
|
||||
// interferences as an estimate of register pressure.
|
||||
@ -315,7 +297,6 @@ void PhaseChaitin::interfere_with_live( uint r, IndexSet *liveout ) {
|
||||
_ifg->add_edge( r, l );
|
||||
}
|
||||
|
||||
//------------------------------build_ifg_virtual------------------------------
|
||||
// Actually build the interference graph. Uses virtual registers only, no
|
||||
// physical register masks. This allows me to be very aggressive when
|
||||
// coalescing copies. Some of this aggressiveness will have to be undone
|
||||
@ -325,9 +306,9 @@ void PhaseChaitin::interfere_with_live( uint r, IndexSet *liveout ) {
|
||||
void PhaseChaitin::build_ifg_virtual( ) {
|
||||
|
||||
// For all blocks (in any order) do...
|
||||
for( uint i=0; i<_cfg._num_blocks; i++ ) {
|
||||
Block *b = _cfg._blocks[i];
|
||||
IndexSet *liveout = _live->live(b);
|
||||
for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
|
||||
Block* block = _cfg.get_block(i);
|
||||
IndexSet* liveout = _live->live(block);
|
||||
|
||||
// The IFG is built by a single reverse pass over each basic block.
|
||||
// Starting with the known live-out set, we remove things that get
|
||||
@ -337,8 +318,8 @@ void PhaseChaitin::build_ifg_virtual( ) {
|
||||
// The defined value interferes with everything currently live. The
|
||||
// value is then removed from the live-ness set and it's inputs are
|
||||
// added to the live-ness set.
|
||||
for( uint j = b->end_idx() + 1; j > 1; j-- ) {
|
||||
Node *n = b->_nodes[j-1];
|
||||
for (uint j = block->end_idx() + 1; j > 1; j--) {
|
||||
Node* n = block->_nodes[j - 1];
|
||||
|
||||
// Get value being defined
|
||||
uint r = _lrg_map.live_range_id(n);
|
||||
@ -408,7 +389,6 @@ void PhaseChaitin::build_ifg_virtual( ) {
|
||||
} // End of forall blocks
|
||||
}
|
||||
|
||||
//------------------------------count_int_pressure-----------------------------
|
||||
uint PhaseChaitin::count_int_pressure( IndexSet *liveout ) {
|
||||
IndexSetIterator elements(liveout);
|
||||
uint lidx;
|
||||
@ -424,7 +404,6 @@ uint PhaseChaitin::count_int_pressure( IndexSet *liveout ) {
|
||||
return cnt;
|
||||
}
|
||||
|
||||
//------------------------------count_float_pressure---------------------------
|
||||
uint PhaseChaitin::count_float_pressure( IndexSet *liveout ) {
|
||||
IndexSetIterator elements(liveout);
|
||||
uint lidx;
|
||||
@ -438,7 +417,6 @@ uint PhaseChaitin::count_float_pressure( IndexSet *liveout ) {
|
||||
return cnt;
|
||||
}
|
||||
|
||||
//------------------------------lower_pressure---------------------------------
|
||||
// Adjust register pressure down by 1. Capture last hi-to-low transition,
|
||||
static void lower_pressure( LRG *lrg, uint where, Block *b, uint *pressure, uint *hrp_index ) {
|
||||
if (lrg->mask().is_UP() && lrg->mask_size()) {
|
||||
@ -460,40 +438,41 @@ static void lower_pressure( LRG *lrg, uint where, Block *b, uint *pressure, uint
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------build_ifg_physical-----------------------------
|
||||
// Build the interference graph using physical registers when available.
|
||||
// That is, if 2 live ranges are simultaneously alive but in their acceptable
|
||||
// register sets do not overlap, then they do not interfere.
|
||||
uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
|
||||
NOT_PRODUCT( Compile::TracePhase t3("buildIFG", &_t_buildIFGphysical, TimeCompiler); )
|
||||
|
||||
uint spill_reg = LRG::SPILL_REG;
|
||||
uint must_spill = 0;
|
||||
|
||||
// For all blocks (in any order) do...
|
||||
for( uint i = 0; i < _cfg._num_blocks; i++ ) {
|
||||
Block *b = _cfg._blocks[i];
|
||||
for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
|
||||
Block* block = _cfg.get_block(i);
|
||||
// Clone (rather than smash in place) the liveout info, so it is alive
|
||||
// for the "collect_gc_info" phase later.
|
||||
IndexSet liveout(_live->live(b));
|
||||
uint last_inst = b->end_idx();
|
||||
IndexSet liveout(_live->live(block));
|
||||
uint last_inst = block->end_idx();
|
||||
// Compute first nonphi node index
|
||||
uint first_inst;
|
||||
for( first_inst = 1; first_inst < last_inst; first_inst++ )
|
||||
if( !b->_nodes[first_inst]->is_Phi() )
|
||||
for (first_inst = 1; first_inst < last_inst; first_inst++) {
|
||||
if (!block->_nodes[first_inst]->is_Phi()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Spills could be inserted before CreateEx node which should be
|
||||
// first instruction in block after Phis. Move CreateEx up.
|
||||
for( uint insidx = first_inst; insidx < last_inst; insidx++ ) {
|
||||
Node *ex = b->_nodes[insidx];
|
||||
if( ex->is_SpillCopy() ) continue;
|
||||
if( insidx > first_inst && ex->is_Mach() &&
|
||||
ex->as_Mach()->ideal_Opcode() == Op_CreateEx ) {
|
||||
for (uint insidx = first_inst; insidx < last_inst; insidx++) {
|
||||
Node *ex = block->_nodes[insidx];
|
||||
if (ex->is_SpillCopy()) {
|
||||
continue;
|
||||
}
|
||||
if (insidx > first_inst && ex->is_Mach() && ex->as_Mach()->ideal_Opcode() == Op_CreateEx) {
|
||||
// If the CreateEx isn't above all the MachSpillCopies
|
||||
// then move it to the top.
|
||||
b->_nodes.remove(insidx);
|
||||
b->_nodes.insert(first_inst, ex);
|
||||
block->_nodes.remove(insidx);
|
||||
block->_nodes.insert(first_inst, ex);
|
||||
}
|
||||
// Stop once a CreateEx or any other node is found
|
||||
break;
|
||||
@ -503,12 +482,12 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
|
||||
uint pressure[2], hrp_index[2];
|
||||
pressure[0] = pressure[1] = 0;
|
||||
hrp_index[0] = hrp_index[1] = last_inst+1;
|
||||
b->_reg_pressure = b->_freg_pressure = 0;
|
||||
block->_reg_pressure = block->_freg_pressure = 0;
|
||||
// Liveout things are presumed live for the whole block. We accumulate
|
||||
// 'area' accordingly. If they get killed in the block, we'll subtract
|
||||
// the unused part of the block from the area.
|
||||
int inst_count = last_inst - first_inst;
|
||||
double cost = (inst_count <= 0) ? 0.0 : b->_freq * double(inst_count);
|
||||
double cost = (inst_count <= 0) ? 0.0 : block->_freq * double(inst_count);
|
||||
assert(!(cost < 0.0), "negative spill cost" );
|
||||
IndexSetIterator elements(&liveout);
|
||||
uint lidx;
|
||||
@ -519,13 +498,15 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
|
||||
if (lrg.mask().is_UP() && lrg.mask_size()) {
|
||||
if (lrg._is_float || lrg._is_vector) { // Count float pressure
|
||||
pressure[1] += lrg.reg_pressure();
|
||||
if( pressure[1] > b->_freg_pressure )
|
||||
b->_freg_pressure = pressure[1];
|
||||
if (pressure[1] > block->_freg_pressure) {
|
||||
block->_freg_pressure = pressure[1];
|
||||
}
|
||||
// Count int pressure, but do not count the SP, flags
|
||||
} else if( lrgs(lidx).mask().overlap(*Matcher::idealreg2regmask[Op_RegI]) ) {
|
||||
} else if(lrgs(lidx).mask().overlap(*Matcher::idealreg2regmask[Op_RegI])) {
|
||||
pressure[0] += lrg.reg_pressure();
|
||||
if( pressure[0] > b->_reg_pressure )
|
||||
b->_reg_pressure = pressure[0];
|
||||
if (pressure[0] > block->_reg_pressure) {
|
||||
block->_reg_pressure = pressure[0];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -541,8 +522,8 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
|
||||
// value is then removed from the live-ness set and it's inputs are added
|
||||
// to the live-ness set.
|
||||
uint j;
|
||||
for( j = last_inst + 1; j > 1; j-- ) {
|
||||
Node *n = b->_nodes[j - 1];
|
||||
for (j = last_inst + 1; j > 1; j--) {
|
||||
Node* n = block->_nodes[j - 1];
|
||||
|
||||
// Get value being defined
|
||||
uint r = _lrg_map.live_range_id(n);
|
||||
@ -551,7 +532,7 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
|
||||
if(r) {
|
||||
// A DEF normally costs block frequency; rematerialized values are
|
||||
// removed from the DEF sight, so LOWER costs here.
|
||||
lrgs(r)._cost += n->rematerialize() ? 0 : b->_freq;
|
||||
lrgs(r)._cost += n->rematerialize() ? 0 : block->_freq;
|
||||
|
||||
// If it is not live, then this instruction is dead. Probably caused
|
||||
// by spilling and rematerialization. Who cares why, yank this baby.
|
||||
@ -560,7 +541,7 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
|
||||
if( !n->is_Proj() ||
|
||||
// Could also be a flags-projection of a dead ADD or such.
|
||||
(_lrg_map.live_range_id(def) && !liveout.member(_lrg_map.live_range_id(def)))) {
|
||||
b->_nodes.remove(j - 1);
|
||||
block->_nodes.remove(j - 1);
|
||||
if (lrgs(r)._def == n) {
|
||||
lrgs(r)._def = 0;
|
||||
}
|
||||
@ -580,21 +561,21 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
|
||||
RegMask itmp = lrgs(r).mask();
|
||||
itmp.AND(*Matcher::idealreg2regmask[Op_RegI]);
|
||||
int iregs = itmp.Size();
|
||||
if( pressure[0]+iregs > b->_reg_pressure )
|
||||
b->_reg_pressure = pressure[0]+iregs;
|
||||
if( pressure[0] <= (uint)INTPRESSURE &&
|
||||
pressure[0]+iregs > (uint)INTPRESSURE ) {
|
||||
hrp_index[0] = j-1;
|
||||
if (pressure[0]+iregs > block->_reg_pressure) {
|
||||
block->_reg_pressure = pressure[0] + iregs;
|
||||
}
|
||||
if (pressure[0] <= (uint)INTPRESSURE && pressure[0] + iregs > (uint)INTPRESSURE) {
|
||||
hrp_index[0] = j - 1;
|
||||
}
|
||||
// Count the float-only registers
|
||||
RegMask ftmp = lrgs(r).mask();
|
||||
ftmp.AND(*Matcher::idealreg2regmask[Op_RegD]);
|
||||
int fregs = ftmp.Size();
|
||||
if( pressure[1]+fregs > b->_freg_pressure )
|
||||
b->_freg_pressure = pressure[1]+fregs;
|
||||
if( pressure[1] <= (uint)FLOATPRESSURE &&
|
||||
pressure[1]+fregs > (uint)FLOATPRESSURE ) {
|
||||
hrp_index[1] = j-1;
|
||||
if (pressure[1] + fregs > block->_freg_pressure) {
|
||||
block->_freg_pressure = pressure[1] + fregs;
|
||||
}
|
||||
if(pressure[1] <= (uint)FLOATPRESSURE && pressure[1]+fregs > (uint)FLOATPRESSURE) {
|
||||
hrp_index[1] = j - 1;
|
||||
}
|
||||
}
|
||||
|
||||
@ -607,7 +588,7 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
|
||||
if( n->is_SpillCopy()
|
||||
&& lrgs(r).is_singledef() // MultiDef live range can still split
|
||||
&& n->outcnt() == 1 // and use must be in this block
|
||||
&& _cfg.get_block_for_node(n->unique_out()) == b ) {
|
||||
&& _cfg.get_block_for_node(n->unique_out()) == block) {
|
||||
// All single-use MachSpillCopy(s) that immediately precede their
|
||||
// use must color early. If a longer live range steals their
|
||||
// color, the spill copy will split and may push another spill copy
|
||||
@ -617,14 +598,16 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
|
||||
//
|
||||
|
||||
Node *single_use = n->unique_out();
|
||||
assert( b->find_node(single_use) >= j, "Use must be later in block");
|
||||
assert(block->find_node(single_use) >= j, "Use must be later in block");
|
||||
// Use can be earlier in block if it is a Phi, but then I should be a MultiDef
|
||||
|
||||
// Find first non SpillCopy 'm' that follows the current instruction
|
||||
// (j - 1) is index for current instruction 'n'
|
||||
Node *m = n;
|
||||
for( uint i = j; i <= last_inst && m->is_SpillCopy(); ++i ) { m = b->_nodes[i]; }
|
||||
if( m == single_use ) {
|
||||
for (uint i = j; i <= last_inst && m->is_SpillCopy(); ++i) {
|
||||
m = block->_nodes[i];
|
||||
}
|
||||
if (m == single_use) {
|
||||
lrgs(r)._area = 0.0;
|
||||
}
|
||||
}
|
||||
@ -633,7 +616,7 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
|
||||
if( liveout.remove(r) ) {
|
||||
// Adjust register pressure.
|
||||
// Capture last hi-to-lo pressure transition
|
||||
lower_pressure( &lrgs(r), j-1, b, pressure, hrp_index );
|
||||
lower_pressure(&lrgs(r), j - 1, block, pressure, hrp_index);
|
||||
assert( pressure[0] == count_int_pressure (&liveout), "" );
|
||||
assert( pressure[1] == count_float_pressure(&liveout), "" );
|
||||
}
|
||||
@ -646,7 +629,7 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
|
||||
if (liveout.remove(x)) {
|
||||
lrgs(x)._area -= cost;
|
||||
// Adjust register pressure.
|
||||
lower_pressure(&lrgs(x), j-1, b, pressure, hrp_index);
|
||||
lower_pressure(&lrgs(x), j - 1, block, pressure, hrp_index);
|
||||
assert( pressure[0] == count_int_pressure (&liveout), "" );
|
||||
assert( pressure[1] == count_float_pressure(&liveout), "" );
|
||||
}
|
||||
@ -718,7 +701,7 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
|
||||
|
||||
// Area remaining in the block
|
||||
inst_count--;
|
||||
cost = (inst_count <= 0) ? 0.0 : b->_freq * double(inst_count);
|
||||
cost = (inst_count <= 0) ? 0.0 : block->_freq * double(inst_count);
|
||||
|
||||
// Make all inputs live
|
||||
if( !n->is_Phi() ) { // Phi function uses come from prior block
|
||||
@ -743,7 +726,7 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
|
||||
if (k < debug_start) {
|
||||
// A USE costs twice block frequency (once for the Load, once
|
||||
// for a Load-delay). Rematerialized uses only cost once.
|
||||
lrg._cost += (def->rematerialize() ? b->_freq : (b->_freq + b->_freq));
|
||||
lrg._cost += (def->rematerialize() ? block->_freq : (block->_freq + block->_freq));
|
||||
}
|
||||
// It is live now
|
||||
if (liveout.insert(x)) {
|
||||
@ -753,12 +736,14 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
|
||||
if (lrg.mask().is_UP() && lrg.mask_size()) {
|
||||
if (lrg._is_float || lrg._is_vector) {
|
||||
pressure[1] += lrg.reg_pressure();
|
||||
if( pressure[1] > b->_freg_pressure )
|
||||
b->_freg_pressure = pressure[1];
|
||||
if (pressure[1] > block->_freg_pressure) {
|
||||
block->_freg_pressure = pressure[1];
|
||||
}
|
||||
} else if( lrg.mask().overlap(*Matcher::idealreg2regmask[Op_RegI]) ) {
|
||||
pressure[0] += lrg.reg_pressure();
|
||||
if( pressure[0] > b->_reg_pressure )
|
||||
b->_reg_pressure = pressure[0];
|
||||
if (pressure[0] > block->_reg_pressure) {
|
||||
block->_reg_pressure = pressure[0];
|
||||
}
|
||||
}
|
||||
}
|
||||
assert( pressure[0] == count_int_pressure (&liveout), "" );
|
||||
@ -772,44 +757,47 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
|
||||
// If we run off the top of the block with high pressure and
|
||||
// never see a hi-to-low pressure transition, just record that
|
||||
// the whole block is high pressure.
|
||||
if( pressure[0] > (uint)INTPRESSURE ) {
|
||||
if (pressure[0] > (uint)INTPRESSURE) {
|
||||
hrp_index[0] = 0;
|
||||
if( pressure[0] > b->_reg_pressure )
|
||||
b->_reg_pressure = pressure[0];
|
||||
if (pressure[0] > block->_reg_pressure) {
|
||||
block->_reg_pressure = pressure[0];
|
||||
}
|
||||
}
|
||||
if( pressure[1] > (uint)FLOATPRESSURE ) {
|
||||
if (pressure[1] > (uint)FLOATPRESSURE) {
|
||||
hrp_index[1] = 0;
|
||||
if( pressure[1] > b->_freg_pressure )
|
||||
b->_freg_pressure = pressure[1];
|
||||
if (pressure[1] > block->_freg_pressure) {
|
||||
block->_freg_pressure = pressure[1];
|
||||
}
|
||||
}
|
||||
|
||||
// Compute high pressure indice; avoid landing in the middle of projnodes
|
||||
j = hrp_index[0];
|
||||
if( j < b->_nodes.size() && j < b->end_idx()+1 ) {
|
||||
Node *cur = b->_nodes[j];
|
||||
while( cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch() ) {
|
||||
if (j < block->_nodes.size() && j < block->end_idx() + 1) {
|
||||
Node* cur = block->_nodes[j];
|
||||
while (cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch()) {
|
||||
j--;
|
||||
cur = b->_nodes[j];
|
||||
cur = block->_nodes[j];
|
||||
}
|
||||
}
|
||||
b->_ihrp_index = j;
|
||||
block->_ihrp_index = j;
|
||||
j = hrp_index[1];
|
||||
if( j < b->_nodes.size() && j < b->end_idx()+1 ) {
|
||||
Node *cur = b->_nodes[j];
|
||||
while( cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch() ) {
|
||||
if (j < block->_nodes.size() && j < block->end_idx() + 1) {
|
||||
Node* cur = block->_nodes[j];
|
||||
while (cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch()) {
|
||||
j--;
|
||||
cur = b->_nodes[j];
|
||||
cur = block->_nodes[j];
|
||||
}
|
||||
}
|
||||
b->_fhrp_index = j;
|
||||
block->_fhrp_index = j;
|
||||
|
||||
#ifndef PRODUCT
|
||||
// Gather Register Pressure Statistics
|
||||
if( PrintOptoStatistics ) {
|
||||
if( b->_reg_pressure > (uint)INTPRESSURE || b->_freg_pressure > (uint)FLOATPRESSURE )
|
||||
if (block->_reg_pressure > (uint)INTPRESSURE || block->_freg_pressure > (uint)FLOATPRESSURE) {
|
||||
_high_pressure++;
|
||||
else
|
||||
} else {
|
||||
_low_pressure++;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
} // End of for all blocks
|
||||
|
@ -501,7 +501,7 @@ Node *Block::select(PhaseCFG *cfg, Node_List &worklist, GrowableArray<int> &read
|
||||
n_choice = 1;
|
||||
}
|
||||
|
||||
uint n_latency = cfg->_node_latency->at_grow(n->_idx);
|
||||
uint n_latency = cfg->get_latency_for_node(n);
|
||||
uint n_score = n->req(); // Many inputs get high score to break ties
|
||||
|
||||
// Keep best latency found
|
||||
@ -797,7 +797,7 @@ bool Block::schedule_local(PhaseCFG *cfg, Matcher &matcher, GrowableArray<int> &
|
||||
Node *n = _nodes[j];
|
||||
int idx = n->_idx;
|
||||
tty->print("# ready cnt:%3d ", ready_cnt.at(idx));
|
||||
tty->print("latency:%3d ", cfg->_node_latency->at_grow(idx));
|
||||
tty->print("latency:%3d ", cfg->get_latency_for_node(n));
|
||||
tty->print("%4d: %s\n", idx, n->Name());
|
||||
}
|
||||
}
|
||||
@ -825,7 +825,7 @@ bool Block::schedule_local(PhaseCFG *cfg, Matcher &matcher, GrowableArray<int> &
|
||||
#ifndef PRODUCT
|
||||
if (cfg->trace_opto_pipelining()) {
|
||||
tty->print("# select %d: %s", n->_idx, n->Name());
|
||||
tty->print(", latency:%d", cfg->_node_latency->at_grow(n->_idx));
|
||||
tty->print(", latency:%d", cfg->get_latency_for_node(n));
|
||||
n->dump();
|
||||
if (Verbose) {
|
||||
tty->print("# ready list:");
|
||||
|
@ -30,9 +30,6 @@
|
||||
#include "opto/machnode.hpp"
|
||||
|
||||
|
||||
|
||||
//=============================================================================
|
||||
//------------------------------PhaseLive--------------------------------------
|
||||
// Compute live-in/live-out. We use a totally incremental algorithm. The LIVE
|
||||
// problem is monotonic. The steady-state solution looks like this: pull a
|
||||
// block from the worklist. It has a set of delta's - values which are newly
|
||||
@ -53,9 +50,9 @@ void PhaseLive::compute(uint maxlrg) {
|
||||
|
||||
// Init the sparse live arrays. This data is live on exit from here!
|
||||
// The _live info is the live-out info.
|
||||
_live = (IndexSet*)_arena->Amalloc(sizeof(IndexSet)*_cfg._num_blocks);
|
||||
_live = (IndexSet*)_arena->Amalloc(sizeof(IndexSet) * _cfg.number_of_blocks());
|
||||
uint i;
|
||||
for( i=0; i<_cfg._num_blocks; i++ ) {
|
||||
for (i = 0; i < _cfg.number_of_blocks(); i++) {
|
||||
_live[i].initialize(_maxlrg);
|
||||
}
|
||||
|
||||
@ -65,14 +62,14 @@ void PhaseLive::compute(uint maxlrg) {
|
||||
// Does the memory used by _defs and _deltas get reclaimed? Does it matter? TT
|
||||
|
||||
// Array of values defined locally in blocks
|
||||
_defs = NEW_RESOURCE_ARRAY(IndexSet,_cfg._num_blocks);
|
||||
for( i=0; i<_cfg._num_blocks; i++ ) {
|
||||
_defs = NEW_RESOURCE_ARRAY(IndexSet,_cfg.number_of_blocks());
|
||||
for (i = 0; i < _cfg.number_of_blocks(); i++) {
|
||||
_defs[i].initialize(_maxlrg);
|
||||
}
|
||||
|
||||
// Array of delta-set pointers, indexed by block pre_order-1.
|
||||
_deltas = NEW_RESOURCE_ARRAY(IndexSet*,_cfg._num_blocks);
|
||||
memset( _deltas, 0, sizeof(IndexSet*)* _cfg._num_blocks);
|
||||
_deltas = NEW_RESOURCE_ARRAY(IndexSet*,_cfg.number_of_blocks());
|
||||
memset( _deltas, 0, sizeof(IndexSet*)* _cfg.number_of_blocks());
|
||||
|
||||
_free_IndexSet = NULL;
|
||||
|
||||
@ -80,31 +77,32 @@ void PhaseLive::compute(uint maxlrg) {
|
||||
VectorSet first_pass(Thread::current()->resource_area());
|
||||
|
||||
// Outer loop: must compute local live-in sets and push into predecessors.
|
||||
uint iters = _cfg._num_blocks; // stat counters
|
||||
for( uint j=_cfg._num_blocks; j>0; j-- ) {
|
||||
Block *b = _cfg._blocks[j-1];
|
||||
for (uint j = _cfg.number_of_blocks(); j > 0; j--) {
|
||||
Block* block = _cfg.get_block(j - 1);
|
||||
|
||||
// Compute the local live-in set. Start with any new live-out bits.
|
||||
IndexSet *use = getset( b );
|
||||
IndexSet *def = &_defs[b->_pre_order-1];
|
||||
IndexSet* use = getset(block);
|
||||
IndexSet* def = &_defs[block->_pre_order-1];
|
||||
DEBUG_ONLY(IndexSet *def_outside = getfreeset();)
|
||||
uint i;
|
||||
for( i=b->_nodes.size(); i>1; i-- ) {
|
||||
Node *n = b->_nodes[i-1];
|
||||
if( n->is_Phi() ) break;
|
||||
for (i = block->_nodes.size(); i > 1; i--) {
|
||||
Node* n = block->_nodes[i-1];
|
||||
if (n->is_Phi()) {
|
||||
break;
|
||||
}
|
||||
|
||||
uint r = _names[n->_idx];
|
||||
assert(!def_outside->member(r), "Use of external LRG overlaps the same LRG defined in this block");
|
||||
def->insert( r );
|
||||
use->remove( r );
|
||||
uint cnt = n->req();
|
||||
for( uint k=1; k<cnt; k++ ) {
|
||||
for (uint k = 1; k < cnt; k++) {
|
||||
Node *nk = n->in(k);
|
||||
uint nkidx = nk->_idx;
|
||||
if (_cfg.get_block_for_node(nk) != b) {
|
||||
if (_cfg.get_block_for_node(nk) != block) {
|
||||
uint u = _names[nkidx];
|
||||
use->insert( u );
|
||||
DEBUG_ONLY(def_outside->insert( u );)
|
||||
use->insert(u);
|
||||
DEBUG_ONLY(def_outside->insert(u);)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -113,41 +111,38 @@ void PhaseLive::compute(uint maxlrg) {
|
||||
_free_IndexSet = def_outside; // Drop onto free list
|
||||
#endif
|
||||
// Remove anything defined by Phis and the block start instruction
|
||||
for( uint k=i; k>0; k-- ) {
|
||||
uint r = _names[b->_nodes[k-1]->_idx];
|
||||
def->insert( r );
|
||||
use->remove( r );
|
||||
for (uint k = i; k > 0; k--) {
|
||||
uint r = _names[block->_nodes[k - 1]->_idx];
|
||||
def->insert(r);
|
||||
use->remove(r);
|
||||
}
|
||||
|
||||
// Push these live-in things to predecessors
|
||||
for( uint l=1; l<b->num_preds(); l++ ) {
|
||||
Block *p = _cfg.get_block_for_node(b->pred(l));
|
||||
add_liveout( p, use, first_pass );
|
||||
for (uint l = 1; l < block->num_preds(); l++) {
|
||||
Block* p = _cfg.get_block_for_node(block->pred(l));
|
||||
add_liveout(p, use, first_pass);
|
||||
|
||||
// PhiNode uses go in the live-out set of prior blocks.
|
||||
for( uint k=i; k>0; k-- )
|
||||
add_liveout( p, _names[b->_nodes[k-1]->in(l)->_idx], first_pass );
|
||||
for (uint k = i; k > 0; k--) {
|
||||
add_liveout(p, _names[block->_nodes[k-1]->in(l)->_idx], first_pass);
|
||||
}
|
||||
}
|
||||
freeset( b );
|
||||
first_pass.set(b->_pre_order);
|
||||
freeset(block);
|
||||
first_pass.set(block->_pre_order);
|
||||
|
||||
// Inner loop: blocks that picked up new live-out values to be propagated
|
||||
while( _worklist->size() ) {
|
||||
// !!!!!
|
||||
// #ifdef ASSERT
|
||||
iters++;
|
||||
// #endif
|
||||
Block *b = _worklist->pop();
|
||||
IndexSet *delta = getset(b);
|
||||
while (_worklist->size()) {
|
||||
Block* block = _worklist->pop();
|
||||
IndexSet *delta = getset(block);
|
||||
assert( delta->count(), "missing delta set" );
|
||||
|
||||
// Add new-live-in to predecessors live-out sets
|
||||
for (uint l = 1; l < b->num_preds(); l++) {
|
||||
Block* block = _cfg.get_block_for_node(b->pred(l));
|
||||
add_liveout(block, delta, first_pass);
|
||||
for (uint l = 1; l < block->num_preds(); l++) {
|
||||
Block* predecessor = _cfg.get_block_for_node(block->pred(l));
|
||||
add_liveout(predecessor, delta, first_pass);
|
||||
}
|
||||
|
||||
freeset(b);
|
||||
freeset(block);
|
||||
} // End of while-worklist-not-empty
|
||||
|
||||
} // End of for-all-blocks-outer-loop
|
||||
@ -155,7 +150,7 @@ void PhaseLive::compute(uint maxlrg) {
|
||||
// We explicitly clear all of the IndexSets which we are about to release.
|
||||
// This allows us to recycle their internal memory into IndexSet's free list.
|
||||
|
||||
for( i=0; i<_cfg._num_blocks; i++ ) {
|
||||
for (i = 0; i < _cfg.number_of_blocks(); i++) {
|
||||
_defs[i].clear();
|
||||
if (_deltas[i]) {
|
||||
// Is this always true?
|
||||
@ -171,13 +166,11 @@ void PhaseLive::compute(uint maxlrg) {
|
||||
|
||||
}
|
||||
|
||||
//------------------------------stats------------------------------------------
|
||||
#ifndef PRODUCT
|
||||
void PhaseLive::stats(uint iters) const {
|
||||
}
|
||||
#endif
|
||||
|
||||
//------------------------------getset-----------------------------------------
|
||||
// Get an IndexSet for a block. Return existing one, if any. Make a new
|
||||
// empty one if a prior one does not exist.
|
||||
IndexSet *PhaseLive::getset( Block *p ) {
|
||||
@ -188,7 +181,6 @@ IndexSet *PhaseLive::getset( Block *p ) {
|
||||
return delta; // Return set of new live-out items
|
||||
}
|
||||
|
||||
//------------------------------getfreeset-------------------------------------
|
||||
// Pull from free list, or allocate. Internal allocation on the returned set
|
||||
// is always from thread local storage.
|
||||
IndexSet *PhaseLive::getfreeset( ) {
|
||||
@ -207,7 +199,6 @@ IndexSet *PhaseLive::getfreeset( ) {
|
||||
return f;
|
||||
}
|
||||
|
||||
//------------------------------freeset----------------------------------------
|
||||
// Free an IndexSet from a block.
|
||||
void PhaseLive::freeset( const Block *p ) {
|
||||
IndexSet *f = _deltas[p->_pre_order-1];
|
||||
@ -216,7 +207,6 @@ void PhaseLive::freeset( const Block *p ) {
|
||||
_deltas[p->_pre_order-1] = NULL;
|
||||
}
|
||||
|
||||
//------------------------------add_liveout------------------------------------
|
||||
// Add a live-out value to a given blocks live-out set. If it is new, then
|
||||
// also add it to the delta set and stick the block on the worklist.
|
||||
void PhaseLive::add_liveout( Block *p, uint r, VectorSet &first_pass ) {
|
||||
@ -233,8 +223,6 @@ void PhaseLive::add_liveout( Block *p, uint r, VectorSet &first_pass ) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//------------------------------add_liveout------------------------------------
|
||||
// Add a vector of live-out values to a given blocks live-out set.
|
||||
void PhaseLive::add_liveout( Block *p, IndexSet *lo, VectorSet &first_pass ) {
|
||||
IndexSet *live = &_live[p->_pre_order-1];
|
||||
@ -262,7 +250,6 @@ void PhaseLive::add_liveout( Block *p, IndexSet *lo, VectorSet &first_pass ) {
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
//------------------------------dump-------------------------------------------
|
||||
// Dump the live-out set for a block
|
||||
void PhaseLive::dump( const Block *b ) const {
|
||||
tty->print("Block %d: ",b->_pre_order);
|
||||
@ -275,18 +262,19 @@ void PhaseLive::dump( const Block *b ) const {
|
||||
tty->print("\n");
|
||||
}
|
||||
|
||||
//------------------------------verify_base_ptrs-------------------------------
|
||||
// Verify that base pointers and derived pointers are still sane.
|
||||
void PhaseChaitin::verify_base_ptrs( ResourceArea *a ) const {
|
||||
#ifdef ASSERT
|
||||
Unique_Node_List worklist(a);
|
||||
for( uint i = 0; i < _cfg._num_blocks; i++ ) {
|
||||
Block *b = _cfg._blocks[i];
|
||||
for( uint j = b->end_idx() + 1; j > 1; j-- ) {
|
||||
Node *n = b->_nodes[j-1];
|
||||
if( n->is_Phi() ) break;
|
||||
for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
|
||||
Block* block = _cfg.get_block(i);
|
||||
for (uint j = block->end_idx() + 1; j > 1; j--) {
|
||||
Node* n = block->_nodes[j-1];
|
||||
if (n->is_Phi()) {
|
||||
break;
|
||||
}
|
||||
// Found a safepoint?
|
||||
if( n->is_MachSafePoint() ) {
|
||||
if (n->is_MachSafePoint()) {
|
||||
MachSafePointNode *sfpt = n->as_MachSafePoint();
|
||||
JVMState* jvms = sfpt->jvms();
|
||||
if (jvms != NULL) {
|
||||
@ -358,7 +346,6 @@ void PhaseChaitin::verify_base_ptrs( ResourceArea *a ) const {
|
||||
#endif
|
||||
}
|
||||
|
||||
//------------------------------verify-------------------------------------
|
||||
// Verify that graphs and base pointers are still sane.
|
||||
void PhaseChaitin::verify( ResourceArea *a, bool verify_ifg ) const {
|
||||
#ifdef ASSERT
|
||||
|
@ -67,8 +67,8 @@ const uint Matcher::_begin_rematerialize = _BEGIN_REMATERIALIZE;
|
||||
const uint Matcher::_end_rematerialize = _END_REMATERIALIZE;
|
||||
|
||||
//---------------------------Matcher-------------------------------------------
|
||||
Matcher::Matcher( Node_List &proj_list ) :
|
||||
PhaseTransform( Phase::Ins_Select ),
|
||||
Matcher::Matcher()
|
||||
: PhaseTransform( Phase::Ins_Select ),
|
||||
#ifdef ASSERT
|
||||
_old2new_map(C->comp_arena()),
|
||||
_new2old_map(C->comp_arena()),
|
||||
@ -78,7 +78,7 @@ Matcher::Matcher( Node_List &proj_list ) :
|
||||
_swallowed(swallowed),
|
||||
_begin_inst_chain_rule(_BEGIN_INST_CHAIN_RULE),
|
||||
_end_inst_chain_rule(_END_INST_CHAIN_RULE),
|
||||
_must_clone(must_clone), _proj_list(proj_list),
|
||||
_must_clone(must_clone),
|
||||
_register_save_policy(register_save_policy),
|
||||
_c_reg_save_policy(c_reg_save_policy),
|
||||
_register_save_type(register_save_type),
|
||||
@ -1304,8 +1304,9 @@ MachNode *Matcher::match_sfpt( SafePointNode *sfpt ) {
|
||||
for (int i = begin_out_arg_area; i < out_arg_limit_per_call; i++)
|
||||
proj->_rout.Insert(OptoReg::Name(i));
|
||||
}
|
||||
if( proj->_rout.is_NotEmpty() )
|
||||
_proj_list.push(proj);
|
||||
if (proj->_rout.is_NotEmpty()) {
|
||||
push_projection(proj);
|
||||
}
|
||||
}
|
||||
// Transfer the safepoint information from the call to the mcall
|
||||
// Move the JVMState list
|
||||
@ -1685,14 +1686,15 @@ MachNode *Matcher::ReduceInst( State *s, int rule, Node *&mem ) {
|
||||
}
|
||||
|
||||
// If the _leaf is an AddP, insert the base edge
|
||||
if( leaf->is_AddP() )
|
||||
if (leaf->is_AddP()) {
|
||||
mach->ins_req(AddPNode::Base,leaf->in(AddPNode::Base));
|
||||
}
|
||||
|
||||
uint num_proj = _proj_list.size();
|
||||
uint number_of_projections_prior = number_of_projections();
|
||||
|
||||
// Perform any 1-to-many expansions required
|
||||
MachNode *ex = mach->Expand(s,_proj_list, mem);
|
||||
if( ex != mach ) {
|
||||
MachNode *ex = mach->Expand(s, _projection_list, mem);
|
||||
if (ex != mach) {
|
||||
assert(ex->ideal_reg() == mach->ideal_reg(), "ideal types should match");
|
||||
if( ex->in(1)->is_Con() )
|
||||
ex->in(1)->set_req(0, C->root());
|
||||
@ -1713,7 +1715,7 @@ MachNode *Matcher::ReduceInst( State *s, int rule, Node *&mem ) {
|
||||
// generated belatedly during spill code generation.
|
||||
if (_allocation_started) {
|
||||
guarantee(ex == mach, "no expand rules during spill generation");
|
||||
guarantee(_proj_list.size() == num_proj, "no allocation during spill generation");
|
||||
guarantee(number_of_projections_prior == number_of_projections(), "no allocation during spill generation");
|
||||
}
|
||||
|
||||
if (leaf->is_Con() || leaf->is_DecodeNarrowPtr()) {
|
||||
|
@ -88,7 +88,7 @@ class Matcher : public PhaseTransform {
|
||||
|
||||
Node *transform( Node *dummy );
|
||||
|
||||
Node_List &_proj_list; // For Machine nodes killing many values
|
||||
Node_List _projection_list; // For Machine nodes killing many values
|
||||
|
||||
Node_Array _shared_nodes;
|
||||
|
||||
@ -183,10 +183,30 @@ public:
|
||||
void collect_null_checks( Node *proj, Node *orig_proj );
|
||||
void validate_null_checks( );
|
||||
|
||||
Matcher( Node_List &proj_list );
|
||||
Matcher();
|
||||
|
||||
// Get a projection node at position pos
|
||||
Node* get_projection(uint pos) {
|
||||
return _projection_list[pos];
|
||||
}
|
||||
|
||||
// Push a projection node onto the projection list
|
||||
void push_projection(Node* node) {
|
||||
_projection_list.push(node);
|
||||
}
|
||||
|
||||
Node* pop_projection() {
|
||||
return _projection_list.pop();
|
||||
}
|
||||
|
||||
// Number of nodes in the projection list
|
||||
uint number_of_projections() const {
|
||||
return _projection_list.size();
|
||||
}
|
||||
|
||||
// Select instructions for entire method
|
||||
void match( );
|
||||
void match();
|
||||
|
||||
// Helper for match
|
||||
OptoReg::Name warp_incoming_stk_arg( VMReg reg );
|
||||
|
||||
|
@ -54,11 +54,10 @@ extern uint size_deopt_handler();
|
||||
extern int emit_exception_handler(CodeBuffer &cbuf);
|
||||
extern int emit_deopt_handler(CodeBuffer &cbuf);
|
||||
|
||||
//------------------------------Output-----------------------------------------
|
||||
// Convert Nodes to instruction bits and pass off to the VM
|
||||
void Compile::Output() {
|
||||
// RootNode goes
|
||||
assert( _cfg->_broot->_nodes.size() == 0, "" );
|
||||
assert( _cfg->get_root_block()->_nodes.size() == 0, "" );
|
||||
|
||||
// The number of new nodes (mostly MachNop) is proportional to
|
||||
// the number of java calls and inner loops which are aligned.
|
||||
@ -68,8 +67,8 @@ void Compile::Output() {
|
||||
return;
|
||||
}
|
||||
// Make sure I can find the Start Node
|
||||
Block *entry = _cfg->_blocks[1];
|
||||
Block *broot = _cfg->_broot;
|
||||
Block *entry = _cfg->get_block(1);
|
||||
Block *broot = _cfg->get_root_block();
|
||||
|
||||
const StartNode *start = entry->_nodes[0]->as_Start();
|
||||
|
||||
@ -109,40 +108,44 @@ void Compile::Output() {
|
||||
}
|
||||
|
||||
// Insert epilogs before every return
|
||||
for( uint i=0; i<_cfg->_num_blocks; i++ ) {
|
||||
Block *b = _cfg->_blocks[i];
|
||||
if( !b->is_connector() && b->non_connector_successor(0) == _cfg->_broot ) { // Found a program exit point?
|
||||
Node *m = b->end();
|
||||
if( m->is_Mach() && m->as_Mach()->ideal_Opcode() != Op_Halt ) {
|
||||
MachEpilogNode *epilog = new (this) MachEpilogNode(m->as_Mach()->ideal_Opcode() == Op_Return);
|
||||
b->add_inst( epilog );
|
||||
_cfg->map_node_to_block(epilog, b);
|
||||
for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
|
||||
Block* block = _cfg->get_block(i);
|
||||
if (!block->is_connector() && block->non_connector_successor(0) == _cfg->get_root_block()) { // Found a program exit point?
|
||||
Node* m = block->end();
|
||||
if (m->is_Mach() && m->as_Mach()->ideal_Opcode() != Op_Halt) {
|
||||
MachEpilogNode* epilog = new (this) MachEpilogNode(m->as_Mach()->ideal_Opcode() == Op_Return);
|
||||
block->add_inst(epilog);
|
||||
_cfg->map_node_to_block(epilog, block);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# ifdef ENABLE_ZAP_DEAD_LOCALS
|
||||
if ( ZapDeadCompiledLocals ) Insert_zap_nodes();
|
||||
if (ZapDeadCompiledLocals) {
|
||||
Insert_zap_nodes();
|
||||
}
|
||||
# endif
|
||||
|
||||
uint* blk_starts = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks+1);
|
||||
blk_starts[0] = 0;
|
||||
uint* blk_starts = NEW_RESOURCE_ARRAY(uint, _cfg->number_of_blocks() + 1);
|
||||
blk_starts[0] = 0;
|
||||
|
||||
// Initialize code buffer and process short branches.
|
||||
CodeBuffer* cb = init_buffer(blk_starts);
|
||||
|
||||
if (cb == NULL || failing()) return;
|
||||
if (cb == NULL || failing()) {
|
||||
return;
|
||||
}
|
||||
|
||||
ScheduleAndBundle();
|
||||
|
||||
#ifndef PRODUCT
|
||||
if (trace_opto_output()) {
|
||||
tty->print("\n---- After ScheduleAndBundle ----\n");
|
||||
for (uint i = 0; i < _cfg->_num_blocks; i++) {
|
||||
for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
|
||||
tty->print("\nBB#%03d:\n", i);
|
||||
Block *bb = _cfg->_blocks[i];
|
||||
for (uint j = 0; j < bb->_nodes.size(); j++) {
|
||||
Node *n = bb->_nodes[j];
|
||||
Block* block = _cfg->get_block(i);
|
||||
for (uint j = 0; j < block->_nodes.size(); j++) {
|
||||
Node* n = block->_nodes[j];
|
||||
OptoReg::Name reg = _regalloc->get_reg_first(n);
|
||||
tty->print(" %-6s ", reg >= 0 && reg < REG_COUNT ? Matcher::regName[reg] : "");
|
||||
n->dump();
|
||||
@ -151,11 +154,15 @@ void Compile::Output() {
|
||||
}
|
||||
#endif
|
||||
|
||||
if (failing()) return;
|
||||
if (failing()) {
|
||||
return;
|
||||
}
|
||||
|
||||
BuildOopMaps();
|
||||
|
||||
if (failing()) return;
|
||||
if (failing()) {
|
||||
return;
|
||||
}
|
||||
|
||||
fill_buffer(cb, blk_starts);
|
||||
}
|
||||
@ -217,8 +224,8 @@ void Compile::Insert_zap_nodes() {
|
||||
return; // no safepoints/oopmaps emitted for calls in stubs,so we don't care
|
||||
|
||||
// Insert call to zap runtime stub before every node with an oop map
|
||||
for( uint i=0; i<_cfg->_num_blocks; i++ ) {
|
||||
Block *b = _cfg->_blocks[i];
|
||||
for( uint i=0; i<_cfg->number_of_blocks(); i++ ) {
|
||||
Block *b = _cfg->get_block(i);
|
||||
for ( uint j = 0; j < b->_nodes.size(); ++j ) {
|
||||
Node *n = b->_nodes[j];
|
||||
|
||||
@ -275,7 +282,6 @@ Node* Compile::call_zap_node(MachSafePointNode* node_to_check, int block_no) {
|
||||
return _matcher->match_sfpt(ideal_node);
|
||||
}
|
||||
|
||||
//------------------------------is_node_getting_a_safepoint--------------------
|
||||
bool Compile::is_node_getting_a_safepoint( Node* n) {
|
||||
// This code duplicates the logic prior to the call of add_safepoint
|
||||
// below in this file.
|
||||
@ -285,7 +291,6 @@ bool Compile::is_node_getting_a_safepoint( Node* n) {
|
||||
|
||||
# endif // ENABLE_ZAP_DEAD_LOCALS
|
||||
|
||||
//------------------------------compute_loop_first_inst_sizes------------------
|
||||
// Compute the size of first NumberOfLoopInstrToAlign instructions at the top
|
||||
// of a loop. When aligning a loop we need to provide enough instructions
|
||||
// in cpu's fetch buffer to feed decoders. The loop alignment could be
|
||||
@ -302,42 +307,39 @@ void Compile::compute_loop_first_inst_sizes() {
|
||||
// or alignment padding is larger then MaxLoopPad. By default, MaxLoopPad
|
||||
// is equal to OptoLoopAlignment-1 except on new Intel cpus, where it is
|
||||
// equal to 11 bytes which is the largest address NOP instruction.
|
||||
if( MaxLoopPad < OptoLoopAlignment-1 ) {
|
||||
uint last_block = _cfg->_num_blocks-1;
|
||||
for( uint i=1; i <= last_block; i++ ) {
|
||||
Block *b = _cfg->_blocks[i];
|
||||
if (MaxLoopPad < OptoLoopAlignment - 1) {
|
||||
uint last_block = _cfg->number_of_blocks() - 1;
|
||||
for (uint i = 1; i <= last_block; i++) {
|
||||
Block* block = _cfg->get_block(i);
|
||||
// Check the first loop's block which requires an alignment.
|
||||
if( b->loop_alignment() > (uint)relocInfo::addr_unit() ) {
|
||||
if (block->loop_alignment() > (uint)relocInfo::addr_unit()) {
|
||||
uint sum_size = 0;
|
||||
uint inst_cnt = NumberOfLoopInstrToAlign;
|
||||
inst_cnt = b->compute_first_inst_size(sum_size, inst_cnt, _regalloc);
|
||||
inst_cnt = block->compute_first_inst_size(sum_size, inst_cnt, _regalloc);
|
||||
|
||||
// Check subsequent fallthrough blocks if the loop's first
|
||||
// block(s) does not have enough instructions.
|
||||
Block *nb = b;
|
||||
while( inst_cnt > 0 &&
|
||||
i < last_block &&
|
||||
!_cfg->_blocks[i+1]->has_loop_alignment() &&
|
||||
!nb->has_successor(b) ) {
|
||||
Block *nb = block;
|
||||
while(inst_cnt > 0 &&
|
||||
i < last_block &&
|
||||
!_cfg->get_block(i + 1)->has_loop_alignment() &&
|
||||
!nb->has_successor(block)) {
|
||||
i++;
|
||||
nb = _cfg->_blocks[i];
|
||||
nb = _cfg->get_block(i);
|
||||
inst_cnt = nb->compute_first_inst_size(sum_size, inst_cnt, _regalloc);
|
||||
} // while( inst_cnt > 0 && i < last_block )
|
||||
|
||||
b->set_first_inst_size(sum_size);
|
||||
block->set_first_inst_size(sum_size);
|
||||
} // f( b->head()->is_Loop() )
|
||||
} // for( i <= last_block )
|
||||
} // if( MaxLoopPad < OptoLoopAlignment-1 )
|
||||
}
|
||||
|
||||
//----------------------shorten_branches---------------------------------------
|
||||
// The architecture description provides short branch variants for some long
|
||||
// branch instructions. Replace eligible long branches with short branches.
|
||||
void Compile::shorten_branches(uint* blk_starts, int& code_size, int& reloc_size, int& stub_size) {
|
||||
|
||||
// ------------------
|
||||
// Compute size of each block, method size, and relocation information size
|
||||
uint nblocks = _cfg->_num_blocks;
|
||||
uint nblocks = _cfg->number_of_blocks();
|
||||
|
||||
uint* jmp_offset = NEW_RESOURCE_ARRAY(uint,nblocks);
|
||||
uint* jmp_size = NEW_RESOURCE_ARRAY(uint,nblocks);
|
||||
@ -364,7 +366,7 @@ void Compile::shorten_branches(uint* blk_starts, int& code_size, int& reloc_size
|
||||
uint last_avoid_back_to_back_adr = max_uint;
|
||||
uint nop_size = (new (this) MachNopNode())->size(_regalloc);
|
||||
for (uint i = 0; i < nblocks; i++) { // For all blocks
|
||||
Block *b = _cfg->_blocks[i];
|
||||
Block* block = _cfg->get_block(i);
|
||||
|
||||
// During short branch replacement, we store the relative (to blk_starts)
|
||||
// offset of jump in jmp_offset, rather than the absolute offset of jump.
|
||||
@ -377,10 +379,10 @@ void Compile::shorten_branches(uint* blk_starts, int& code_size, int& reloc_size
|
||||
DEBUG_ONLY( jmp_rule[i] = 0; )
|
||||
|
||||
// Sum all instruction sizes to compute block size
|
||||
uint last_inst = b->_nodes.size();
|
||||
uint last_inst = block->_nodes.size();
|
||||
uint blk_size = 0;
|
||||
for (uint j = 0; j < last_inst; j++) {
|
||||
Node* nj = b->_nodes[j];
|
||||
Node* nj = block->_nodes[j];
|
||||
// Handle machine instruction nodes
|
||||
if (nj->is_Mach()) {
|
||||
MachNode *mach = nj->as_Mach();
|
||||
@ -441,8 +443,8 @@ void Compile::shorten_branches(uint* blk_starts, int& code_size, int& reloc_size
|
||||
// When the next block starts a loop, we may insert pad NOP
|
||||
// instructions. Since we cannot know our future alignment,
|
||||
// assume the worst.
|
||||
if (i< nblocks-1) {
|
||||
Block *nb = _cfg->_blocks[i+1];
|
||||
if (i < nblocks - 1) {
|
||||
Block* nb = _cfg->get_block(i + 1);
|
||||
int max_loop_pad = nb->code_alignment()-relocInfo::addr_unit();
|
||||
if (max_loop_pad > 0) {
|
||||
assert(is_power_of_2(max_loop_pad+relocInfo::addr_unit()), "");
|
||||
@ -473,26 +475,26 @@ void Compile::shorten_branches(uint* blk_starts, int& code_size, int& reloc_size
|
||||
has_short_branch_candidate = false;
|
||||
int adjust_block_start = 0;
|
||||
for (uint i = 0; i < nblocks; i++) {
|
||||
Block *b = _cfg->_blocks[i];
|
||||
Block* block = _cfg->get_block(i);
|
||||
int idx = jmp_nidx[i];
|
||||
MachNode* mach = (idx == -1) ? NULL: b->_nodes[idx]->as_Mach();
|
||||
MachNode* mach = (idx == -1) ? NULL: block->_nodes[idx]->as_Mach();
|
||||
if (mach != NULL && mach->may_be_short_branch()) {
|
||||
#ifdef ASSERT
|
||||
assert(jmp_size[i] > 0 && mach->is_MachBranch(), "sanity");
|
||||
int j;
|
||||
// Find the branch; ignore trailing NOPs.
|
||||
for (j = b->_nodes.size()-1; j>=0; j--) {
|
||||
Node* n = b->_nodes[j];
|
||||
for (j = block->_nodes.size()-1; j>=0; j--) {
|
||||
Node* n = block->_nodes[j];
|
||||
if (!n->is_Mach() || n->as_Mach()->ideal_Opcode() != Op_Con)
|
||||
break;
|
||||
}
|
||||
assert(j >= 0 && j == idx && b->_nodes[j] == (Node*)mach, "sanity");
|
||||
assert(j >= 0 && j == idx && block->_nodes[j] == (Node*)mach, "sanity");
|
||||
#endif
|
||||
int br_size = jmp_size[i];
|
||||
int br_offs = blk_starts[i] + jmp_offset[i];
|
||||
|
||||
// This requires the TRUE branch target be in succs[0]
|
||||
uint bnum = b->non_connector_successor(0)->_pre_order;
|
||||
uint bnum = block->non_connector_successor(0)->_pre_order;
|
||||
int offset = blk_starts[bnum] - br_offs;
|
||||
if (bnum > i) { // adjust following block's offset
|
||||
offset -= adjust_block_start;
|
||||
@ -520,7 +522,7 @@ void Compile::shorten_branches(uint* blk_starts, int& code_size, int& reloc_size
|
||||
diff -= nop_size;
|
||||
}
|
||||
adjust_block_start += diff;
|
||||
b->_nodes.map(idx, replacement);
|
||||
block->_nodes.map(idx, replacement);
|
||||
mach->subsume_by(replacement, C);
|
||||
mach = replacement;
|
||||
progress = true;
|
||||
@ -1083,8 +1085,8 @@ CodeBuffer* Compile::init_buffer(uint* blk_starts) {
|
||||
if (has_mach_constant_base_node()) {
|
||||
// Fill the constant table.
|
||||
// Note: This must happen before shorten_branches.
|
||||
for (uint i = 0; i < _cfg->_num_blocks; i++) {
|
||||
Block* b = _cfg->_blocks[i];
|
||||
for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
|
||||
Block* b = _cfg->get_block(i);
|
||||
|
||||
for (uint j = 0; j < b->_nodes.size(); j++) {
|
||||
Node* n = b->_nodes[j];
|
||||
@ -1170,7 +1172,7 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
|
||||
// !!!!! This preserves old handling of oopmaps for now
|
||||
debug_info()->set_oopmaps(_oop_map_set);
|
||||
|
||||
uint nblocks = _cfg->_num_blocks;
|
||||
uint nblocks = _cfg->number_of_blocks();
|
||||
// Count and start of implicit null check instructions
|
||||
uint inct_cnt = 0;
|
||||
uint *inct_starts = NEW_RESOURCE_ARRAY(uint, nblocks+1);
|
||||
@ -1218,21 +1220,21 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
|
||||
// Now fill in the code buffer
|
||||
Node *delay_slot = NULL;
|
||||
|
||||
for (uint i=0; i < nblocks; i++) {
|
||||
Block *b = _cfg->_blocks[i];
|
||||
|
||||
Node *head = b->head();
|
||||
for (uint i = 0; i < nblocks; i++) {
|
||||
Block* block = _cfg->get_block(i);
|
||||
Node* head = block->head();
|
||||
|
||||
// If this block needs to start aligned (i.e, can be reached other
|
||||
// than by falling-thru from the previous block), then force the
|
||||
// start of a new bundle.
|
||||
if (Pipeline::requires_bundling() && starts_bundle(head))
|
||||
if (Pipeline::requires_bundling() && starts_bundle(head)) {
|
||||
cb->flush_bundle(true);
|
||||
}
|
||||
|
||||
#ifdef ASSERT
|
||||
if (!b->is_connector()) {
|
||||
if (!block->is_connector()) {
|
||||
stringStream st;
|
||||
b->dump_head(_cfg, &st);
|
||||
block->dump_head(_cfg, &st);
|
||||
MacroAssembler(cb).block_comment(st.as_string());
|
||||
}
|
||||
jmp_target[i] = 0;
|
||||
@ -1243,16 +1245,16 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
|
||||
int blk_offset = current_offset;
|
||||
|
||||
// Define the label at the beginning of the basic block
|
||||
MacroAssembler(cb).bind(blk_labels[b->_pre_order]);
|
||||
MacroAssembler(cb).bind(blk_labels[block->_pre_order]);
|
||||
|
||||
uint last_inst = b->_nodes.size();
|
||||
uint last_inst = block->_nodes.size();
|
||||
|
||||
// Emit block normally, except for last instruction.
|
||||
// Emit means "dump code bits into code buffer".
|
||||
for (uint j = 0; j<last_inst; j++) {
|
||||
|
||||
// Get the node
|
||||
Node* n = b->_nodes[j];
|
||||
Node* n = block->_nodes[j];
|
||||
|
||||
// See if delay slots are supported
|
||||
if (valid_bundle_info(n) &&
|
||||
@ -1306,9 +1308,9 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
|
||||
assert((padding % nop_size) == 0, "padding is not a multiple of NOP size");
|
||||
int nops_cnt = padding / nop_size;
|
||||
MachNode *nop = new (this) MachNopNode(nops_cnt);
|
||||
b->_nodes.insert(j++, nop);
|
||||
block->_nodes.insert(j++, nop);
|
||||
last_inst++;
|
||||
_cfg->map_node_to_block(nop, b);
|
||||
_cfg->map_node_to_block(nop, block);
|
||||
nop->emit(*cb, _regalloc);
|
||||
cb->flush_bundle(true);
|
||||
current_offset = cb->insts_size();
|
||||
@ -1322,7 +1324,7 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
|
||||
mcall->method_set((intptr_t)mcall->entry_point());
|
||||
|
||||
// Save the return address
|
||||
call_returns[b->_pre_order] = current_offset + mcall->ret_addr_offset();
|
||||
call_returns[block->_pre_order] = current_offset + mcall->ret_addr_offset();
|
||||
|
||||
if (mcall->is_MachCallLeaf()) {
|
||||
is_mcall = false;
|
||||
@ -1359,7 +1361,7 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
|
||||
// If this is a branch, then fill in the label with the target BB's label
|
||||
else if (mach->is_MachBranch()) {
|
||||
// This requires the TRUE branch target be in succs[0]
|
||||
uint block_num = b->non_connector_successor(0)->_pre_order;
|
||||
uint block_num = block->non_connector_successor(0)->_pre_order;
|
||||
|
||||
// Try to replace long branch if delay slot is not used,
|
||||
// it is mostly for back branches since forward branch's
|
||||
@ -1392,8 +1394,8 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
|
||||
// Insert padding between avoid_back_to_back branches.
|
||||
if (needs_padding && replacement->avoid_back_to_back()) {
|
||||
MachNode *nop = new (this) MachNopNode();
|
||||
b->_nodes.insert(j++, nop);
|
||||
_cfg->map_node_to_block(nop, b);
|
||||
block->_nodes.insert(j++, nop);
|
||||
_cfg->map_node_to_block(nop, block);
|
||||
last_inst++;
|
||||
nop->emit(*cb, _regalloc);
|
||||
cb->flush_bundle(true);
|
||||
@ -1405,7 +1407,7 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
|
||||
jmp_size[i] = new_size;
|
||||
jmp_rule[i] = mach->rule();
|
||||
#endif
|
||||
b->_nodes.map(j, replacement);
|
||||
block->_nodes.map(j, replacement);
|
||||
mach->subsume_by(replacement, C);
|
||||
n = replacement;
|
||||
mach = replacement;
|
||||
@ -1413,8 +1415,8 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
|
||||
}
|
||||
mach->as_MachBranch()->label_set( &blk_labels[block_num], block_num );
|
||||
} else if (mach->ideal_Opcode() == Op_Jump) {
|
||||
for (uint h = 0; h < b->_num_succs; h++) {
|
||||
Block* succs_block = b->_succs[h];
|
||||
for (uint h = 0; h < block->_num_succs; h++) {
|
||||
Block* succs_block = block->_succs[h];
|
||||
for (uint j = 1; j < succs_block->num_preds(); j++) {
|
||||
Node* jpn = succs_block->pred(j);
|
||||
if (jpn->is_JumpProj() && jpn->in(0) == mach) {
|
||||
@ -1425,7 +1427,6 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef ASSERT
|
||||
// Check that oop-store precedes the card-mark
|
||||
else if (mach->ideal_Opcode() == Op_StoreCM) {
|
||||
@ -1436,17 +1437,18 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
|
||||
if (oop_store == NULL) continue;
|
||||
count++;
|
||||
uint i4;
|
||||
for( i4 = 0; i4 < last_inst; ++i4 ) {
|
||||
if( b->_nodes[i4] == oop_store ) break;
|
||||
for (i4 = 0; i4 < last_inst; ++i4) {
|
||||
if (block->_nodes[i4] == oop_store) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Note: This test can provide a false failure if other precedence
|
||||
// edges have been added to the storeCMNode.
|
||||
assert( i4 == last_inst || i4 < storeCM_idx, "CM card-mark executes before oop-store");
|
||||
assert(i4 == last_inst || i4 < storeCM_idx, "CM card-mark executes before oop-store");
|
||||
}
|
||||
assert(count > 0, "storeCM expects at least one precedence edge");
|
||||
}
|
||||
#endif
|
||||
|
||||
else if (!n->is_Proj()) {
|
||||
// Remember the beginning of the previous instruction, in case
|
||||
// it's followed by a flag-kill and a null-check. Happens on
|
||||
@ -1542,12 +1544,12 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
|
||||
// If the next block is the top of a loop, pad this block out to align
|
||||
// the loop top a little. Helps prevent pipe stalls at loop back branches.
|
||||
if (i < nblocks-1) {
|
||||
Block *nb = _cfg->_blocks[i+1];
|
||||
Block *nb = _cfg->get_block(i + 1);
|
||||
int padding = nb->alignment_padding(current_offset);
|
||||
if( padding > 0 ) {
|
||||
MachNode *nop = new (this) MachNopNode(padding / nop_size);
|
||||
b->_nodes.insert( b->_nodes.size(), nop );
|
||||
_cfg->map_node_to_block(nop, b);
|
||||
block->_nodes.insert(block->_nodes.size(), nop);
|
||||
_cfg->map_node_to_block(nop, block);
|
||||
nop->emit(*cb, _regalloc);
|
||||
current_offset = cb->insts_size();
|
||||
}
|
||||
@ -1587,8 +1589,6 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
|
||||
}
|
||||
#endif
|
||||
|
||||
// ------------------
|
||||
|
||||
#ifndef PRODUCT
|
||||
// Information on the size of the method, without the extraneous code
|
||||
Scheduling::increment_method_size(cb->insts_size());
|
||||
@ -1649,52 +1649,55 @@ void Compile::FillExceptionTables(uint cnt, uint *call_returns, uint *inct_start
|
||||
_inc_table.set_size(cnt);
|
||||
|
||||
uint inct_cnt = 0;
|
||||
for( uint i=0; i<_cfg->_num_blocks; i++ ) {
|
||||
Block *b = _cfg->_blocks[i];
|
||||
for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
|
||||
Block* block = _cfg->get_block(i);
|
||||
Node *n = NULL;
|
||||
int j;
|
||||
|
||||
// Find the branch; ignore trailing NOPs.
|
||||
for( j = b->_nodes.size()-1; j>=0; j-- ) {
|
||||
n = b->_nodes[j];
|
||||
if( !n->is_Mach() || n->as_Mach()->ideal_Opcode() != Op_Con )
|
||||
for (j = block->_nodes.size() - 1; j >= 0; j--) {
|
||||
n = block->_nodes[j];
|
||||
if (!n->is_Mach() || n->as_Mach()->ideal_Opcode() != Op_Con) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// If we didn't find anything, continue
|
||||
if( j < 0 ) continue;
|
||||
if (j < 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Compute ExceptionHandlerTable subtable entry and add it
|
||||
// (skip empty blocks)
|
||||
if( n->is_Catch() ) {
|
||||
if (n->is_Catch()) {
|
||||
|
||||
// Get the offset of the return from the call
|
||||
uint call_return = call_returns[b->_pre_order];
|
||||
uint call_return = call_returns[block->_pre_order];
|
||||
#ifdef ASSERT
|
||||
assert( call_return > 0, "no call seen for this basic block" );
|
||||
while( b->_nodes[--j]->is_MachProj() ) ;
|
||||
assert( b->_nodes[j]->is_MachCall(), "CatchProj must follow call" );
|
||||
while (block->_nodes[--j]->is_MachProj()) ;
|
||||
assert(block->_nodes[j]->is_MachCall(), "CatchProj must follow call");
|
||||
#endif
|
||||
// last instruction is a CatchNode, find it's CatchProjNodes
|
||||
int nof_succs = b->_num_succs;
|
||||
int nof_succs = block->_num_succs;
|
||||
// allocate space
|
||||
GrowableArray<intptr_t> handler_bcis(nof_succs);
|
||||
GrowableArray<intptr_t> handler_pcos(nof_succs);
|
||||
// iterate through all successors
|
||||
for (int j = 0; j < nof_succs; j++) {
|
||||
Block* s = b->_succs[j];
|
||||
Block* s = block->_succs[j];
|
||||
bool found_p = false;
|
||||
for( uint k = 1; k < s->num_preds(); k++ ) {
|
||||
Node *pk = s->pred(k);
|
||||
if( pk->is_CatchProj() && pk->in(0) == n ) {
|
||||
for (uint k = 1; k < s->num_preds(); k++) {
|
||||
Node* pk = s->pred(k);
|
||||
if (pk->is_CatchProj() && pk->in(0) == n) {
|
||||
const CatchProjNode* p = pk->as_CatchProj();
|
||||
found_p = true;
|
||||
// add the corresponding handler bci & pco information
|
||||
if( p->_con != CatchProjNode::fall_through_index ) {
|
||||
if (p->_con != CatchProjNode::fall_through_index) {
|
||||
// p leads to an exception handler (and is not fall through)
|
||||
assert(s == _cfg->_blocks[s->_pre_order],"bad numbering");
|
||||
assert(s == _cfg->get_block(s->_pre_order), "bad numbering");
|
||||
// no duplicates, please
|
||||
if( !handler_bcis.contains(p->handler_bci()) ) {
|
||||
if (!handler_bcis.contains(p->handler_bci())) {
|
||||
uint block_num = s->non_connector()->_pre_order;
|
||||
handler_bcis.append(p->handler_bci());
|
||||
handler_pcos.append(blk_labels[block_num].loc_pos());
|
||||
@ -1713,9 +1716,9 @@ void Compile::FillExceptionTables(uint cnt, uint *call_returns, uint *inct_start
|
||||
}
|
||||
|
||||
// Handle implicit null exception table updates
|
||||
if( n->is_MachNullCheck() ) {
|
||||
uint block_num = b->non_connector_successor(0)->_pre_order;
|
||||
_inc_table.append( inct_starts[inct_cnt++], blk_labels[block_num].loc_pos() );
|
||||
if (n->is_MachNullCheck()) {
|
||||
uint block_num = block->non_connector_successor(0)->_pre_order;
|
||||
_inc_table.append(inct_starts[inct_cnt++], blk_labels[block_num].loc_pos());
|
||||
continue;
|
||||
}
|
||||
} // End of for all blocks fill in exception table entries
|
||||
@ -1774,14 +1777,12 @@ Scheduling::Scheduling(Arena *arena, Compile &compile)
|
||||
memset(_current_latency, 0, node_max * sizeof(unsigned short));
|
||||
|
||||
// Clear the bundling information
|
||||
memcpy(_bundle_use_elements,
|
||||
Pipeline_Use::elaborated_elements,
|
||||
sizeof(Pipeline_Use::elaborated_elements));
|
||||
memcpy(_bundle_use_elements, Pipeline_Use::elaborated_elements, sizeof(Pipeline_Use::elaborated_elements));
|
||||
|
||||
// Get the last node
|
||||
Block *bb = _cfg->_blocks[_cfg->_blocks.size()-1];
|
||||
Block* block = _cfg->get_block(_cfg->number_of_blocks() - 1);
|
||||
|
||||
_next_node = bb->_nodes[bb->_nodes.size()-1];
|
||||
_next_node = block->_nodes[block->_nodes.size() - 1];
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
@ -1831,7 +1832,6 @@ void Scheduling::step_and_clear() {
|
||||
sizeof(Pipeline_Use::elaborated_elements));
|
||||
}
|
||||
|
||||
//------------------------------ScheduleAndBundle------------------------------
|
||||
// Perform instruction scheduling and bundling over the sequence of
|
||||
// instructions in backwards order.
|
||||
void Compile::ScheduleAndBundle() {
|
||||
@ -1858,7 +1858,6 @@ void Compile::ScheduleAndBundle() {
|
||||
scheduling.DoScheduling();
|
||||
}
|
||||
|
||||
//------------------------------ComputeLocalLatenciesForward-------------------
|
||||
// Compute the latency of all the instructions. This is fairly simple,
|
||||
// because we already have a legal ordering. Walk over the instructions
|
||||
// from first to last, and compute the latency of the instruction based
|
||||
@ -2028,7 +2027,6 @@ Node * Scheduling::ChooseNodeToBundle() {
|
||||
return _available[0];
|
||||
}
|
||||
|
||||
//------------------------------AddNodeToAvailableList-------------------------
|
||||
void Scheduling::AddNodeToAvailableList(Node *n) {
|
||||
assert( !n->is_Proj(), "projections never directly made available" );
|
||||
#ifndef PRODUCT
|
||||
@ -2074,7 +2072,6 @@ void Scheduling::AddNodeToAvailableList(Node *n) {
|
||||
#endif
|
||||
}
|
||||
|
||||
//------------------------------DecrementUseCounts-----------------------------
|
||||
void Scheduling::DecrementUseCounts(Node *n, const Block *bb) {
|
||||
for ( uint i=0; i < n->len(); i++ ) {
|
||||
Node *def = n->in(i);
|
||||
@ -2097,7 +2094,6 @@ void Scheduling::DecrementUseCounts(Node *n, const Block *bb) {
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------AddNodeToBundle--------------------------------
|
||||
void Scheduling::AddNodeToBundle(Node *n, const Block *bb) {
|
||||
#ifndef PRODUCT
|
||||
if (_cfg->C->trace_opto_output()) {
|
||||
@ -2312,7 +2308,6 @@ void Scheduling::AddNodeToBundle(Node *n, const Block *bb) {
|
||||
DecrementUseCounts(n,bb);
|
||||
}
|
||||
|
||||
//------------------------------ComputeUseCount--------------------------------
|
||||
// This method sets the use count within a basic block. We will ignore all
|
||||
// uses outside the current basic block. As we are doing a backwards walk,
|
||||
// any node we reach that has a use count of 0 may be scheduled. This also
|
||||
@ -2397,20 +2392,22 @@ void Scheduling::DoScheduling() {
|
||||
Block *bb;
|
||||
|
||||
// Walk over all the basic blocks in reverse order
|
||||
for( int i=_cfg->_num_blocks-1; i >= 0; succ_bb = bb, i-- ) {
|
||||
bb = _cfg->_blocks[i];
|
||||
for (int i = _cfg->number_of_blocks() - 1; i >= 0; succ_bb = bb, i--) {
|
||||
bb = _cfg->get_block(i);
|
||||
|
||||
#ifndef PRODUCT
|
||||
if (_cfg->C->trace_opto_output()) {
|
||||
tty->print("# Schedule BB#%03d (initial)\n", i);
|
||||
for (uint j = 0; j < bb->_nodes.size(); j++)
|
||||
for (uint j = 0; j < bb->_nodes.size(); j++) {
|
||||
bb->_nodes[j]->dump();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// On the head node, skip processing
|
||||
if( bb == _cfg->_broot )
|
||||
if (bb == _cfg->get_root_block()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Skip empty, connector blocks
|
||||
if (bb->is_connector())
|
||||
@ -2547,7 +2544,6 @@ void Scheduling::DoScheduling() {
|
||||
|
||||
} // end DoScheduling
|
||||
|
||||
//------------------------------verify_good_schedule---------------------------
|
||||
// Verify that no live-range used in the block is killed in the block by a
|
||||
// wrong DEF. This doesn't verify live-ranges that span blocks.
|
||||
|
||||
@ -2560,7 +2556,6 @@ static bool edge_from_to( Node *from, Node *to ) {
|
||||
}
|
||||
|
||||
#ifdef ASSERT
|
||||
//------------------------------verify_do_def----------------------------------
|
||||
void Scheduling::verify_do_def( Node *n, OptoReg::Name def, const char *msg ) {
|
||||
// Check for bad kills
|
||||
if( OptoReg::is_valid(def) ) { // Ignore stores & control flow
|
||||
@ -2576,7 +2571,6 @@ void Scheduling::verify_do_def( Node *n, OptoReg::Name def, const char *msg ) {
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------verify_good_schedule---------------------------
|
||||
void Scheduling::verify_good_schedule( Block *b, const char *msg ) {
|
||||
|
||||
// Zap to something reasonable for the verify code
|
||||
@ -2636,7 +2630,6 @@ static void add_prec_edge_from_to( Node *from, Node *to ) {
|
||||
from->add_prec(to);
|
||||
}
|
||||
|
||||
//------------------------------anti_do_def------------------------------------
|
||||
void Scheduling::anti_do_def( Block *b, Node *def, OptoReg::Name def_reg, int is_def ) {
|
||||
if( !OptoReg::is_valid(def_reg) ) // Ignore stores & control flow
|
||||
return;
|
||||
@ -2706,7 +2699,6 @@ void Scheduling::anti_do_def( Block *b, Node *def, OptoReg::Name def_reg, int is
|
||||
add_prec_edge_from_to(kill,pinch);
|
||||
}
|
||||
|
||||
//------------------------------anti_do_use------------------------------------
|
||||
void Scheduling::anti_do_use( Block *b, Node *use, OptoReg::Name use_reg ) {
|
||||
if( !OptoReg::is_valid(use_reg) ) // Ignore stores & control flow
|
||||
return;
|
||||
@ -2727,7 +2719,6 @@ void Scheduling::anti_do_use( Block *b, Node *use, OptoReg::Name use_reg ) {
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------ComputeRegisterAntidependences-----------------
|
||||
// We insert antidependences between the reads and following write of
|
||||
// allocated registers to prevent illegal code motion. Hopefully, the
|
||||
// number of added references should be fairly small, especially as we
|
||||
@ -2861,8 +2852,6 @@ void Scheduling::ComputeRegisterAntidependencies(Block *b) {
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------garbage_collect_pinch_nodes-------------------------------
|
||||
|
||||
// Garbage collect pinch nodes for reuse by other blocks.
|
||||
//
|
||||
// The block scheduler's insertion of anti-dependence
|
||||
@ -2937,7 +2926,6 @@ void Scheduling::cleanup_pinch( Node *pinch ) {
|
||||
pinch->set_req(0, NULL);
|
||||
}
|
||||
|
||||
//------------------------------print_statistics-------------------------------
|
||||
#ifndef PRODUCT
|
||||
|
||||
void Scheduling::dump_available() const {
|
||||
|
@ -1643,8 +1643,8 @@ void PhasePeephole::do_transform() {
|
||||
bool method_name_not_printed = true;
|
||||
|
||||
// Examine each basic block
|
||||
for( uint block_number = 1; block_number < _cfg._num_blocks; ++block_number ) {
|
||||
Block *block = _cfg._blocks[block_number];
|
||||
for (uint block_number = 1; block_number < _cfg.number_of_blocks(); ++block_number) {
|
||||
Block* block = _cfg.get_block(block_number);
|
||||
bool block_not_printed = true;
|
||||
|
||||
// and each instruction within a block
|
||||
|
@ -405,28 +405,29 @@ void PhaseChaitin::post_allocate_copy_removal() {
|
||||
|
||||
// Need a mapping from basic block Node_Lists. We need a Node_List to
|
||||
// map from register number to value-producing Node.
|
||||
Node_List **blk2value = NEW_RESOURCE_ARRAY( Node_List *, _cfg._num_blocks+1);
|
||||
memset( blk2value, 0, sizeof(Node_List*)*(_cfg._num_blocks+1) );
|
||||
Node_List **blk2value = NEW_RESOURCE_ARRAY( Node_List *, _cfg.number_of_blocks() + 1);
|
||||
memset(blk2value, 0, sizeof(Node_List*) * (_cfg.number_of_blocks() + 1));
|
||||
// Need a mapping from basic block Node_Lists. We need a Node_List to
|
||||
// map from register number to register-defining Node.
|
||||
Node_List **blk2regnd = NEW_RESOURCE_ARRAY( Node_List *, _cfg._num_blocks+1);
|
||||
memset( blk2regnd, 0, sizeof(Node_List*)*(_cfg._num_blocks+1) );
|
||||
Node_List **blk2regnd = NEW_RESOURCE_ARRAY( Node_List *, _cfg.number_of_blocks() + 1);
|
||||
memset(blk2regnd, 0, sizeof(Node_List*) * (_cfg.number_of_blocks() + 1));
|
||||
|
||||
// We keep unused Node_Lists on a free_list to avoid wasting
|
||||
// memory.
|
||||
GrowableArray<Node_List*> free_list = GrowableArray<Node_List*>(16);
|
||||
|
||||
// For all blocks
|
||||
for( uint i = 0; i < _cfg._num_blocks; i++ ) {
|
||||
for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
|
||||
uint j;
|
||||
Block *b = _cfg._blocks[i];
|
||||
Block* block = _cfg.get_block(i);
|
||||
|
||||
// Count of Phis in block
|
||||
uint phi_dex;
|
||||
for( phi_dex = 1; phi_dex < b->_nodes.size(); phi_dex++ ) {
|
||||
Node *phi = b->_nodes[phi_dex];
|
||||
if( !phi->is_Phi() )
|
||||
for (phi_dex = 1; phi_dex < block->_nodes.size(); phi_dex++) {
|
||||
Node* phi = block->_nodes[phi_dex];
|
||||
if (!phi->is_Phi()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// If any predecessor has not been visited, we do not know the state
|
||||
@ -434,21 +435,23 @@ void PhaseChaitin::post_allocate_copy_removal() {
|
||||
// along Phi input edges
|
||||
bool missing_some_inputs = false;
|
||||
Block *freed = NULL;
|
||||
for( j = 1; j < b->num_preds(); j++ ) {
|
||||
Block *pb = _cfg.get_block_for_node(b->pred(j));
|
||||
for (j = 1; j < block->num_preds(); j++) {
|
||||
Block* pb = _cfg.get_block_for_node(block->pred(j));
|
||||
// Remove copies along phi edges
|
||||
for( uint k=1; k<phi_dex; k++ )
|
||||
elide_copy( b->_nodes[k], j, b, *blk2value[pb->_pre_order], *blk2regnd[pb->_pre_order], false );
|
||||
if( blk2value[pb->_pre_order] ) { // Have a mapping on this edge?
|
||||
for (uint k = 1; k < phi_dex; k++) {
|
||||
elide_copy(block->_nodes[k], j, block, *blk2value[pb->_pre_order], *blk2regnd[pb->_pre_order], false);
|
||||
}
|
||||
if (blk2value[pb->_pre_order]) { // Have a mapping on this edge?
|
||||
// See if this predecessor's mappings have been used by everybody
|
||||
// who wants them. If so, free 'em.
|
||||
uint k;
|
||||
for( k=0; k<pb->_num_succs; k++ ) {
|
||||
Block *pbsucc = pb->_succs[k];
|
||||
if( !blk2value[pbsucc->_pre_order] && pbsucc != b )
|
||||
for (k = 0; k < pb->_num_succs; k++) {
|
||||
Block* pbsucc = pb->_succs[k];
|
||||
if (!blk2value[pbsucc->_pre_order] && pbsucc != block) {
|
||||
break; // Found a future user
|
||||
}
|
||||
}
|
||||
if( k >= pb->_num_succs ) { // No more uses, free!
|
||||
if (k >= pb->_num_succs) { // No more uses, free!
|
||||
freed = pb; // Record last block freed
|
||||
free_list.push(blk2value[pb->_pre_order]);
|
||||
free_list.push(blk2regnd[pb->_pre_order]);
|
||||
@ -467,20 +470,20 @@ void PhaseChaitin::post_allocate_copy_removal() {
|
||||
value.map(_max_reg,NULL);
|
||||
regnd.map(_max_reg,NULL);
|
||||
// Set mappings as OUR mappings
|
||||
blk2value[b->_pre_order] = &value;
|
||||
blk2regnd[b->_pre_order] = ®nd;
|
||||
blk2value[block->_pre_order] = &value;
|
||||
blk2regnd[block->_pre_order] = ®nd;
|
||||
|
||||
// Initialize value & regnd for this block
|
||||
if( missing_some_inputs ) {
|
||||
if (missing_some_inputs) {
|
||||
// Some predecessor has not yet been visited; zap map to empty
|
||||
for( uint k = 0; k < (uint)_max_reg; k++ ) {
|
||||
for (uint k = 0; k < (uint)_max_reg; k++) {
|
||||
value.map(k,NULL);
|
||||
regnd.map(k,NULL);
|
||||
}
|
||||
} else {
|
||||
if( !freed ) { // Didn't get a freebie prior block
|
||||
// Must clone some data
|
||||
freed = _cfg.get_block_for_node(b->pred(1));
|
||||
freed = _cfg.get_block_for_node(block->pred(1));
|
||||
Node_List &f_value = *blk2value[freed->_pre_order];
|
||||
Node_List &f_regnd = *blk2regnd[freed->_pre_order];
|
||||
for( uint k = 0; k < (uint)_max_reg; k++ ) {
|
||||
@ -489,9 +492,11 @@ void PhaseChaitin::post_allocate_copy_removal() {
|
||||
}
|
||||
}
|
||||
// Merge all inputs together, setting to NULL any conflicts.
|
||||
for( j = 1; j < b->num_preds(); j++ ) {
|
||||
Block *pb = _cfg.get_block_for_node(b->pred(j));
|
||||
if( pb == freed ) continue; // Did self already via freelist
|
||||
for (j = 1; j < block->num_preds(); j++) {
|
||||
Block* pb = _cfg.get_block_for_node(block->pred(j));
|
||||
if (pb == freed) {
|
||||
continue; // Did self already via freelist
|
||||
}
|
||||
Node_List &p_regnd = *blk2regnd[pb->_pre_order];
|
||||
for( uint k = 0; k < (uint)_max_reg; k++ ) {
|
||||
if( regnd[k] != p_regnd[k] ) { // Conflict on reaching defs?
|
||||
@ -503,9 +508,9 @@ void PhaseChaitin::post_allocate_copy_removal() {
|
||||
}
|
||||
|
||||
// For all Phi's
|
||||
for( j = 1; j < phi_dex; j++ ) {
|
||||
for (j = 1; j < phi_dex; j++) {
|
||||
uint k;
|
||||
Node *phi = b->_nodes[j];
|
||||
Node *phi = block->_nodes[j];
|
||||
uint pidx = _lrg_map.live_range_id(phi);
|
||||
OptoReg::Name preg = lrgs(_lrg_map.live_range_id(phi)).reg();
|
||||
|
||||
@ -516,8 +521,8 @@ void PhaseChaitin::post_allocate_copy_removal() {
|
||||
if( phi != x && u != x ) // Found a different input
|
||||
u = u ? NodeSentinel : x; // Capture unique input, or NodeSentinel for 2nd input
|
||||
}
|
||||
if( u != NodeSentinel ) { // Junk Phi. Remove
|
||||
b->_nodes.remove(j--);
|
||||
if (u != NodeSentinel) { // Junk Phi. Remove
|
||||
block->_nodes.remove(j--);
|
||||
phi_dex--;
|
||||
_cfg.unmap_node_from_block(phi);
|
||||
phi->replace_by(u);
|
||||
@ -547,13 +552,13 @@ void PhaseChaitin::post_allocate_copy_removal() {
|
||||
}
|
||||
|
||||
// For all remaining instructions
|
||||
for( j = phi_dex; j < b->_nodes.size(); j++ ) {
|
||||
Node *n = b->_nodes[j];
|
||||
for (j = phi_dex; j < block->_nodes.size(); j++) {
|
||||
Node* n = block->_nodes[j];
|
||||
|
||||
if( n->outcnt() == 0 && // Dead?
|
||||
n != C->top() && // (ignore TOP, it has no du info)
|
||||
!n->is_Proj() ) { // fat-proj kills
|
||||
j -= yank_if_dead(n,b,&value,®nd);
|
||||
if(n->outcnt() == 0 && // Dead?
|
||||
n != C->top() && // (ignore TOP, it has no du info)
|
||||
!n->is_Proj() ) { // fat-proj kills
|
||||
j -= yank_if_dead(n, block, &value, ®nd);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -598,8 +603,9 @@ void PhaseChaitin::post_allocate_copy_removal() {
|
||||
const uint two_adr = n->is_Mach() ? n->as_Mach()->two_adr() : 0;
|
||||
|
||||
// Remove copies along input edges
|
||||
for( k = 1; k < n->req(); k++ )
|
||||
j -= elide_copy( n, k, b, value, regnd, two_adr!=k );
|
||||
for (k = 1; k < n->req(); k++) {
|
||||
j -= elide_copy(n, k, block, value, regnd, two_adr != k);
|
||||
}
|
||||
|
||||
// Unallocated Nodes define no registers
|
||||
uint lidx = _lrg_map.live_range_id(n);
|
||||
@ -630,8 +636,8 @@ void PhaseChaitin::post_allocate_copy_removal() {
|
||||
// then 'n' is a useless copy. Do not update the register->node
|
||||
// mapping so 'n' will go dead.
|
||||
if( value[nreg] != val ) {
|
||||
if (eliminate_copy_of_constant(val, n, b, value, regnd, nreg, OptoReg::Bad)) {
|
||||
j -= replace_and_yank_if_dead(n, nreg, b, value, regnd);
|
||||
if (eliminate_copy_of_constant(val, n, block, value, regnd, nreg, OptoReg::Bad)) {
|
||||
j -= replace_and_yank_if_dead(n, nreg, block, value, regnd);
|
||||
} else {
|
||||
// Update the mapping: record new Node defined by the register
|
||||
regnd.map(nreg,n);
|
||||
@ -640,8 +646,8 @@ void PhaseChaitin::post_allocate_copy_removal() {
|
||||
value.map(nreg,val);
|
||||
}
|
||||
} else if( !may_be_copy_of_callee(n) ) {
|
||||
assert( n->is_Copy(), "" );
|
||||
j -= replace_and_yank_if_dead(n, nreg, b, value, regnd);
|
||||
assert(n->is_Copy(), "");
|
||||
j -= replace_and_yank_if_dead(n, nreg, block, value, regnd);
|
||||
}
|
||||
} else if (RegMask::is_vector(n_ideal_reg)) {
|
||||
// If Node 'n' does not change the value mapped by the register,
|
||||
@ -660,7 +666,7 @@ void PhaseChaitin::post_allocate_copy_removal() {
|
||||
}
|
||||
} else if (n->is_Copy()) {
|
||||
// Note: vector can't be constant and can't be copy of calee.
|
||||
j -= replace_and_yank_if_dead(n, nreg, b, value, regnd);
|
||||
j -= replace_and_yank_if_dead(n, nreg, block, value, regnd);
|
||||
}
|
||||
} else {
|
||||
// If the value occupies a register pair, record same info
|
||||
@ -674,18 +680,18 @@ void PhaseChaitin::post_allocate_copy_removal() {
|
||||
tmp.Remove(nreg);
|
||||
nreg_lo = tmp.find_first_elem();
|
||||
}
|
||||
if( value[nreg] != val || value[nreg_lo] != val ) {
|
||||
if (eliminate_copy_of_constant(val, n, b, value, regnd, nreg, nreg_lo)) {
|
||||
j -= replace_and_yank_if_dead(n, nreg, b, value, regnd);
|
||||
if (value[nreg] != val || value[nreg_lo] != val) {
|
||||
if (eliminate_copy_of_constant(val, n, block, value, regnd, nreg, nreg_lo)) {
|
||||
j -= replace_and_yank_if_dead(n, nreg, block, value, regnd);
|
||||
} else {
|
||||
regnd.map(nreg , n );
|
||||
regnd.map(nreg_lo, n );
|
||||
value.map(nreg ,val);
|
||||
value.map(nreg_lo,val);
|
||||
}
|
||||
} else if( !may_be_copy_of_callee(n) ) {
|
||||
assert( n->is_Copy(), "" );
|
||||
j -= replace_and_yank_if_dead(n, nreg, b, value, regnd);
|
||||
} else if (!may_be_copy_of_callee(n)) {
|
||||
assert(n->is_Copy(), "");
|
||||
j -= replace_and_yank_if_dead(n, nreg, block, value, regnd);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -529,13 +529,13 @@ uint PhaseChaitin::Split(uint maxlrg, ResourceArea* split_arena) {
|
||||
// a Def is UP or DOWN. UP means that it should get a register (ie -
|
||||
// it is always in LRP regions), and DOWN means that it is probably
|
||||
// on the stack (ie - it crosses HRP regions).
|
||||
Node ***Reaches = NEW_SPLIT_ARRAY( Node**, _cfg._num_blocks+1 );
|
||||
bool **UP = NEW_SPLIT_ARRAY( bool*, _cfg._num_blocks+1 );
|
||||
Node ***Reaches = NEW_SPLIT_ARRAY( Node**, _cfg.number_of_blocks() + 1);
|
||||
bool **UP = NEW_SPLIT_ARRAY( bool*, _cfg.number_of_blocks() + 1);
|
||||
Node **debug_defs = NEW_SPLIT_ARRAY( Node*, spill_cnt );
|
||||
VectorSet **UP_entry= NEW_SPLIT_ARRAY( VectorSet*, spill_cnt );
|
||||
|
||||
// Initialize Reaches & UP
|
||||
for( bidx = 0; bidx < _cfg._num_blocks+1; bidx++ ) {
|
||||
for (bidx = 0; bidx < _cfg.number_of_blocks() + 1; bidx++) {
|
||||
Reaches[bidx] = NEW_SPLIT_ARRAY( Node*, spill_cnt );
|
||||
UP[bidx] = NEW_SPLIT_ARRAY( bool, spill_cnt );
|
||||
Node **Reachblock = Reaches[bidx];
|
||||
@ -555,13 +555,13 @@ uint PhaseChaitin::Split(uint maxlrg, ResourceArea* split_arena) {
|
||||
//----------PASS 1----------
|
||||
//----------Propagation & Node Insertion Code----------
|
||||
// Walk the Blocks in RPO for DEF & USE info
|
||||
for( bidx = 0; bidx < _cfg._num_blocks; bidx++ ) {
|
||||
for( bidx = 0; bidx < _cfg.number_of_blocks(); bidx++ ) {
|
||||
|
||||
if (C->check_node_count(spill_cnt, out_of_nodes)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
b = _cfg._blocks[bidx];
|
||||
b = _cfg.get_block(bidx);
|
||||
// Reaches & UP arrays for this block
|
||||
Reachblock = Reaches[b->_pre_order];
|
||||
UPblock = UP[b->_pre_order];
|
||||
@ -1394,8 +1394,8 @@ uint PhaseChaitin::Split(uint maxlrg, ResourceArea* split_arena) {
|
||||
// DEBUG
|
||||
#ifdef ASSERT
|
||||
// Validate all live range index assignments
|
||||
for (bidx = 0; bidx < _cfg._num_blocks; bidx++) {
|
||||
b = _cfg._blocks[bidx];
|
||||
for (bidx = 0; bidx < _cfg.number_of_blocks(); bidx++) {
|
||||
b = _cfg.get_block(bidx);
|
||||
for (insidx = 0; insidx <= b->end_idx(); insidx++) {
|
||||
Node *n = b->_nodes[insidx];
|
||||
uint defidx = _lrg_map.find(n);
|
||||
|
@ -1096,10 +1096,10 @@ typedef BinaryTreeDictionary<Metablock, FreeList> MetablockTreeDictionary;
|
||||
\
|
||||
c2_nonstatic_field(MachCallRuntimeNode, _name, const char*) \
|
||||
\
|
||||
c2_nonstatic_field(PhaseCFG, _num_blocks, uint) \
|
||||
c2_nonstatic_field(PhaseCFG, _number_of_blocks, uint) \
|
||||
c2_nonstatic_field(PhaseCFG, _blocks, Block_List) \
|
||||
c2_nonstatic_field(PhaseCFG, _node_to_block_mapping, Block_Array) \
|
||||
c2_nonstatic_field(PhaseCFG, _broot, Block*) \
|
||||
c2_nonstatic_field(PhaseCFG, _root_block, Block*) \
|
||||
\
|
||||
c2_nonstatic_field(PhaseRegAlloc, _node_regs, OptoRegPair*) \
|
||||
c2_nonstatic_field(PhaseRegAlloc, _node_regs_max_index, uint) \
|
||||
|
Loading…
Reference in New Issue
Block a user