8154826: AArch64: take better advantage of base + shifted offset addressing mode

Reshape the address subtree to fit the AArch64 addressing modes

Reviewed-by: kvn, aph
Roland Westrelin 2016-05-09 11:34:09 +02:00
parent 7adc4034cd
commit a1e5418d19
10 changed files with 311 additions and 263 deletions

src/cpu/aarch64/vm/aarch64.ad View File

@ -996,6 +996,7 @@ definitions %{
source_hpp %{
#include "gc/shared/cardTableModRefBS.hpp"
#include "opto/addnode.hpp"
class CallStubImpl {
@ -1061,6 +1062,9 @@ class HandlerImpl {
// predicate controlling translation of StoreCM
bool unnecessary_storestore(const Node *storecm);
// predicate controlling addressing modes
bool size_fits_all_mem_uses(AddPNode* addp, int shift);
%}
source %{
@ -3449,11 +3453,6 @@ const int Matcher::float_cmove_cost() {
// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;
// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers? True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = false;
// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;
@ -3572,8 +3571,119 @@ const RegMask Matcher::method_handle_invoke_SP_save_mask() {
return FP_REG_mask();
}
bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
Node* u = addp->fast_out(i);
if (u->is_Mem()) {
int opsize = u->as_Mem()->memory_size();
assert(opsize > 0, "unexpected memory operand size");
if (opsize != (1 << shift)) {
return false;
}
}
}
return true;
}
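
The predicate above encodes an AArch64 encoding constraint: the scaled-register form ldr/str Rt, [Xn, Xm, lsl #s] only supports s == log2(access size), so a shifted index is foldable only when every memory user of the AddP accesses exactly 1 << shift bytes. A minimal standalone sketch of the check (hypothetical helper, not HotSpot code):

#include <cstdio>

// Hypothetical model of size_fits_all_mem_uses(): an index shifted left by
// `shift` can be folded into an AArch64 scaled access only when the access
// size is exactly 1 << shift bytes.
static bool shift_matches_access(int shift, int memory_size) {
  return memory_size == (1 << shift);
}

int main() {
  printf("%d\n", shift_matches_access(2, 4)); // ldr  w0, [x1, x2, lsl #2]: encodable
  printf("%d\n", shift_matches_access(2, 1)); // ldrb w0, [x1, x2, lsl #2]: no such encoding
  return 0;
}
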
const bool Matcher::convi2l_type_required = false;
// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
if (clone_base_plus_offset_address(m, mstack, address_visited)) {
return true;
}
Node *off = m->in(AddPNode::Offset);
if (off->Opcode() == Op_LShiftL && off->in(2)->is_Con() &&
size_fits_all_mem_uses(m, off->in(2)->get_int()) &&
// Are there other uses besides address expressions?
!is_visited(off)) {
address_visited.set(off->_idx); // Flag as address_visited
mstack.push(off->in(2), Visit);
Node *conv = off->in(1);
if (conv->Opcode() == Op_ConvI2L &&
// Are there other uses besides address expressions?
!is_visited(conv)) {
address_visited.set(conv->_idx); // Flag as address_visited
mstack.push(conv->in(1), Pre_Visit);
} else {
mstack.push(conv, Pre_Visit);
}
address_visited.test_set(m->_idx); // Flag as address_visited
mstack.push(m->in(AddPNode::Address), Pre_Visit);
mstack.push(m->in(AddPNode::Base), Pre_Visit);
return true;
} else if (off->Opcode() == Op_ConvI2L &&
// Are there other uses besides address expressions?
!is_visited(off)) {
address_visited.test_set(m->_idx); // Flag as address_visited
address_visited.set(off->_idx); // Flag as address_visited
mstack.push(off->in(1), Pre_Visit);
mstack.push(m->in(AddPNode::Address), Pre_Visit);
mstack.push(m->in(AddPNode::Base), Pre_Visit);
return true;
}
return false;
}
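
For intuition, here is a hypothetical source-level pattern this cloning targets. When clone_address_expressions() accepts the shifted index, each memory access folds its own copy of the scaled index into its addressing mode instead of sharing one materialized shift:

// Hypothetical example: dst[i] and src[i] share the address expression
// "i << 2". With the shift cloned into both address subtrees, each access
// can use the scaled form (e.g. ldr w8, [x1, w2, sxtw #2] and
// str w8, [x0, w2, sxtw #2]) rather than computing i << 2 once into a
// register and feeding both accesses through plain adds.
void copy_ints(int* dst, const int* src, int n) {
  for (int i = 0; i < n; i++) {
    dst[i] = src[i];
  }
}
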
// Transform:
// (AddP base (AddP base address (LShiftL index con)) offset)
// into:
// (AddP base (AddP base offset) (LShiftL index con))
// to take full advantage of ARM's addressing modes
void Compile::reshape_address(AddPNode* addp) {
Node *addr = addp->in(AddPNode::Address);
if (addr->is_AddP() && addr->in(AddPNode::Base) == addp->in(AddPNode::Base)) {
const AddPNode *addp2 = addr->as_AddP();
if ((addp2->in(AddPNode::Offset)->Opcode() == Op_LShiftL &&
addp2->in(AddPNode::Offset)->in(2)->is_Con() &&
size_fits_all_mem_uses(addp, addp2->in(AddPNode::Offset)->in(2)->get_int())) ||
addp2->in(AddPNode::Offset)->Opcode() == Op_ConvI2L) {
// Any use that can't embed the address computation?
for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
Node* u = addp->fast_out(i);
if (!u->is_Mem() || u->is_LoadVector() || u->is_StoreVector() || u->Opcode() == Op_StoreCM) {
return;
}
}
Node* off = addp->in(AddPNode::Offset);
Node* addr2 = addp2->in(AddPNode::Address);
Node* base = addp->in(AddPNode::Base);
Node* new_addr = NULL;
// Check whether the graph already has the new AddP we need
// before we create one (no GVN available here).
for (DUIterator_Fast imax, i = addr2->fast_outs(imax); i < imax; i++) {
Node* u = addr2->fast_out(i);
if (u->is_AddP() &&
u->in(AddPNode::Base) == base &&
u->in(AddPNode::Address) == addr2 &&
u->in(AddPNode::Offset) == off) {
new_addr = u;
break;
}
}
if (new_addr == NULL) {
new_addr = new AddPNode(base, addr2, off);
}
Node* new_off = addp2->in(AddPNode::Offset);
addp->set_req(AddPNode::Address, new_addr);
if (addr->outcnt() == 0) {
addr->disconnect_inputs(NULL, this);
}
addp->set_req(AddPNode::Offset, new_off);
if (off->outcnt() == 0) {
off->disconnect_inputs(NULL, this);
}
}
}
}
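
As a worked example with assumed constants (a 16-byte array header and 4-byte elements, both hypothetical here), the rewrite leaves the computed address unchanged while moving the constant offset into the inner add, so the scaled index lands where a base + scaled-register access can absorb it:

#include <cassert>
#include <cstdint>

int main() {
  uintptr_t base = 0x1000; // array oop (hypothetical value)
  uintptr_t i    = 5;      // element index
  // Before: (AddP base (AddP base (LShiftL i 2)) 16) -- offset applied last.
  uintptr_t before = (base + (i << 2)) + 16;
  // After: (AddP base (AddP base 16) (LShiftL i 2)) -- offset folded into the
  // base once (e.g. add x3, x1, #16), leaving "index, lsl #2" for the access.
  uintptr_t after = (base + 16) + (i << 2);
  assert(before == after); // same address, better-shaped subtree
  return 0;
}
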
// helper for encoding java_to_runtime calls on sim
//
// this is needed to compute the extra arguments required when
@ -3643,12 +3753,10 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
// encoder that the index needs to be sign extended, so we have to
// enumerate all the cases.
switch (opcode) {
case INDINDEXSCALEDOFFSETI2L:
case INDINDEXSCALEDI2L:
case INDINDEXSCALEDOFFSETI2LN:
case INDINDEXSCALEDI2LN:
case INDINDEXOFFSETI2L:
case INDINDEXOFFSETI2LN:
case INDINDEXI2L:
case INDINDEXI2LN:
scale = Address::sxtw(size);
break;
default:
@ -3658,12 +3766,8 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
if (index == -1) {
(masm.*insn)(reg, Address(base, disp));
} else {
if (disp == 0) {
(masm.*insn)(reg, Address(base, as_Register(index), scale));
} else {
masm.lea(rscratch1, Address(base, disp));
(masm.*insn)(reg, Address(rscratch1, as_Register(index), scale));
}
assert(disp == 0, "unsupported address mode: disp = %d", disp);
(masm.*insn)(reg, Address(base, as_Register(index), scale));
}
}
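
The sxtw cases matter because a Java int index widened by ConvI2L may sit in a 64-bit register whose upper half is not a sign extension; the sign-extending register form must be used or a negative index addresses the wrong location. A small standalone demonstration (not HotSpot code):

#include <cstdint>
#include <cstdio>

int main() {
  int32_t i = -1; // negative 32-bit index
  // Zero-extended (what a plain 64-bit use of the register would see):
  uint64_t zext = (uint32_t)i;  // 0x00000000FFFFFFFF
  // Sign-extended (what the sxtw addressing extension computes):
  int64_t  sxtw = (int64_t)i;   // 0xFFFFFFFFFFFFFFFF, i.e. -1
  printf("zext=0x%016llx sxtw=0x%016llx\n",
         (unsigned long long)zext, (unsigned long long)sxtw);
  return 0;
}
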
@ -3674,9 +3778,7 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
Address::extend scale;
switch (opcode) {
case INDINDEXSCALEDOFFSETI2L:
case INDINDEXSCALEDI2L:
case INDINDEXSCALEDOFFSETI2LN:
case INDINDEXSCALEDI2LN:
scale = Address::sxtw(size);
break;
@ -3687,12 +3789,8 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
if (index == -1) {
(masm.*insn)(reg, Address(base, disp));
} else {
if (disp == 0) {
(masm.*insn)(reg, Address(base, as_Register(index), scale));
} else {
masm.lea(rscratch1, Address(base, disp));
(masm.*insn)(reg, Address(rscratch1, as_Register(index), scale));
}
assert(disp == 0, "unsupported address mode: disp = %d", disp);
(masm.*insn)(reg, Address(base, as_Register(index), scale));
}
}
@ -6106,65 +6204,10 @@ operand indirect(iRegP reg)
%}
%}
operand indIndexScaledOffsetI(iRegP reg, iRegL lreg, immIScale scale, immIU12 off)
%{
constraint(ALLOC_IN_RC(ptr_reg));
match(AddP (AddP reg (LShiftL lreg scale)) off);
op_cost(INSN_COST);
format %{ "$reg, $lreg lsl($scale), $off" %}
interface(MEMORY_INTER) %{
base($reg);
index($lreg);
scale($scale);
disp($off);
%}
%}
operand indIndexScaledOffsetL(iRegP reg, iRegL lreg, immIScale scale, immLU12 off)
%{
constraint(ALLOC_IN_RC(ptr_reg));
match(AddP (AddP reg (LShiftL lreg scale)) off);
op_cost(INSN_COST);
format %{ "$reg, $lreg lsl($scale), $off" %}
interface(MEMORY_INTER) %{
base($reg);
index($lreg);
scale($scale);
disp($off);
%}
%}
operand indIndexOffsetI2L(iRegP reg, iRegI ireg, immLU12 off)
%{
constraint(ALLOC_IN_RC(ptr_reg));
match(AddP (AddP reg (ConvI2L ireg)) off);
op_cost(INSN_COST);
format %{ "$reg, $ireg, $off I2L" %}
interface(MEMORY_INTER) %{
base($reg);
index($ireg);
scale(0x0);
disp($off);
%}
%}
operand indIndexScaledOffsetI2L(iRegP reg, iRegI ireg, immIScale scale, immLU12 off)
%{
constraint(ALLOC_IN_RC(ptr_reg));
match(AddP (AddP reg (LShiftL (ConvI2L ireg) scale)) off);
op_cost(INSN_COST);
format %{ "$reg, $ireg sxtw($scale), $off I2L" %}
interface(MEMORY_INTER) %{
base($reg);
index($ireg);
scale($scale);
disp($off);
%}
%}
operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
%{
constraint(ALLOC_IN_RC(ptr_reg));
predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
match(AddP reg (LShiftL (ConvI2L ireg) scale));
op_cost(0);
format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
@ -6179,6 +6222,7 @@ operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
%{
constraint(ALLOC_IN_RC(ptr_reg));
predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
match(AddP reg (LShiftL lreg scale));
op_cost(0);
format %{ "$reg, $lreg lsl($scale)" %}
@ -6190,6 +6234,20 @@ operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
%}
%}
operand indIndexI2L(iRegP reg, iRegI ireg)
%{
constraint(ALLOC_IN_RC(ptr_reg));
match(AddP reg (ConvI2L ireg));
op_cost(0);
format %{ "$reg, $ireg, 0, I2L" %}
interface(MEMORY_INTER) %{
base($reg);
index($ireg);
scale(0x0);
disp(0x0);
%}
%}
operand indIndex(iRegP reg, iRegL lreg)
%{
constraint(ALLOC_IN_RC(ptr_reg));
@ -6331,69 +6389,9 @@ operand indirectN(iRegN reg)
%}
%}
operand indIndexScaledOffsetIN(iRegN reg, iRegL lreg, immIScale scale, immIU12 off)
%{
predicate(Universe::narrow_oop_shift() == 0);
constraint(ALLOC_IN_RC(ptr_reg));
match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
op_cost(0);
format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %}
interface(MEMORY_INTER) %{
base($reg);
index($lreg);
scale($scale);
disp($off);
%}
%}
operand indIndexScaledOffsetLN(iRegN reg, iRegL lreg, immIScale scale, immLU12 off)
%{
predicate(Universe::narrow_oop_shift() == 0);
constraint(ALLOC_IN_RC(ptr_reg));
match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
op_cost(INSN_COST);
format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %}
interface(MEMORY_INTER) %{
base($reg);
index($lreg);
scale($scale);
disp($off);
%}
%}
operand indIndexOffsetI2LN(iRegN reg, iRegI ireg, immLU12 off)
%{
predicate(Universe::narrow_oop_shift() == 0);
constraint(ALLOC_IN_RC(ptr_reg));
match(AddP (AddP (DecodeN reg) (ConvI2L ireg)) off);
op_cost(INSN_COST);
format %{ "$reg, $ireg, $off I2L\t# narrow" %}
interface(MEMORY_INTER) %{
base($reg);
index($ireg);
scale(0x0);
disp($off);
%}
%}
operand indIndexScaledOffsetI2LN(iRegN reg, iRegI ireg, immIScale scale, immLU12 off)
%{
predicate(Universe::narrow_oop_shift() == 0);
constraint(ALLOC_IN_RC(ptr_reg));
match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale)) off);
op_cost(INSN_COST);
format %{ "$reg, $ireg sxtw($scale), $off I2L\t# narrow" %}
interface(MEMORY_INTER) %{
base($reg);
index($ireg);
scale($scale);
disp($off);
%}
%}
operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
%{
predicate(Universe::narrow_oop_shift() == 0);
predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
constraint(ALLOC_IN_RC(ptr_reg));
match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
op_cost(0);
@ -6408,7 +6406,7 @@ operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
%{
predicate(Universe::narrow_oop_shift() == 0);
predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
constraint(ALLOC_IN_RC(ptr_reg));
match(AddP (DecodeN reg) (LShiftL lreg scale));
op_cost(0);
@ -6421,6 +6419,21 @@ operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
%}
%}
operand indIndexI2LN(iRegN reg, iRegI ireg)
%{
predicate(Universe::narrow_oop_shift() == 0);
constraint(ALLOC_IN_RC(ptr_reg));
match(AddP (DecodeN reg) (ConvI2L ireg));
op_cost(0);
format %{ "$reg, $ireg, 0, I2L\t# narrow" %}
interface(MEMORY_INTER) %{
base($reg);
index($ireg);
scale(0x0);
disp(0x0);
%}
%}
operand indIndexN(iRegN reg, iRegL lreg)
%{
predicate(Universe::narrow_oop_shift() == 0);
@ -6641,9 +6654,8 @@ opclass vmem16(indirect, indIndex, indOffI16, indOffL16);
// memory is used to define read/write location for load/store
// instruction defs. we can turn a memory op into an Address
opclass memory(indirect, indIndexScaledOffsetI, indIndexScaledOffsetL, indIndexOffsetI2L, indIndexScaledOffsetI2L, indIndexScaled, indIndexScaledI2L, indIndex, indOffI, indOffL,
indirectN, indIndexScaledOffsetIN, indIndexScaledOffsetLN, indIndexOffsetI2LN, indIndexScaledOffsetI2LN, indIndexScaledN, indIndexScaledI2LN, indIndexN, indOffIN, indOffLN);
opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI, indOffL,
indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);
// iRegIorL2I is used for src inputs in rules for 32 bit int (I)
// operations. it allows the src to be either an iRegI or a (ConvL2I

src/cpu/ppc/vm/ppc.ad View File

@ -817,6 +817,16 @@ source_hpp %{
source %{
// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
return clone_base_plus_offset_address(m, mstack, address_visited);
}
void Compile::reshape_address(AddPNode* addp) {
}
// Optimize load-acquire.
//
// Check if acquire is unnecessary due to following operation that does
@ -2157,11 +2167,6 @@ const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; }
// Power6 requires postalloc expand (see block.cpp for description of postalloc expand).
const bool Matcher::require_postalloc_expand = true;
// Should the Matcher clone shifts on addressing modes, expecting them to
// be subsumed into complex addressing expressions or compute them into
// registers? True for Intel but false for most RISCs.
const bool Matcher::clone_shift_expressions = false;
// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
// PowerPC requires masked shift counts.

src/cpu/sparc/vm/sparc.ad View File

@ -1995,11 +1995,6 @@ const int Matcher::float_cmove_cost() {
// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;
// Should the Matcher clone shifts on addressing modes, expecting them to
// be subsumed into complex addressing expressions or compute them into
// registers? True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = false;
// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;
@ -2133,8 +2128,19 @@ const RegMask Matcher::method_handle_invoke_SP_save_mask() {
return L7_REGP_mask();
}
const bool Matcher::convi2l_type_required = true;
// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
return clone_base_plus_offset_address(m, mstack, address_visited);
}
void Compile::reshape_address(AddPNode* addp) {
}
%}

src/cpu/x86/vm/x86.ad View File

@ -1586,6 +1586,8 @@ class HandlerImpl {
source %{
#include "opto/addnode.hpp"
// Emit exception handler code.
// Stuff framesize into a register and call a VM stub routine.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
@ -1861,8 +1863,79 @@ const bool Matcher::pass_original_key_for_aes() {
return false;
}
const bool Matcher::convi2l_type_required = true;
// Check for shift by small constant as well
static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
shift->in(2)->get_int() <= 3 &&
// Are there other uses besides address expressions?
!matcher->is_visited(shift)) {
address_visited.set(shift->_idx); // Flag as address_visited
mstack.push(shift->in(2), Matcher::Visit);
Node *conv = shift->in(1);
#ifdef _LP64
// Allow Matcher to match the rule which bypasses the
// ConvI2L operation for an array index on LP64
// if the index value is positive.
if (conv->Opcode() == Op_ConvI2L &&
conv->as_Type()->type()->is_long()->_lo >= 0 &&
// Are there other uses besides address expressions?
!matcher->is_visited(conv)) {
address_visited.set(conv->_idx); // Flag as address_visited
mstack.push(conv->in(1), Matcher::Pre_Visit);
} else
#endif
mstack.push(conv, Matcher::Pre_Visit);
return true;
}
return false;
}
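
The shift->in(2)->get_int() <= 3 bound reflects the x86 SIB encoding, which only supports scale factors 1, 2, 4 and 8. A sketch of that constraint (hypothetical helper, not HotSpot code):

#include <cstdio>

// Hypothetical model of the clone_shift() bound: x86 SIB addressing encodes
// scale = 1 << shift only for shift in [0, 3].
static bool x86_can_fold_shift(int shift) {
  return shift >= 0 && shift <= 3;
}

int main() {
  for (int s = 0; s <= 4; s++) {
    printf("shift %d -> %s\n", s,
           x86_can_fold_shift(s) ? "foldable into SIB" : "computed separately");
  }
  return 0;
}
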
// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
Node *off = m->in(AddPNode::Offset);
if (off->is_Con()) {
address_visited.test_set(m->_idx); // Flag as address_visited
Node *adr = m->in(AddPNode::Address);
// Intel can handle 2 adds in addressing mode
// AtomicAdd is not an addressing expression.
// Cheap to find it by looking for screwy base.
if (adr->is_AddP() &&
!adr->in(AddPNode::Base)->is_top() &&
// Are there other uses besides address expressions?
!is_visited(adr)) {
address_visited.set(adr->_idx); // Flag as address_visited
Node *shift = adr->in(AddPNode::Offset);
if (!clone_shift(shift, this, mstack, address_visited)) {
mstack.push(shift, Pre_Visit);
}
mstack.push(adr->in(AddPNode::Address), Pre_Visit);
mstack.push(adr->in(AddPNode::Base), Pre_Visit);
} else {
mstack.push(adr, Pre_Visit);
}
// Clone X+offset as it also folds into most addressing expressions
mstack.push(off, Visit);
mstack.push(m->in(AddPNode::Base), Pre_Visit);
return true;
} else if (clone_shift(off, this, mstack, address_visited)) {
address_visited.test_set(m->_idx); // Flag as address_visited
mstack.push(m->in(AddPNode::Address), Pre_Visit);
mstack.push(m->in(AddPNode::Base), Pre_Visit);
return true;
}
return false;
}
void Compile::reshape_address(AddPNode* addp) {
}
// Helper methods for MachSpillCopyNode::implementation().
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
int src_hi, int dst_hi, uint ireg, outputStream* st) {

src/cpu/x86/vm/x86_32.ad View File

@ -1438,11 +1438,6 @@ const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimi
// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;
// Should the Matcher clone shifts on addressing modes, expecting them to
// be subsumed into complex addressing expressions or compute them into
// registers? True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = true;
// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;

src/cpu/x86/vm/x86_64.ad View File

@ -1646,11 +1646,6 @@ const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; }
// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;
// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers? True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = true;
// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;

src/share/vm/opto/compile.cpp View File

@ -2905,6 +2905,8 @@ void Compile::final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc) {
}
}
#endif
// platform dependent reshaping of the address expression
reshape_address(n->as_AddP());
break;
}

src/share/vm/opto/compile.hpp View File

@ -44,6 +44,7 @@
#include "trace/tracing.hpp"
#include "utilities/ticks.hpp"
class AddPNode;
class Block;
class Bundle;
class C2Compiler;
@ -579,6 +580,8 @@ class Compile : public Phase {
int _scratch_const_size; // For temporary code buffers.
bool _in_scratch_emit_size; // true when in scratch_emit_size.
void reshape_address(AddPNode* n);
public:
// Accessors

src/share/vm/opto/matcher.cpp View File

@ -963,44 +963,6 @@ static void match_alias_type(Compile* C, Node* n, Node* m) {
}
#endif
//------------------------------MStack-----------------------------------------
// State and MStack class used in xform() and find_shared() iterative methods.
enum Node_State { Pre_Visit, // node has to be pre-visited
Visit, // visit node
Post_Visit, // post-visit node
Alt_Post_Visit // alternative post-visit path
};
class MStack: public Node_Stack {
public:
MStack(int size) : Node_Stack(size) { }
void push(Node *n, Node_State ns) {
Node_Stack::push(n, (uint)ns);
}
void push(Node *n, Node_State ns, Node *parent, int indx) {
++_inode_top;
if ((_inode_top + 1) >= _inode_max) grow();
_inode_top->node = parent;
_inode_top->indx = (uint)indx;
++_inode_top;
_inode_top->node = n;
_inode_top->indx = (uint)ns;
}
Node *parent() {
pop();
return node();
}
Node_State state() const {
return (Node_State)index();
}
void set_state(Node_State ns) {
set_index((uint)ns);
}
};
//------------------------------xform------------------------------------------
// Given a Node in old-space, Match him (Label/Reduce) to produce a machine
// Node in new-space. Given a new-space Node, recursively walk his children.
@ -2046,37 +2008,22 @@ bool Matcher::is_bmi_pattern(Node *n, Node *m) {
}
#endif // X86
// A method-klass-holder may be passed in the inline_cache_reg
// and then expanded into the inline_cache_reg and a method_oop register
// defined in ad_<arch>.cpp
// Check for shift by small constant as well
static bool clone_shift(Node* shift, Matcher* matcher, MStack& mstack, VectorSet& address_visited) {
if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
shift->in(2)->get_int() <= 3 &&
// Are there other uses besides address expressions?
!matcher->is_visited(shift)) {
address_visited.set(shift->_idx); // Flag as address_visited
mstack.push(shift->in(2), Visit);
Node *conv = shift->in(1);
#ifdef _LP64
// Allow Matcher to match the rule which bypasses the
// ConvI2L operation for an array index on LP64
// if the index value is positive.
if (conv->Opcode() == Op_ConvI2L &&
conv->as_Type()->type()->is_long()->_lo >= 0 &&
// Are there other uses besides address expressions?
!matcher->is_visited(conv)) {
address_visited.set(conv->_idx); // Flag as address_visited
mstack.push(conv->in(1), Pre_Visit);
} else
#endif
mstack.push(conv, Pre_Visit);
bool Matcher::clone_base_plus_offset_address(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
Node *off = m->in(AddPNode::Offset);
if (off->is_Con()) {
address_visited.test_set(m->_idx); // Flag as address_visited
mstack.push(m->in(AddPNode::Address), Pre_Visit);
// Clone X+offset as it also folds into most addressing expressions
mstack.push(off, Visit);
mstack.push(m->in(AddPNode::Base), Pre_Visit);
return true;
}
return false;
}
// A method-klass-holder may be passed in the inline_cache_reg
// and then expanded into the inline_cache_reg and a method_oop register
// defined in ad_<arch>.cpp
//------------------------------find_shared------------------------------------
// Set bits if Node is shared or otherwise a root
@ -2251,40 +2198,9 @@ void Matcher::find_shared( Node *n ) {
// But they should be marked as shared if there are other uses
// besides address expressions.
Node *off = m->in(AddPNode::Offset);
if (off->is_Con()) {
address_visited.test_set(m->_idx); // Flag as address_visited
Node *adr = m->in(AddPNode::Address);
// Intel, ARM and friends can handle 2 adds in addressing mode
if( clone_shift_expressions && adr->is_AddP() &&
// AtomicAdd is not an addressing expression.
// Cheap to find it by looking for screwy base.
!adr->in(AddPNode::Base)->is_top() &&
// Are there other uses besides address expressions?
!is_visited(adr) ) {
address_visited.set(adr->_idx); // Flag as address_visited
Node *shift = adr->in(AddPNode::Offset);
if (!clone_shift(shift, this, mstack, address_visited)) {
mstack.push(shift, Pre_Visit);
}
mstack.push(adr->in(AddPNode::Address), Pre_Visit);
mstack.push(adr->in(AddPNode::Base), Pre_Visit);
} else { // Sparc, Alpha, PPC and friends
mstack.push(adr, Pre_Visit);
}
// Clone X+offset as it also folds into most addressing expressions
mstack.push(off, Visit);
mstack.push(m->in(AddPNode::Base), Pre_Visit);
continue; // for(int i = ...)
} else if (clone_shift_expressions &&
clone_shift(off, this, mstack, address_visited)) {
address_visited.test_set(m->_idx); // Flag as address_visited
mstack.push(m->in(AddPNode::Address), Pre_Visit);
mstack.push(m->in(AddPNode::Base), Pre_Visit);
continue;
} // if( off->is_Con() )
if (clone_address_expressions(m->as_AddP(), mstack, address_visited)) {
continue;
}
} // if( mem_op &&
mstack.push(m, Pre_Visit);
} // for(int i = ...)

src/share/vm/opto/matcher.hpp View File

@ -40,6 +40,45 @@ class MachOper;
//---------------------------Matcher-------------------------------------------
class Matcher : public PhaseTransform {
friend class VMStructs;
public:
// State and MStack class used in xform() and find_shared() iterative methods.
enum Node_State { Pre_Visit, // node has to be pre-visited
Visit, // visit node
Post_Visit, // post-visit node
Alt_Post_Visit // alternative post-visit path
};
class MStack: public Node_Stack {
public:
MStack(int size) : Node_Stack(size) { }
void push(Node *n, Node_State ns) {
Node_Stack::push(n, (uint)ns);
}
void push(Node *n, Node_State ns, Node *parent, int indx) {
++_inode_top;
if ((_inode_top + 1) >= _inode_max) grow();
_inode_top->node = parent;
_inode_top->indx = (uint)indx;
++_inode_top;
_inode_top->node = n;
_inode_top->indx = (uint)ns;
}
Node *parent() {
pop();
return node();
}
Node_State state() const {
return (Node_State)index();
}
void set_state(Node_State ns) {
set_index((uint)ns);
}
};
private:
// Private arena of State objects
ResourceArea _states_arena;
@ -411,7 +450,9 @@ public:
// Should the Matcher clone shifts on addressing modes, expecting them to
// be subsumed into complex addressing expressions or compute them into
// registers? True for Intel but false for most RISCs
static const bool clone_shift_expressions;
bool clone_address_expressions(AddPNode* m, MStack& mstack, VectorSet& address_visited);
// Clone base + offset address expression
bool clone_base_plus_offset_address(AddPNode* m, MStack& mstack, VectorSet& address_visited);
static bool narrow_oop_use_complex_address();
static bool narrow_klass_use_complex_address();