8154826: AArch64: take advantage better of base + shifted offset addressing mode

Reshape address subtree to fit aarch64 addressing mode
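For illustration only (the instruction sequences below are a sketch, not taken from this change): an int-array access such as a[i] computes base + 16 + (i << 2). AArch64 has no addressing mode that combines a register index with an immediate displacement, so matching the whole expression as one memory operand forced the encoder to materialize base + 16 into a scratch register at every access. Reshaping the AddP subtree makes base + 16 a separate AddP node that all the accesses can share, while each access folds the shifted, sign-extended index into its own load or store:

    add  x8, x1, #16              // base + offset, now an explicit, shareable node
    ldr  w0, [x8, w2, sxtw #2]    // index folded into the load
    ldr  w3, [x8, w4, sxtw #2]    // later accesses reuse x8 instead of a per-access lea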

Reviewed-by: kvn, aph
Roland Westrelin 2016-05-09 11:34:09 +02:00
parent 7adc4034cd
commit a1e5418d19
10 changed files with 311 additions and 263 deletions

File: aarch64.ad

@@ -996,6 +996,7 @@ definitions %{
 source_hpp %{
 
 #include "gc/shared/cardTableModRefBS.hpp"
+#include "opto/addnode.hpp"
 
 class CallStubImpl {
@@ -1061,6 +1062,9 @@ class HandlerImpl {
   // predicate controlling translation of StoreCM
   bool unnecessary_storestore(const Node *storecm);
 
+  // predicate controlling addressing modes
+  bool size_fits_all_mem_uses(AddPNode* addp, int shift);
+
 %}
 
 source %{
@@ -3449,11 +3453,6 @@ const int Matcher::float_cmove_cost() {
 // Does the CPU require late expand (see block.cpp for description of late expand)?
 const bool Matcher::require_postalloc_expand = false;
 
-// Should the Matcher clone shifts on addressing modes, expecting them
-// to be subsumed into complex addressing expressions or compute them
-// into registers?  True for Intel but false for most RISCs
-const bool Matcher::clone_shift_expressions = false;
-
 // Do we need to mask the count passed to shift instructions or does
 // the cpu only look at the lower 5/6 bits anyway?
 const bool Matcher::need_masked_shift_count = false;
@@ -3572,8 +3571,119 @@ const RegMask Matcher::method_handle_invoke_SP_save_mask() {
   return FP_REG_mask();
 }
 
+bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
+  for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
+    Node* u = addp->fast_out(i);
+    if (u->is_Mem()) {
+      int opsize = u->as_Mem()->memory_size();
+      assert(opsize > 0, "unexpected memory operand size");
+      if (u->as_Mem()->memory_size() != (1<<shift)) {
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
 const bool Matcher::convi2l_type_required = false;
 
+// Should the Matcher clone shifts on addressing modes, expecting them
+// to be subsumed into complex addressing expressions or compute them
+// into registers?
+bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
+  if (clone_base_plus_offset_address(m, mstack, address_visited)) {
+    return true;
+  }
+
+  Node *off = m->in(AddPNode::Offset);
+  if (off->Opcode() == Op_LShiftL && off->in(2)->is_Con() &&
+      size_fits_all_mem_uses(m, off->in(2)->get_int()) &&
+      // Are there other uses besides address expressions?
+      !is_visited(off)) {
+    address_visited.set(off->_idx); // Flag as address_visited
+    mstack.push(off->in(2), Visit);
+    Node *conv = off->in(1);
+    if (conv->Opcode() == Op_ConvI2L &&
+        // Are there other uses besides address expressions?
+        !is_visited(conv)) {
+      address_visited.set(conv->_idx); // Flag as address_visited
+      mstack.push(conv->in(1), Pre_Visit);
+    } else {
+      mstack.push(conv, Pre_Visit);
+    }
+    address_visited.test_set(m->_idx); // Flag as address_visited
+    mstack.push(m->in(AddPNode::Address), Pre_Visit);
+    mstack.push(m->in(AddPNode::Base), Pre_Visit);
+    return true;
+  } else if (off->Opcode() == Op_ConvI2L &&
+             // Are there other uses besides address expressions?
+             !is_visited(off)) {
+    address_visited.test_set(m->_idx); // Flag as address_visited
+    address_visited.set(off->_idx); // Flag as address_visited
+    mstack.push(off->in(1), Pre_Visit);
+    mstack.push(m->in(AddPNode::Address), Pre_Visit);
+    mstack.push(m->in(AddPNode::Base), Pre_Visit);
+    return true;
+  }
+  return false;
+}
+
+// Transform:
+//   (AddP base (AddP base address (LShiftL index con)) offset)
+// into:
+//   (AddP base (AddP base offset) (LShiftL index con))
+// to take full advantage of ARM's addressing modes
+void Compile::reshape_address(AddPNode* addp) {
+  Node *addr = addp->in(AddPNode::Address);
+  if (addr->is_AddP() && addr->in(AddPNode::Base) == addp->in(AddPNode::Base)) {
+    const AddPNode *addp2 = addr->as_AddP();
+    if ((addp2->in(AddPNode::Offset)->Opcode() == Op_LShiftL &&
+         addp2->in(AddPNode::Offset)->in(2)->is_Con() &&
+         size_fits_all_mem_uses(addp, addp2->in(AddPNode::Offset)->in(2)->get_int())) ||
+        addp2->in(AddPNode::Offset)->Opcode() == Op_ConvI2L) {
+
+      // Any use that can't embed the address computation?
+      for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
+        Node* u = addp->fast_out(i);
+        if (!u->is_Mem() || u->is_LoadVector() || u->is_StoreVector() || u->Opcode() == Op_StoreCM) {
+          return;
+        }
+      }
+
+      Node* off = addp->in(AddPNode::Offset);
+      Node* addr2 = addp2->in(AddPNode::Address);
+      Node* base = addp->in(AddPNode::Base);
+
+      Node* new_addr = NULL;
+      // Check whether the graph already has the new AddP we need
+      // before we create one (no GVN available here).
+      for (DUIterator_Fast imax, i = addr2->fast_outs(imax); i < imax; i++) {
+        Node* u = addr2->fast_out(i);
+        if (u->is_AddP() &&
+            u->in(AddPNode::Base) == base &&
+            u->in(AddPNode::Address) == addr2 &&
+            u->in(AddPNode::Offset) == off) {
+          new_addr = u;
+          break;
+        }
+      }
+
+      if (new_addr == NULL) {
+        new_addr = new AddPNode(base, addr2, off);
+      }
+      Node* new_off = addp2->in(AddPNode::Offset);
+      addp->set_req(AddPNode::Address, new_addr);
+      if (addr->outcnt() == 0) {
+        addr->disconnect_inputs(NULL, this);
+      }
+      addp->set_req(AddPNode::Offset, new_off);
+      if (off->outcnt() == 0) {
+        off->disconnect_inputs(NULL, this);
+      }
+    }
+  }
+}
+
 // helper for encoding java_to_runtime calls on sim
 //
 // this is needed to compute the extra arguments required when
@@ -3643,12 +3753,10 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
     // encoder that the index needs to be sign extended, so we have to
     // enumerate all the cases.
     switch (opcode) {
-    case INDINDEXSCALEDOFFSETI2L:
     case INDINDEXSCALEDI2L:
-    case INDINDEXSCALEDOFFSETI2LN:
     case INDINDEXSCALEDI2LN:
-    case INDINDEXOFFSETI2L:
-    case INDINDEXOFFSETI2LN:
+    case INDINDEXI2L:
+    case INDINDEXI2LN:
       scale = Address::sxtw(size);
       break;
     default:
@@ -3658,12 +3766,8 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
     if (index == -1) {
       (masm.*insn)(reg, Address(base, disp));
     } else {
-      if (disp == 0) {
-        (masm.*insn)(reg, Address(base, as_Register(index), scale));
-      } else {
-        masm.lea(rscratch1, Address(base, disp));
-        (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale));
-      }
+      assert(disp == 0, "unsupported address mode: disp = %d", disp);
+      (masm.*insn)(reg, Address(base, as_Register(index), scale));
     }
   }
@@ -3674,9 +3778,7 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
     Address::extend scale;
 
     switch (opcode) {
-    case INDINDEXSCALEDOFFSETI2L:
     case INDINDEXSCALEDI2L:
-    case INDINDEXSCALEDOFFSETI2LN:
     case INDINDEXSCALEDI2LN:
       scale = Address::sxtw(size);
       break;
@@ -3687,12 +3789,8 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
     if (index == -1) {
       (masm.*insn)(reg, Address(base, disp));
     } else {
-      if (disp == 0) {
-        (masm.*insn)(reg, Address(base, as_Register(index), scale));
-      } else {
-        masm.lea(rscratch1, Address(base, disp));
-        (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale));
-      }
+      assert(disp == 0, "unsupported address mode: disp = %d", disp);
+      (masm.*insn)(reg, Address(base, as_Register(index), scale));
     }
   }
@@ -6106,65 +6204,10 @@ operand indirect(iRegP reg)
   %}
 %}
 
-operand indIndexScaledOffsetI(iRegP reg, iRegL lreg, immIScale scale, immIU12 off)
-%{
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP (AddP reg (LShiftL lreg scale)) off);
-  op_cost(INSN_COST);
-  format %{ "$reg, $lreg lsl($scale), $off" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index($lreg);
-    scale($scale);
-    disp($off);
-  %}
-%}
-
-operand indIndexScaledOffsetL(iRegP reg, iRegL lreg, immIScale scale, immLU12 off)
-%{
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP (AddP reg (LShiftL lreg scale)) off);
-  op_cost(INSN_COST);
-  format %{ "$reg, $lreg lsl($scale), $off" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index($lreg);
-    scale($scale);
-    disp($off);
-  %}
-%}
-
-operand indIndexOffsetI2L(iRegP reg, iRegI ireg, immLU12 off)
-%{
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP (AddP reg (ConvI2L ireg)) off);
-  op_cost(INSN_COST);
-  format %{ "$reg, $ireg, $off I2L" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index($ireg);
-    scale(0x0);
-    disp($off);
-  %}
-%}
-
-operand indIndexScaledOffsetI2L(iRegP reg, iRegI ireg, immIScale scale, immLU12 off)
-%{
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP (AddP reg (LShiftL (ConvI2L ireg) scale)) off);
-  op_cost(INSN_COST);
-  format %{ "$reg, $ireg sxtw($scale), $off I2L" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index($ireg);
-    scale($scale);
-    disp($off);
-  %}
-%}
-
 operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
 %{
   constraint(ALLOC_IN_RC(ptr_reg));
+  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
   match(AddP reg (LShiftL (ConvI2L ireg) scale));
   op_cost(0);
   format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
@@ -6179,6 +6222,7 @@ operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
 operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
 %{
   constraint(ALLOC_IN_RC(ptr_reg));
+  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
   match(AddP reg (LShiftL lreg scale));
   op_cost(0);
   format %{ "$reg, $lreg lsl($scale)" %}
@@ -6190,6 +6234,20 @@ operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
   %}
 %}
 
+operand indIndexI2L(iRegP reg, iRegI ireg)
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP reg (ConvI2L ireg));
+  op_cost(0);
+  format %{ "$reg, $ireg, 0, I2L" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($ireg);
+    scale(0x0);
+    disp(0x0);
+  %}
+%}
+
 operand indIndex(iRegP reg, iRegL lreg)
 %{
   constraint(ALLOC_IN_RC(ptr_reg));
@@ -6331,69 +6389,9 @@ operand indirectN(iRegN reg)
   %}
 %}
 
-operand indIndexScaledOffsetIN(iRegN reg, iRegL lreg, immIScale scale, immIU12 off)
-%{
-  predicate(Universe::narrow_oop_shift() == 0);
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
-  op_cost(0);
-  format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index($lreg);
-    scale($scale);
-    disp($off);
-  %}
-%}
-
-operand indIndexScaledOffsetLN(iRegN reg, iRegL lreg, immIScale scale, immLU12 off)
-%{
-  predicate(Universe::narrow_oop_shift() == 0);
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
-  op_cost(INSN_COST);
-  format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index($lreg);
-    scale($scale);
-    disp($off);
-  %}
-%}
-
-operand indIndexOffsetI2LN(iRegN reg, iRegI ireg, immLU12 off)
-%{
-  predicate(Universe::narrow_oop_shift() == 0);
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP (AddP (DecodeN reg) (ConvI2L ireg)) off);
-  op_cost(INSN_COST);
-  format %{ "$reg, $ireg, $off I2L\t# narrow" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index($ireg);
-    scale(0x0);
-    disp($off);
-  %}
-%}
-
-operand indIndexScaledOffsetI2LN(iRegN reg, iRegI ireg, immIScale scale, immLU12 off)
-%{
-  predicate(Universe::narrow_oop_shift() == 0);
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale)) off);
-  op_cost(INSN_COST);
-  format %{ "$reg, $ireg sxtw($scale), $off I2L\t# narrow" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index($ireg);
-    scale($scale);
-    disp($off);
-  %}
-%}
-
 operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
 %{
-  predicate(Universe::narrow_oop_shift() == 0);
+  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
   constraint(ALLOC_IN_RC(ptr_reg));
   match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
   op_cost(0);
@@ -6408,7 +6406,7 @@ operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
 operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
 %{
-  predicate(Universe::narrow_oop_shift() == 0);
+  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
   constraint(ALLOC_IN_RC(ptr_reg));
   match(AddP (DecodeN reg) (LShiftL lreg scale));
   op_cost(0);
@@ -6421,6 +6419,21 @@ operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
   %}
 %}
 
+operand indIndexI2LN(iRegN reg, iRegI ireg)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) (ConvI2L ireg));
+  op_cost(0);
+  format %{ "$reg, $ireg, 0, I2L\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($ireg);
+    scale(0x0);
+    disp(0x0);
+  %}
+%}
+
 operand indIndexN(iRegN reg, iRegL lreg)
 %{
   predicate(Universe::narrow_oop_shift() == 0);
@@ -6641,9 +6654,8 @@ opclass vmem16(indirect, indIndex, indOffI16, indOffL16);
 // memory is used to define read/write location for load/store
 // instruction defs. we can turn a memory op into an Address
 
-opclass memory(indirect, indIndexScaledOffsetI, indIndexScaledOffsetL, indIndexOffsetI2L, indIndexScaledOffsetI2L, indIndexScaled, indIndexScaledI2L, indIndex, indOffI, indOffL,
-               indirectN, indIndexScaledOffsetIN, indIndexScaledOffsetLN, indIndexOffsetI2LN, indIndexScaledOffsetI2LN, indIndexScaledN, indIndexScaledI2LN, indIndexN, indOffIN, indOffLN);
+opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI, indOffL,
+               indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);
 
 // iRegIorL2I is used for src inputs in rules for 32 bit int (I)
 // operations. it allows the src to be either an iRegI or a (ConvL2I

File: ppc.ad

@@ -817,6 +817,16 @@ source_hpp %{
 source %{
 
+// Should the Matcher clone shifts on addressing modes, expecting them
+// to be subsumed into complex addressing expressions or compute them
+// into registers?
+bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
+  return clone_base_plus_offset_address(m, mstack, address_visited);
+}
+
+void Compile::reshape_address(AddPNode* addp) {
+}
+
 // Optimize load-acquire.
 //
 // Check if acquire is unnecessary due to following operation that does
@@ -2157,11 +2167,6 @@ const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; }
 // Power6 requires postalloc expand (see block.cpp for description of postalloc expand).
 const bool Matcher::require_postalloc_expand = true;
 
-// Should the Matcher clone shifts on addressing modes, expecting them to
-// be subsumed into complex addressing expressions or compute them into
-// registers? True for Intel but false for most RISCs.
-const bool Matcher::clone_shift_expressions = false;
-
 // Do we need to mask the count passed to shift instructions or does
 // the cpu only look at the lower 5/6 bits anyway?
 // PowerPC requires masked shift counts.

File: sparc.ad

@@ -1995,11 +1995,6 @@ const int Matcher::float_cmove_cost() {
 // Does the CPU require late expand (see block.cpp for description of late expand)?
 const bool Matcher::require_postalloc_expand = false;
 
-// Should the Matcher clone shifts on addressing modes, expecting them to
-// be subsumed into complex addressing expressions or compute them into
-// registers? True for Intel but false for most RISCs
-const bool Matcher::clone_shift_expressions = false;
-
 // Do we need to mask the count passed to shift instructions or does
 // the cpu only look at the lower 5/6 bits anyway?
 const bool Matcher::need_masked_shift_count = false;
@@ -2133,8 +2128,19 @@ const RegMask Matcher::method_handle_invoke_SP_save_mask() {
   return L7_REGP_mask();
 }
 
 const bool Matcher::convi2l_type_required = true;
 
+// Should the Matcher clone shifts on addressing modes, expecting them
+// to be subsumed into complex addressing expressions or compute them
+// into registers?
+bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
+  return clone_base_plus_offset_address(m, mstack, address_visited);
+}
+
+void Compile::reshape_address(AddPNode* addp) {
+}
+
 %}

File: x86.ad

@@ -1586,6 +1586,8 @@ class HandlerImpl {
 source %{
 
+#include "opto/addnode.hpp"
+
 // Emit exception handler code.
 // Stuff framesize into a register and call a VM stub routine.
 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
@@ -1861,8 +1863,79 @@ const bool Matcher::pass_original_key_for_aes() {
   return false;
 }
 
 const bool Matcher::convi2l_type_required = true;
 
+// Check for shift by small constant as well
+static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
+  if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
+      shift->in(2)->get_int() <= 3 &&
+      // Are there other uses besides address expressions?
+      !matcher->is_visited(shift)) {
+    address_visited.set(shift->_idx); // Flag as address_visited
+    mstack.push(shift->in(2), Matcher::Visit);
+    Node *conv = shift->in(1);
+#ifdef _LP64
+    // Allow Matcher to match the rule which bypass
+    // ConvI2L operation for an array index on LP64
+    // if the index value is positive.
+    if (conv->Opcode() == Op_ConvI2L &&
+        conv->as_Type()->type()->is_long()->_lo >= 0 &&
+        // Are there other uses besides address expressions?
+        !matcher->is_visited(conv)) {
+      address_visited.set(conv->_idx); // Flag as address_visited
+      mstack.push(conv->in(1), Matcher::Pre_Visit);
+    } else
+#endif
+      mstack.push(conv, Matcher::Pre_Visit);
+    return true;
+  }
+  return false;
+}
+
+// Should the Matcher clone shifts on addressing modes, expecting them
+// to be subsumed into complex addressing expressions or compute them
+// into registers?
+bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
+  Node *off = m->in(AddPNode::Offset);
+  if (off->is_Con()) {
+    address_visited.test_set(m->_idx); // Flag as address_visited
+    Node *adr = m->in(AddPNode::Address);
+    // Intel can handle 2 adds in addressing mode
+    // AtomicAdd is not an addressing expression.
+    // Cheap to find it by looking for screwy base.
+    if (adr->is_AddP() &&
+        !adr->in(AddPNode::Base)->is_top() &&
+        // Are there other uses besides address expressions?
+        !is_visited(adr)) {
+      address_visited.set(adr->_idx); // Flag as address_visited
+      Node *shift = adr->in(AddPNode::Offset);
+      if (!clone_shift(shift, this, mstack, address_visited)) {
+        mstack.push(shift, Pre_Visit);
+      }
+      mstack.push(adr->in(AddPNode::Address), Pre_Visit);
+      mstack.push(adr->in(AddPNode::Base), Pre_Visit);
+    } else {
+      mstack.push(adr, Pre_Visit);
+    }
+    // Clone X+offset as it also folds into most addressing expressions
+    mstack.push(off, Visit);
+    mstack.push(m->in(AddPNode::Base), Pre_Visit);
+    return true;
+  } else if (clone_shift(off, this, mstack, address_visited)) {
+    address_visited.test_set(m->_idx); // Flag as address_visited
+    mstack.push(m->in(AddPNode::Address), Pre_Visit);
+    mstack.push(m->in(AddPNode::Base), Pre_Visit);
+    return true;
+  }
+  return false;
+}
+
+void Compile::reshape_address(AddPNode* addp) {
+}
+
 // Helper methods for MachSpillCopyNode::implementation().
 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                           int src_hi, int dst_hi, uint ireg, outputStream* st) {

File: x86_32.ad

@@ -1438,11 +1438,6 @@ const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimi
 // Does the CPU require late expand (see block.cpp for description of late expand)?
 const bool Matcher::require_postalloc_expand = false;
 
-// Should the Matcher clone shifts on addressing modes, expecting them to
-// be subsumed into complex addressing expressions or compute them into
-// registers? True for Intel but false for most RISCs
-const bool Matcher::clone_shift_expressions = true;
-
 // Do we need to mask the count passed to shift instructions or does
 // the cpu only look at the lower 5/6 bits anyway?
 const bool Matcher::need_masked_shift_count = false;

File: x86_64.ad

@@ -1646,11 +1646,6 @@ const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; }
 // Does the CPU require late expand (see block.cpp for description of late expand)?
 const bool Matcher::require_postalloc_expand = false;
 
-// Should the Matcher clone shifts on addressing modes, expecting them
-// to be subsumed into complex addressing expressions or compute them
-// into registers? True for Intel but false for most RISCs
-const bool Matcher::clone_shift_expressions = true;
-
 // Do we need to mask the count passed to shift instructions or does
 // the cpu only look at the lower 5/6 bits anyway?
 const bool Matcher::need_masked_shift_count = false;

File: compile.cpp

@@ -2905,6 +2905,8 @@ void Compile::final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc) {
       }
     }
 #endif
+    // platform dependent reshaping of the address expression
+    reshape_address(n->as_AddP());
     break;
   }

File: compile.hpp

@@ -44,6 +44,7 @@
 #include "trace/tracing.hpp"
 #include "utilities/ticks.hpp"
 
+class AddPNode;
 class Block;
 class Bundle;
 class C2Compiler;
@@ -579,6 +580,8 @@ class Compile : public Phase {
   int _scratch_const_size;        // For temporary code buffers.
   bool _in_scratch_emit_size;     // true when in scratch_emit_size.
 
+  void reshape_address(AddPNode* n);
+
  public:
   // Accessors

File: matcher.cpp

@@ -963,44 +963,6 @@ static void match_alias_type(Compile* C, Node* n, Node* m) {
 }
 #endif
 
-//------------------------------MStack-----------------------------------------
-// State and MStack class used in xform() and find_shared() iterative methods.
-enum Node_State { Pre_Visit,  // node has to be pre-visited
-                  Visit,  // visit node
-                  Post_Visit,  // post-visit node
-                  Alt_Post_Visit   // alternative post-visit path
-};
-
-class MStack: public Node_Stack {
-  public:
-    MStack(int size) : Node_Stack(size) { }
-    void push(Node *n, Node_State ns) {
-      Node_Stack::push(n, (uint)ns);
-    }
-    void push(Node *n, Node_State ns, Node *parent, int indx) {
-      ++_inode_top;
-      if ((_inode_top + 1) >= _inode_max) grow();
-      _inode_top->node = parent;
-      _inode_top->indx = (uint)indx;
-      ++_inode_top;
-      _inode_top->node = n;
-      _inode_top->indx = (uint)ns;
-    }
-    Node *parent() {
-      pop();
-      return node();
-    }
-    Node_State state() const {
-      return (Node_State)index();
-    }
-    void set_state(Node_State ns) {
-      set_index((uint)ns);
-    }
-};
-
 //------------------------------xform------------------------------------------
 // Given a Node in old-space, Match him (Label/Reduce) to produce a machine
 // Node in new-space.  Given a new-space Node, recursively walk his children.
@@ -2046,37 +2008,22 @@ bool Matcher::is_bmi_pattern(Node *n, Node *m) {
 }
 #endif // X86
 
-// A method-klass-holder may be passed in the inline_cache_reg
-// and then expanded into the inline_cache_reg and a method_oop register
-//   defined in ad_<arch>.cpp
-
-// Check for shift by small constant as well
-static bool clone_shift(Node* shift, Matcher* matcher, MStack& mstack, VectorSet& address_visited) {
-  if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
-      shift->in(2)->get_int() <= 3 &&
-      // Are there other uses besides address expressions?
-      !matcher->is_visited(shift)) {
-    address_visited.set(shift->_idx); // Flag as address_visited
-    mstack.push(shift->in(2), Visit);
-    Node *conv = shift->in(1);
-#ifdef _LP64
-    // Allow Matcher to match the rule which bypass
-    // ConvI2L operation for an array index on LP64
-    // if the index value is positive.
-    if (conv->Opcode() == Op_ConvI2L &&
-        conv->as_Type()->type()->is_long()->_lo >= 0 &&
-        // Are there other uses besides address expressions?
-        !matcher->is_visited(conv)) {
-      address_visited.set(conv->_idx); // Flag as address_visited
-      mstack.push(conv->in(1), Pre_Visit);
-    } else
-#endif
-      mstack.push(conv, Pre_Visit);
+bool Matcher::clone_base_plus_offset_address(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
+  Node *off = m->in(AddPNode::Offset);
+  if (off->is_Con()) {
+    address_visited.test_set(m->_idx); // Flag as address_visited
+    mstack.push(m->in(AddPNode::Address), Pre_Visit);
+    // Clone X+offset as it also folds into most addressing expressions
+    mstack.push(off, Visit);
+    mstack.push(m->in(AddPNode::Base), Pre_Visit);
     return true;
   }
   return false;
 }
 
+// A method-klass-holder may be passed in the inline_cache_reg
+// and then expanded into the inline_cache_reg and a method_oop register
+//   defined in ad_<arch>.cpp
+
 //------------------------------find_shared------------------------------------
 // Set bits if Node is shared or otherwise a root
@@ -2251,40 +2198,9 @@ void Matcher::find_shared( Node *n ) {
       // But they should be marked as shared if there are other uses
       // besides address expressions.
 
-      Node *off = m->in(AddPNode::Offset);
-      if (off->is_Con()) {
-        address_visited.test_set(m->_idx); // Flag as address_visited
-        Node *adr = m->in(AddPNode::Address);
-
-        // Intel, ARM and friends can handle 2 adds in addressing mode
-        if( clone_shift_expressions && adr->is_AddP() &&
-            // AtomicAdd is not an addressing expression.
-            // Cheap to find it by looking for screwy base.
-            !adr->in(AddPNode::Base)->is_top() &&
-            // Are there other uses besides address expressions?
-            !is_visited(adr) ) {
-          address_visited.set(adr->_idx); // Flag as address_visited
-          Node *shift = adr->in(AddPNode::Offset);
-          if (!clone_shift(shift, this, mstack, address_visited)) {
-            mstack.push(shift, Pre_Visit);
-          }
-          mstack.push(adr->in(AddPNode::Address), Pre_Visit);
-          mstack.push(adr->in(AddPNode::Base), Pre_Visit);
-        } else {  // Sparc, Alpha, PPC and friends
-          mstack.push(adr, Pre_Visit);
-        }
-
-        // Clone X+offset as it also folds into most addressing expressions
-        mstack.push(off, Visit);
-        mstack.push(m->in(AddPNode::Base), Pre_Visit);
-        continue; // for(int i = ...)
-      } else if (clone_shift_expressions &&
-                 clone_shift(off, this, mstack, address_visited)) {
-        address_visited.test_set(m->_idx); // Flag as address_visited
-        mstack.push(m->in(AddPNode::Address), Pre_Visit);
-        mstack.push(m->in(AddPNode::Base), Pre_Visit);
+      if (clone_address_expressions(m->as_AddP(), mstack, address_visited)) {
         continue;
-      } // if( off->is_Con() )
+      }
     } // if( mem_op &&
     mstack.push(m, Pre_Visit);
   } // for(int i = ...)

File: matcher.hpp

@@ -40,6 +40,45 @@ class MachOper;
 //---------------------------Matcher-------------------------------------------
 class Matcher : public PhaseTransform {
   friend class VMStructs;
+
+ public:
+
+  // State and MStack class used in xform() and find_shared() iterative methods.
+  enum Node_State { Pre_Visit,  // node has to be pre-visited
+                    Visit,  // visit node
+                    Post_Visit,  // post-visit node
+                    Alt_Post_Visit   // alternative post-visit path
+  };
+
+  class MStack: public Node_Stack {
+    public:
+      MStack(int size) : Node_Stack(size) { }
+      void push(Node *n, Node_State ns) {
+        Node_Stack::push(n, (uint)ns);
+      }
+      void push(Node *n, Node_State ns, Node *parent, int indx) {
+        ++_inode_top;
+        if ((_inode_top + 1) >= _inode_max) grow();
+        _inode_top->node = parent;
+        _inode_top->indx = (uint)indx;
+        ++_inode_top;
+        _inode_top->node = n;
+        _inode_top->indx = (uint)ns;
+      }
+      Node *parent() {
+        pop();
+        return node();
+      }
+      Node_State state() const {
+        return (Node_State)index();
+      }
+      void set_state(Node_State ns) {
+        set_index((uint)ns);
+      }
+  };
+
+ private:
+
   // Private arena of State objects
   ResourceArea _states_arena;
@@ -411,7 +450,9 @@ public:
   // Should the Matcher clone shifts on addressing modes, expecting them to
   // be subsumed into complex addressing expressions or compute them into
   // registers?  True for Intel but false for most RISCs
-  static const bool clone_shift_expressions;
+  bool clone_address_expressions(AddPNode* m, MStack& mstack, VectorSet& address_visited);
+  // Clone base + offset address expression
+  bool clone_base_plus_offset_address(AddPNode* m, MStack& mstack, VectorSet& address_visited);
 
   static bool narrow_oop_use_complex_address();
   static bool narrow_klass_use_complex_address();