From a1e5418d19386c3c49e5af8a0644c2cce4c097e2 Mon Sep 17 00:00:00 2001 From: Roland Westrelin Date: Mon, 9 May 2016 11:34:09 +0200 Subject: [PATCH] 8154826: AArch64: take advantage better of base + shifted offset addressing mode Reshape address subtree to fit aarch64 addressing mode Reviewed-by: kvn, aph --- hotspot/src/cpu/aarch64/vm/aarch64.ad | 300 +++++++++++++------------- hotspot/src/cpu/ppc/vm/ppc.ad | 15 +- hotspot/src/cpu/sparc/vm/sparc.ad | 16 +- hotspot/src/cpu/x86/vm/x86.ad | 73 +++++++ hotspot/src/cpu/x86/vm/x86_32.ad | 5 - hotspot/src/cpu/x86/vm/x86_64.ad | 5 - hotspot/src/share/vm/opto/compile.cpp | 2 + hotspot/src/share/vm/opto/compile.hpp | 3 + hotspot/src/share/vm/opto/matcher.cpp | 112 ++-------- hotspot/src/share/vm/opto/matcher.hpp | 43 +++- 10 files changed, 311 insertions(+), 263 deletions(-) diff --git a/hotspot/src/cpu/aarch64/vm/aarch64.ad b/hotspot/src/cpu/aarch64/vm/aarch64.ad index e2e02cb6b30..5359f30f29c 100644 --- a/hotspot/src/cpu/aarch64/vm/aarch64.ad +++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad @@ -996,6 +996,7 @@ definitions %{ source_hpp %{ #include "gc/shared/cardTableModRefBS.hpp" +#include "opto/addnode.hpp" class CallStubImpl { @@ -1061,6 +1062,9 @@ class HandlerImpl { // predicate controlling translation of StoreCM bool unnecessary_storestore(const Node *storecm); + + // predicate controlling addressing modes + bool size_fits_all_mem_uses(AddPNode* addp, int shift); %} source %{ @@ -3449,11 +3453,6 @@ const int Matcher::float_cmove_cost() { // Does the CPU require late expand (see block.cpp for description of late expand)? const bool Matcher::require_postalloc_expand = false; -// Should the Matcher clone shifts on addressing modes, expecting them -// to be subsumed into complex addressing expressions or compute them -// into registers? True for Intel but false for most RISCs -const bool Matcher::clone_shift_expressions = false; - // Do we need to mask the count passed to shift instructions or does // the cpu only look at the lower 5/6 bits anyway? const bool Matcher::need_masked_shift_count = false; @@ -3572,8 +3571,119 @@ const RegMask Matcher::method_handle_invoke_SP_save_mask() { return FP_REG_mask(); } +bool size_fits_all_mem_uses(AddPNode* addp, int shift) { + for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) { + Node* u = addp->fast_out(i); + if (u->is_Mem()) { + int opsize = u->as_Mem()->memory_size(); + assert(opsize > 0, "unexpected memory operand size"); + if (u->as_Mem()->memory_size() != (1<in(AddPNode::Offset); + if (off->Opcode() == Op_LShiftL && off->in(2)->is_Con() && + size_fits_all_mem_uses(m, off->in(2)->get_int()) && + // Are there other uses besides address expressions? + !is_visited(off)) { + address_visited.set(off->_idx); // Flag as address_visited + mstack.push(off->in(2), Visit); + Node *conv = off->in(1); + if (conv->Opcode() == Op_ConvI2L && + // Are there other uses besides address expressions? + !is_visited(conv)) { + address_visited.set(conv->_idx); // Flag as address_visited + mstack.push(conv->in(1), Pre_Visit); + } else { + mstack.push(conv, Pre_Visit); + } + address_visited.test_set(m->_idx); // Flag as address_visited + mstack.push(m->in(AddPNode::Address), Pre_Visit); + mstack.push(m->in(AddPNode::Base), Pre_Visit); + return true; + } else if (off->Opcode() == Op_ConvI2L && + // Are there other uses besides address expressions? + !is_visited(off)) { + address_visited.test_set(m->_idx); // Flag as address_visited + address_visited.set(off->_idx); // Flag as address_visited + mstack.push(off->in(1), Pre_Visit); + mstack.push(m->in(AddPNode::Address), Pre_Visit); + mstack.push(m->in(AddPNode::Base), Pre_Visit); + return true; + } + return false; +} + +// Transform: +// (AddP base (AddP base address (LShiftL index con)) offset) +// into: +// (AddP base (AddP base offset) (LShiftL index con)) +// to take full advantage of ARM's addressing modes +void Compile::reshape_address(AddPNode* addp) { + Node *addr = addp->in(AddPNode::Address); + if (addr->is_AddP() && addr->in(AddPNode::Base) == addp->in(AddPNode::Base)) { + const AddPNode *addp2 = addr->as_AddP(); + if ((addp2->in(AddPNode::Offset)->Opcode() == Op_LShiftL && + addp2->in(AddPNode::Offset)->in(2)->is_Con() && + size_fits_all_mem_uses(addp, addp2->in(AddPNode::Offset)->in(2)->get_int())) || + addp2->in(AddPNode::Offset)->Opcode() == Op_ConvI2L) { + + // Any use that can't embed the address computation? + for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) { + Node* u = addp->fast_out(i); + if (!u->is_Mem() || u->is_LoadVector() || u->is_StoreVector() || u->Opcode() == Op_StoreCM) { + return; + } + } + + Node* off = addp->in(AddPNode::Offset); + Node* addr2 = addp2->in(AddPNode::Address); + Node* base = addp->in(AddPNode::Base); + + Node* new_addr = NULL; + // Check whether the graph already has the new AddP we need + // before we create one (no GVN available here). + for (DUIterator_Fast imax, i = addr2->fast_outs(imax); i < imax; i++) { + Node* u = addr2->fast_out(i); + if (u->is_AddP() && + u->in(AddPNode::Base) == base && + u->in(AddPNode::Address) == addr2 && + u->in(AddPNode::Offset) == off) { + new_addr = u; + break; + } + } + + if (new_addr == NULL) { + new_addr = new AddPNode(base, addr2, off); + } + Node* new_off = addp2->in(AddPNode::Offset); + addp->set_req(AddPNode::Address, new_addr); + if (addr->outcnt() == 0) { + addr->disconnect_inputs(NULL, this); + } + addp->set_req(AddPNode::Offset, new_off); + if (off->outcnt() == 0) { + off->disconnect_inputs(NULL, this); + } + } + } +} + // helper for encoding java_to_runtime calls on sim // // this is needed to compute the extra arguments required when @@ -3643,12 +3753,10 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt, // encoder that the index needs to be sign extended, so we have to // enumerate all the cases. switch (opcode) { - case INDINDEXSCALEDOFFSETI2L: case INDINDEXSCALEDI2L: - case INDINDEXSCALEDOFFSETI2LN: case INDINDEXSCALEDI2LN: - case INDINDEXOFFSETI2L: - case INDINDEXOFFSETI2LN: + case INDINDEXI2L: + case INDINDEXI2LN: scale = Address::sxtw(size); break; default: @@ -3658,12 +3766,8 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt, if (index == -1) { (masm.*insn)(reg, Address(base, disp)); } else { - if (disp == 0) { - (masm.*insn)(reg, Address(base, as_Register(index), scale)); - } else { - masm.lea(rscratch1, Address(base, disp)); - (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale)); - } + assert(disp == 0, "unsupported address mode: disp = %d", disp); + (masm.*insn)(reg, Address(base, as_Register(index), scale)); } } @@ -3674,9 +3778,7 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt, Address::extend scale; switch (opcode) { - case INDINDEXSCALEDOFFSETI2L: case INDINDEXSCALEDI2L: - case INDINDEXSCALEDOFFSETI2LN: case INDINDEXSCALEDI2LN: scale = Address::sxtw(size); break; @@ -3687,12 +3789,8 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt, if (index == -1) { (masm.*insn)(reg, Address(base, disp)); } else { - if (disp == 0) { - (masm.*insn)(reg, Address(base, as_Register(index), scale)); - } else { - masm.lea(rscratch1, Address(base, disp)); - (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale)); - } + assert(disp == 0, "unsupported address mode: disp = %d", disp); + (masm.*insn)(reg, Address(base, as_Register(index), scale)); } } @@ -6106,65 +6204,10 @@ operand indirect(iRegP reg) %} %} -operand indIndexScaledOffsetI(iRegP reg, iRegL lreg, immIScale scale, immIU12 off) -%{ - constraint(ALLOC_IN_RC(ptr_reg)); - match(AddP (AddP reg (LShiftL lreg scale)) off); - op_cost(INSN_COST); - format %{ "$reg, $lreg lsl($scale), $off" %} - interface(MEMORY_INTER) %{ - base($reg); - index($lreg); - scale($scale); - disp($off); - %} -%} - -operand indIndexScaledOffsetL(iRegP reg, iRegL lreg, immIScale scale, immLU12 off) -%{ - constraint(ALLOC_IN_RC(ptr_reg)); - match(AddP (AddP reg (LShiftL lreg scale)) off); - op_cost(INSN_COST); - format %{ "$reg, $lreg lsl($scale), $off" %} - interface(MEMORY_INTER) %{ - base($reg); - index($lreg); - scale($scale); - disp($off); - %} -%} - -operand indIndexOffsetI2L(iRegP reg, iRegI ireg, immLU12 off) -%{ - constraint(ALLOC_IN_RC(ptr_reg)); - match(AddP (AddP reg (ConvI2L ireg)) off); - op_cost(INSN_COST); - format %{ "$reg, $ireg, $off I2L" %} - interface(MEMORY_INTER) %{ - base($reg); - index($ireg); - scale(0x0); - disp($off); - %} -%} - -operand indIndexScaledOffsetI2L(iRegP reg, iRegI ireg, immIScale scale, immLU12 off) -%{ - constraint(ALLOC_IN_RC(ptr_reg)); - match(AddP (AddP reg (LShiftL (ConvI2L ireg) scale)) off); - op_cost(INSN_COST); - format %{ "$reg, $ireg sxtw($scale), $off I2L" %} - interface(MEMORY_INTER) %{ - base($reg); - index($ireg); - scale($scale); - disp($off); - %} -%} - operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale) %{ constraint(ALLOC_IN_RC(ptr_reg)); + predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int())); match(AddP reg (LShiftL (ConvI2L ireg) scale)); op_cost(0); format %{ "$reg, $ireg sxtw($scale), 0, I2L" %} @@ -6179,6 +6222,7 @@ operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale) operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale) %{ constraint(ALLOC_IN_RC(ptr_reg)); + predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int())); match(AddP reg (LShiftL lreg scale)); op_cost(0); format %{ "$reg, $lreg lsl($scale)" %} @@ -6190,6 +6234,20 @@ operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale) %} %} +operand indIndexI2L(iRegP reg, iRegI ireg) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP reg (ConvI2L ireg)); + op_cost(0); + format %{ "$reg, $ireg, 0, I2L" %} + interface(MEMORY_INTER) %{ + base($reg); + index($ireg); + scale(0x0); + disp(0x0); + %} +%} + operand indIndex(iRegP reg, iRegL lreg) %{ constraint(ALLOC_IN_RC(ptr_reg)); @@ -6331,69 +6389,9 @@ operand indirectN(iRegN reg) %} %} -operand indIndexScaledOffsetIN(iRegN reg, iRegL lreg, immIScale scale, immIU12 off) -%{ - predicate(Universe::narrow_oop_shift() == 0); - constraint(ALLOC_IN_RC(ptr_reg)); - match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off); - op_cost(0); - format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %} - interface(MEMORY_INTER) %{ - base($reg); - index($lreg); - scale($scale); - disp($off); - %} -%} - -operand indIndexScaledOffsetLN(iRegN reg, iRegL lreg, immIScale scale, immLU12 off) -%{ - predicate(Universe::narrow_oop_shift() == 0); - constraint(ALLOC_IN_RC(ptr_reg)); - match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off); - op_cost(INSN_COST); - format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %} - interface(MEMORY_INTER) %{ - base($reg); - index($lreg); - scale($scale); - disp($off); - %} -%} - -operand indIndexOffsetI2LN(iRegN reg, iRegI ireg, immLU12 off) -%{ - predicate(Universe::narrow_oop_shift() == 0); - constraint(ALLOC_IN_RC(ptr_reg)); - match(AddP (AddP (DecodeN reg) (ConvI2L ireg)) off); - op_cost(INSN_COST); - format %{ "$reg, $ireg, $off I2L\t# narrow" %} - interface(MEMORY_INTER) %{ - base($reg); - index($ireg); - scale(0x0); - disp($off); - %} -%} - -operand indIndexScaledOffsetI2LN(iRegN reg, iRegI ireg, immIScale scale, immLU12 off) -%{ - predicate(Universe::narrow_oop_shift() == 0); - constraint(ALLOC_IN_RC(ptr_reg)); - match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale)) off); - op_cost(INSN_COST); - format %{ "$reg, $ireg sxtw($scale), $off I2L\t# narrow" %} - interface(MEMORY_INTER) %{ - base($reg); - index($ireg); - scale($scale); - disp($off); - %} -%} - operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale) %{ - predicate(Universe::narrow_oop_shift() == 0); + predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int())); constraint(ALLOC_IN_RC(ptr_reg)); match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale)); op_cost(0); @@ -6408,7 +6406,7 @@ operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale) operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale) %{ - predicate(Universe::narrow_oop_shift() == 0); + predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int())); constraint(ALLOC_IN_RC(ptr_reg)); match(AddP (DecodeN reg) (LShiftL lreg scale)); op_cost(0); @@ -6421,6 +6419,21 @@ operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale) %} %} +operand indIndexI2LN(iRegN reg, iRegI ireg) +%{ + predicate(Universe::narrow_oop_shift() == 0); + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (DecodeN reg) (ConvI2L ireg)); + op_cost(0); + format %{ "$reg, $ireg, 0, I2L\t# narrow" %} + interface(MEMORY_INTER) %{ + base($reg); + index($ireg); + scale(0x0); + disp(0x0); + %} +%} + operand indIndexN(iRegN reg, iRegL lreg) %{ predicate(Universe::narrow_oop_shift() == 0); @@ -6641,9 +6654,8 @@ opclass vmem16(indirect, indIndex, indOffI16, indOffL16); // memory is used to define read/write location for load/store // instruction defs. we can turn a memory op into an Address -opclass memory(indirect, indIndexScaledOffsetI, indIndexScaledOffsetL, indIndexOffsetI2L, indIndexScaledOffsetI2L, indIndexScaled, indIndexScaledI2L, indIndex, indOffI, indOffL, - indirectN, indIndexScaledOffsetIN, indIndexScaledOffsetLN, indIndexOffsetI2LN, indIndexScaledOffsetI2LN, indIndexScaledN, indIndexScaledI2LN, indIndexN, indOffIN, indOffLN); - +opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI, indOffL, + indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN); // iRegIorL2I is used for src inputs in rules for 32 bit int (I) // operations. it allows the src to be either an iRegI or a (ConvL2I diff --git a/hotspot/src/cpu/ppc/vm/ppc.ad b/hotspot/src/cpu/ppc/vm/ppc.ad index b2b6ba500ea..0dc5a4d2b45 100644 --- a/hotspot/src/cpu/ppc/vm/ppc.ad +++ b/hotspot/src/cpu/ppc/vm/ppc.ad @@ -817,6 +817,16 @@ source_hpp %{ source %{ +// Should the Matcher clone shifts on addressing modes, expecting them +// to be subsumed into complex addressing expressions or compute them +// into registers? +bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { + return clone_base_plus_offset_address(m, mstack, address_visited); +} + +void Compile::reshape_address(AddPNode* addp) { +} + // Optimize load-acquire. // // Check if acquire is unnecessary due to following operation that does @@ -2157,11 +2167,6 @@ const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; } // Power6 requires postalloc expand (see block.cpp for description of postalloc expand). const bool Matcher::require_postalloc_expand = true; -// Should the Matcher clone shifts on addressing modes, expecting them to -// be subsumed into complex addressing expressions or compute them into -// registers? True for Intel but false for most RISCs. -const bool Matcher::clone_shift_expressions = false; - // Do we need to mask the count passed to shift instructions or does // the cpu only look at the lower 5/6 bits anyway? // PowerPC requires masked shift counts. diff --git a/hotspot/src/cpu/sparc/vm/sparc.ad b/hotspot/src/cpu/sparc/vm/sparc.ad index 7e610507004..653da26ba0b 100644 --- a/hotspot/src/cpu/sparc/vm/sparc.ad +++ b/hotspot/src/cpu/sparc/vm/sparc.ad @@ -1995,11 +1995,6 @@ const int Matcher::float_cmove_cost() { // Does the CPU require late expand (see block.cpp for description of late expand)? const bool Matcher::require_postalloc_expand = false; -// Should the Matcher clone shifts on addressing modes, expecting them to -// be subsumed into complex addressing expressions or compute them into -// registers? True for Intel but false for most RISCs -const bool Matcher::clone_shift_expressions = false; - // Do we need to mask the count passed to shift instructions or does // the cpu only look at the lower 5/6 bits anyway? const bool Matcher::need_masked_shift_count = false; @@ -2133,8 +2128,19 @@ const RegMask Matcher::method_handle_invoke_SP_save_mask() { return L7_REGP_mask(); } + const bool Matcher::convi2l_type_required = true; +// Should the Matcher clone shifts on addressing modes, expecting them +// to be subsumed into complex addressing expressions or compute them +// into registers? +bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { + return clone_base_plus_offset_address(m, mstack, address_visited); +} + +void Compile::reshape_address(AddPNode* addp) { +} + %} diff --git a/hotspot/src/cpu/x86/vm/x86.ad b/hotspot/src/cpu/x86/vm/x86.ad index dc9a734c7f1..ce14f4c742e 100644 --- a/hotspot/src/cpu/x86/vm/x86.ad +++ b/hotspot/src/cpu/x86/vm/x86.ad @@ -1586,6 +1586,8 @@ class HandlerImpl { source %{ +#include "opto/addnode.hpp" + // Emit exception handler code. // Stuff framesize into a register and call a VM stub routine. int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { @@ -1861,8 +1863,79 @@ const bool Matcher::pass_original_key_for_aes() { return false; } + const bool Matcher::convi2l_type_required = true; +// Check for shift by small constant as well +static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) { + if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && + shift->in(2)->get_int() <= 3 && + // Are there other uses besides address expressions? + !matcher->is_visited(shift)) { + address_visited.set(shift->_idx); // Flag as address_visited + mstack.push(shift->in(2), Matcher::Visit); + Node *conv = shift->in(1); +#ifdef _LP64 + // Allow Matcher to match the rule which bypass + // ConvI2L operation for an array index on LP64 + // if the index value is positive. + if (conv->Opcode() == Op_ConvI2L && + conv->as_Type()->type()->is_long()->_lo >= 0 && + // Are there other uses besides address expressions? + !matcher->is_visited(conv)) { + address_visited.set(conv->_idx); // Flag as address_visited + mstack.push(conv->in(1), Matcher::Pre_Visit); + } else +#endif + mstack.push(conv, Matcher::Pre_Visit); + return true; + } + return false; +} + +// Should the Matcher clone shifts on addressing modes, expecting them +// to be subsumed into complex addressing expressions or compute them +// into registers? +bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { + Node *off = m->in(AddPNode::Offset); + if (off->is_Con()) { + address_visited.test_set(m->_idx); // Flag as address_visited + Node *adr = m->in(AddPNode::Address); + + // Intel can handle 2 adds in addressing mode + // AtomicAdd is not an addressing expression. + // Cheap to find it by looking for screwy base. + if (adr->is_AddP() && + !adr->in(AddPNode::Base)->is_top() && + // Are there other uses besides address expressions? + !is_visited(adr)) { + address_visited.set(adr->_idx); // Flag as address_visited + Node *shift = adr->in(AddPNode::Offset); + if (!clone_shift(shift, this, mstack, address_visited)) { + mstack.push(shift, Pre_Visit); + } + mstack.push(adr->in(AddPNode::Address), Pre_Visit); + mstack.push(adr->in(AddPNode::Base), Pre_Visit); + } else { + mstack.push(adr, Pre_Visit); + } + + // Clone X+offset as it also folds into most addressing expressions + mstack.push(off, Visit); + mstack.push(m->in(AddPNode::Base), Pre_Visit); + return true; + } else if (clone_shift(off, this, mstack, address_visited)) { + address_visited.test_set(m->_idx); // Flag as address_visited + mstack.push(m->in(AddPNode::Address), Pre_Visit); + mstack.push(m->in(AddPNode::Base), Pre_Visit); + return true; + } + return false; +} + +void Compile::reshape_address(AddPNode* addp) { +} + // Helper methods for MachSpillCopyNode::implementation(). static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, int src_hi, int dst_hi, uint ireg, outputStream* st) { diff --git a/hotspot/src/cpu/x86/vm/x86_32.ad b/hotspot/src/cpu/x86/vm/x86_32.ad index 883d2bf6823..a45cb554e3c 100644 --- a/hotspot/src/cpu/x86/vm/x86_32.ad +++ b/hotspot/src/cpu/x86/vm/x86_32.ad @@ -1438,11 +1438,6 @@ const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimi // Does the CPU require late expand (see block.cpp for description of late expand)? const bool Matcher::require_postalloc_expand = false; -// Should the Matcher clone shifts on addressing modes, expecting them to -// be subsumed into complex addressing expressions or compute them into -// registers? True for Intel but false for most RISCs -const bool Matcher::clone_shift_expressions = true; - // Do we need to mask the count passed to shift instructions or does // the cpu only look at the lower 5/6 bits anyway? const bool Matcher::need_masked_shift_count = false; diff --git a/hotspot/src/cpu/x86/vm/x86_64.ad b/hotspot/src/cpu/x86/vm/x86_64.ad index d8409a2ddd2..a680d60e994 100644 --- a/hotspot/src/cpu/x86/vm/x86_64.ad +++ b/hotspot/src/cpu/x86/vm/x86_64.ad @@ -1646,11 +1646,6 @@ const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; } // Does the CPU require late expand (see block.cpp for description of late expand)? const bool Matcher::require_postalloc_expand = false; -// Should the Matcher clone shifts on addressing modes, expecting them -// to be subsumed into complex addressing expressions or compute them -// into registers? True for Intel but false for most RISCs -const bool Matcher::clone_shift_expressions = true; - // Do we need to mask the count passed to shift instructions or does // the cpu only look at the lower 5/6 bits anyway? const bool Matcher::need_masked_shift_count = false; diff --git a/hotspot/src/share/vm/opto/compile.cpp b/hotspot/src/share/vm/opto/compile.cpp index cecd3098214..5e1e50ef6af 100644 --- a/hotspot/src/share/vm/opto/compile.cpp +++ b/hotspot/src/share/vm/opto/compile.cpp @@ -2905,6 +2905,8 @@ void Compile::final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc) { } } #endif + // platform dependent reshaping of the address expression + reshape_address(n->as_AddP()); break; } diff --git a/hotspot/src/share/vm/opto/compile.hpp b/hotspot/src/share/vm/opto/compile.hpp index 3cbbf51cd1d..f8743d86ba9 100644 --- a/hotspot/src/share/vm/opto/compile.hpp +++ b/hotspot/src/share/vm/opto/compile.hpp @@ -44,6 +44,7 @@ #include "trace/tracing.hpp" #include "utilities/ticks.hpp" +class AddPNode; class Block; class Bundle; class C2Compiler; @@ -579,6 +580,8 @@ class Compile : public Phase { int _scratch_const_size; // For temporary code buffers. bool _in_scratch_emit_size; // true when in scratch_emit_size. + void reshape_address(AddPNode* n); + public: // Accessors diff --git a/hotspot/src/share/vm/opto/matcher.cpp b/hotspot/src/share/vm/opto/matcher.cpp index fbf861a3c2e..44127f3bc23 100644 --- a/hotspot/src/share/vm/opto/matcher.cpp +++ b/hotspot/src/share/vm/opto/matcher.cpp @@ -963,44 +963,6 @@ static void match_alias_type(Compile* C, Node* n, Node* m) { } #endif - -//------------------------------MStack----------------------------------------- -// State and MStack class used in xform() and find_shared() iterative methods. -enum Node_State { Pre_Visit, // node has to be pre-visited - Visit, // visit node - Post_Visit, // post-visit node - Alt_Post_Visit // alternative post-visit path - }; - -class MStack: public Node_Stack { - public: - MStack(int size) : Node_Stack(size) { } - - void push(Node *n, Node_State ns) { - Node_Stack::push(n, (uint)ns); - } - void push(Node *n, Node_State ns, Node *parent, int indx) { - ++_inode_top; - if ((_inode_top + 1) >= _inode_max) grow(); - _inode_top->node = parent; - _inode_top->indx = (uint)indx; - ++_inode_top; - _inode_top->node = n; - _inode_top->indx = (uint)ns; - } - Node *parent() { - pop(); - return node(); - } - Node_State state() const { - return (Node_State)index(); - } - void set_state(Node_State ns) { - set_index((uint)ns); - } -}; - - //------------------------------xform------------------------------------------ // Given a Node in old-space, Match him (Label/Reduce) to produce a machine // Node in new-space. Given a new-space Node, recursively walk his children. @@ -2046,37 +2008,22 @@ bool Matcher::is_bmi_pattern(Node *n, Node *m) { } #endif // X86 -// A method-klass-holder may be passed in the inline_cache_reg -// and then expanded into the inline_cache_reg and a method_oop register -// defined in ad_.cpp - -// Check for shift by small constant as well -static bool clone_shift(Node* shift, Matcher* matcher, MStack& mstack, VectorSet& address_visited) { - if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && - shift->in(2)->get_int() <= 3 && - // Are there other uses besides address expressions? - !matcher->is_visited(shift)) { - address_visited.set(shift->_idx); // Flag as address_visited - mstack.push(shift->in(2), Visit); - Node *conv = shift->in(1); -#ifdef _LP64 - // Allow Matcher to match the rule which bypass - // ConvI2L operation for an array index on LP64 - // if the index value is positive. - if (conv->Opcode() == Op_ConvI2L && - conv->as_Type()->type()->is_long()->_lo >= 0 && - // Are there other uses besides address expressions? - !matcher->is_visited(conv)) { - address_visited.set(conv->_idx); // Flag as address_visited - mstack.push(conv->in(1), Pre_Visit); - } else -#endif - mstack.push(conv, Pre_Visit); +bool Matcher::clone_base_plus_offset_address(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { + Node *off = m->in(AddPNode::Offset); + if (off->is_Con()) { + address_visited.test_set(m->_idx); // Flag as address_visited + mstack.push(m->in(AddPNode::Address), Pre_Visit); + // Clone X+offset as it also folds into most addressing expressions + mstack.push(off, Visit); + mstack.push(m->in(AddPNode::Base), Pre_Visit); return true; } return false; } +// A method-klass-holder may be passed in the inline_cache_reg +// and then expanded into the inline_cache_reg and a method_oop register +// defined in ad_.cpp //------------------------------find_shared------------------------------------ // Set bits if Node is shared or otherwise a root @@ -2251,40 +2198,9 @@ void Matcher::find_shared( Node *n ) { // But they should be marked as shared if there are other uses // besides address expressions. - Node *off = m->in(AddPNode::Offset); - if (off->is_Con()) { - address_visited.test_set(m->_idx); // Flag as address_visited - Node *adr = m->in(AddPNode::Address); - - // Intel, ARM and friends can handle 2 adds in addressing mode - if( clone_shift_expressions && adr->is_AddP() && - // AtomicAdd is not an addressing expression. - // Cheap to find it by looking for screwy base. - !adr->in(AddPNode::Base)->is_top() && - // Are there other uses besides address expressions? - !is_visited(adr) ) { - address_visited.set(adr->_idx); // Flag as address_visited - Node *shift = adr->in(AddPNode::Offset); - if (!clone_shift(shift, this, mstack, address_visited)) { - mstack.push(shift, Pre_Visit); - } - mstack.push(adr->in(AddPNode::Address), Pre_Visit); - mstack.push(adr->in(AddPNode::Base), Pre_Visit); - } else { // Sparc, Alpha, PPC and friends - mstack.push(adr, Pre_Visit); - } - - // Clone X+offset as it also folds into most addressing expressions - mstack.push(off, Visit); - mstack.push(m->in(AddPNode::Base), Pre_Visit); - continue; // for(int i = ...) - } else if (clone_shift_expressions && - clone_shift(off, this, mstack, address_visited)) { - address_visited.test_set(m->_idx); // Flag as address_visited - mstack.push(m->in(AddPNode::Address), Pre_Visit); - mstack.push(m->in(AddPNode::Base), Pre_Visit); - continue; - } // if( off->is_Con() ) + if (clone_address_expressions(m->as_AddP(), mstack, address_visited)) { + continue; + } } // if( mem_op && mstack.push(m, Pre_Visit); } // for(int i = ...) diff --git a/hotspot/src/share/vm/opto/matcher.hpp b/hotspot/src/share/vm/opto/matcher.hpp index aa9311d7788..74d7361d08c 100644 --- a/hotspot/src/share/vm/opto/matcher.hpp +++ b/hotspot/src/share/vm/opto/matcher.hpp @@ -40,6 +40,45 @@ class MachOper; //---------------------------Matcher------------------------------------------- class Matcher : public PhaseTransform { friend class VMStructs; + +public: + + // State and MStack class used in xform() and find_shared() iterative methods. + enum Node_State { Pre_Visit, // node has to be pre-visited + Visit, // visit node + Post_Visit, // post-visit node + Alt_Post_Visit // alternative post-visit path + }; + + class MStack: public Node_Stack { + public: + MStack(int size) : Node_Stack(size) { } + + void push(Node *n, Node_State ns) { + Node_Stack::push(n, (uint)ns); + } + void push(Node *n, Node_State ns, Node *parent, int indx) { + ++_inode_top; + if ((_inode_top + 1) >= _inode_max) grow(); + _inode_top->node = parent; + _inode_top->indx = (uint)indx; + ++_inode_top; + _inode_top->node = n; + _inode_top->indx = (uint)ns; + } + Node *parent() { + pop(); + return node(); + } + Node_State state() const { + return (Node_State)index(); + } + void set_state(Node_State ns) { + set_index((uint)ns); + } + }; + +private: // Private arena of State objects ResourceArea _states_arena; @@ -411,7 +450,9 @@ public: // Should the Matcher clone shifts on addressing modes, expecting them to // be subsumed into complex addressing expressions or compute them into // registers? True for Intel but false for most RISCs - static const bool clone_shift_expressions; + bool clone_address_expressions(AddPNode* m, MStack& mstack, VectorSet& address_visited); + // Clone base + offset address expression + bool clone_base_plus_offset_address(AddPNode* m, MStack& mstack, VectorSet& address_visited); static bool narrow_oop_use_complex_address(); static bool narrow_klass_use_complex_address();