8230565: ZGC: Redesign C2 load barrier to expand on the MachNode level

Co-authored-by: Per Liden <per.liden@oracle.com>
Co-authored-by: Stefan Karlsson <stefan.karlsson@oracle.com>
Co-authored-by: Nils Eliasson <nils.eliasson@oracle.com>
Reviewed-by: pliden, stefank, neliasso
Erik Österlund 2019-10-09 12:30:06 +00:00
parent dcc9cc3fdd
commit 42885307f6
35 changed files with 1534 additions and 2861 deletions
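In short: instead of expanding ZGC load barriers as separate ideal-graph nodes (LoadBarrierSlowReg and friends), barrier expansion now happens at the MachNode level. Oop loads match the ordinary LoadP rules, carry a barrier_data() tag, and the fast-path test is emitted inline right after the load, with the slow path in an out-of-line ZLoadBarrierStubC2. A minimal sketch of the aarch64 fast path, mirroring the z_load_barrier() helper added below (register names illustrative):

// ldr  dst, [mem]                          // the oop load itself
// ldr  tmp, [rthread, #address_bad_mask]   // thread-local bad mask
// and  tmp, tmp, dst                       // any bad bits in the ref?
// cbnz tmp, stub_entry                     // yes -> out-of-line stub
// continuation:                            // no  -> ref is good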

src/hotspot/cpu/aarch64/aarch64.ad

@ -2513,17 +2513,8 @@ void Compile::reshape_address(AddPNode* addp) {
__ INSN(REG, as_Register(BASE)); \
}
typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
MacroAssembler::SIMD_RegVariant T, const Address &adr);
// Used for all non-volatile memory accesses. The use of
// $mem->opcode() to discover whether this pattern uses sign-extended
// offsets is something of a kludge.
static void loadStore(MacroAssembler masm, mem_insn insn,
Register reg, int opcode,
Register base, int index, int size, int disp)
static Address mem2address(int opcode, Register base, int index, int size, int disp)
{
Address::extend scale;
@ -2542,16 +2533,34 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
}
if (index == -1) {
(masm.*insn)(reg, Address(base, disp));
return Address(base, disp);
} else {
assert(disp == 0, "unsupported address mode: disp = %d", disp);
(masm.*insn)(reg, Address(base, as_Register(index), scale));
return Address(base, as_Register(index), scale);
}
}
typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
typedef void (MacroAssembler::* mem_insn2)(Register Rt, Register adr);
typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
MacroAssembler::SIMD_RegVariant T, const Address &adr);
// Used for all non-volatile memory accesses. The use of
// $mem->opcode() to discover whether this pattern uses sign-extended
// offsets is something of a kludge.
static void loadStore(MacroAssembler masm, mem_insn insn,
Register reg, int opcode,
Register base, int index, int size, int disp)
{
Address addr = mem2address(opcode, base, index, size, disp);
(masm.*insn)(reg, addr);
}
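The point of splitting mem2address() out of loadStore() is that the new ZGC instructs need the Address as a value: the same address can feed both the load and the barrier stub. Roughly, using names from this patch:

// Sketch: compute the address once, use it twice.
// Address ref_addr = mem2address(opcode, base, index, size, disp);
// (masm.*insn)(reg, ref_addr);   // emit the load
// ref_addr is later handed to ZLoadBarrierStubC2 for self healing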
static void loadStore(MacroAssembler masm, mem_float_insn insn,
FloatRegister reg, int opcode,
Register base, int index, int size, int disp)
FloatRegister reg, int opcode,
Register base, int index, int size, int disp)
{
Address::extend scale;
@ -2573,8 +2582,8 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
}
static void loadStore(MacroAssembler masm, mem_vector_insn insn,
FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
int opcode, Register base, int index, int size, int disp)
FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
int opcode, Register base, int index, int size, int disp)
{
if (index == -1) {
(masm.*insn)(reg, T, Address(base, disp));
@ -3791,7 +3800,7 @@ frame %{
static const int hi[Op_RegL + 1] = { // enum name
0, // Op_Node
0, // Op_Set
OptoReg::Bad, // Op_RegN
OptoReg::Bad, // Op_RegN
OptoReg::Bad, // Op_RegI
R0_H_num, // Op_RegP
OptoReg::Bad, // Op_RegF
@ -6923,7 +6932,7 @@ instruct loadRange(iRegINoSp dst, memory mem)
instruct loadP(iRegPNoSp dst, memory mem)
%{
match(Set dst (LoadP mem));
predicate(!needs_acquiring_load(n));
predicate(!needs_acquiring_load(n) && (n->as_Load()->barrier_data() == 0));
ins_cost(4 * INSN_COST);
format %{ "ldr $dst, $mem\t# ptr" %}
@ -7616,6 +7625,7 @@ instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
%{
match(Set dst (LoadP mem));
predicate(n->as_Load()->barrier_data() == 0);
ins_cost(VOLATILE_REF_COST);
format %{ "ldar $dst, $mem\t# ptr" %}
@ -8552,6 +8562,7 @@ instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoS
instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
match(Set res (CompareAndSwapP mem (Binary oldval newval)));
predicate(n->as_LoadStore()->barrier_data() == 0);
ins_cost(2 * VOLATILE_REF_COST);
effect(KILL cr);
@ -8665,7 +8676,7 @@ instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegL
instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
predicate(needs_acquiring_load_exclusive(n));
predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));
match(Set res (CompareAndSwapP mem (Binary oldval newval)));
ins_cost(VOLATILE_REF_COST);
@ -8796,6 +8807,7 @@ instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN ne
%}
instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
ins_cost(2 * VOLATILE_REF_COST);
effect(TEMP_DEF res, KILL cr);
@ -8895,7 +8907,7 @@ instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN
%}
instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
predicate(needs_acquiring_load_exclusive(n));
predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));
match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
ins_cost(VOLATILE_REF_COST);
effect(TEMP_DEF res, KILL cr);
@ -8996,6 +9008,7 @@ instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN ne
%}
instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
ins_cost(2 * VOLATILE_REF_COST);
effect(KILL cr);
@ -9103,8 +9116,8 @@ instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN
%}
instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
predicate(needs_acquiring_load_exclusive(n));
match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));
ins_cost(VOLATILE_REF_COST);
effect(KILL cr);
format %{
@ -9154,6 +9167,7 @@ instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) %{
%}
instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) %{
predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set prev (GetAndSetP mem newv));
ins_cost(2 * VOLATILE_REF_COST);
format %{ "atomic_xchg $prev, $newv, [$mem]" %}
@ -9197,7 +9211,7 @@ instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) %{
%}
instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) %{
predicate(needs_acquiring_load_exclusive(n));
predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));
match(Set prev (GetAndSetP mem newv));
ins_cost(VOLATILE_REF_COST);
format %{ "atomic_xchg_acq $prev, $newv, [$mem]" %}

src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp

@ -24,22 +24,23 @@
#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "code/codeBlob.hpp"
#include "code/vmreg.inline.hpp"
#include "gc/z/zBarrier.inline.hpp"
#include "gc/z/zBarrierSet.hpp"
#include "gc/z/zBarrierSetAssembler.hpp"
#include "gc/z/zBarrierSetRuntime.hpp"
#include "gc/z/zThreadLocalData.hpp"
#include "memory/resourceArea.hpp"
#include "runtime/sharedRuntime.hpp"
#include "utilities/macros.hpp"
#ifdef COMPILER1
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "gc/z/c1/zBarrierSetC1.hpp"
#endif // COMPILER1
#include "gc/z/zThreadLocalData.hpp"
ZBarrierSetAssembler::ZBarrierSetAssembler() :
_load_barrier_slow_stub(),
_load_barrier_weak_slow_stub() {}
#ifdef COMPILER2
#include "gc/z/c2/zBarrierSetC2.hpp"
#endif // COMPILER2
#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
@ -66,7 +67,7 @@ void ZBarrierSetAssembler::load_at(MacroAssembler* masm,
assert_different_registers(rscratch1, rscratch2, src.base());
assert_different_registers(rscratch1, rscratch2, dst);
RegSet savedRegs = RegSet::range(r0,r28) - RegSet::of(dst, rscratch1, rscratch2);
RegSet savedRegs = RegSet::range(r0, r28) - RegSet::of(dst, rscratch1, rscratch2);
Label done;
@ -206,7 +207,8 @@ void ZBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm,
// The Address offset is too large for a direct load: -784. Our range is +127 to -128.
__ mov(tmp, (long int)(in_bytes(ZThreadLocalData::address_bad_mask_offset()) -
in_bytes(JavaThread::jni_environment_offset())));
in_bytes(JavaThread::jni_environment_offset())));
// Load address bad mask
__ add(tmp, jni_env, tmp);
__ ldr(tmp, Address(tmp));
@ -294,12 +296,12 @@ void ZBarrierSetAssembler::generate_c1_load_barrier_runtime_stub(StubAssembler*
__ prologue("zgc_load_barrier stub", false);
// We don't use push/pop_clobbered_registers() - we need to pull out the result from r0.
for (int i = 0; i < 32; i +=2) {
__ stpd(as_FloatRegister(i), as_FloatRegister(i+1), Address(__ pre(sp,-16)));
for (int i = 0; i < 32; i += 2) {
__ stpd(as_FloatRegister(i), as_FloatRegister(i + 1), Address(__ pre(sp,-16)));
}
RegSet saveRegs = RegSet::range(r0,r28) - RegSet::of(r0);
__ push(saveRegs, sp);
const RegSet save_regs = RegSet::range(r1, r28);
__ push(save_regs, sp);
// Setup arguments
__ load_parameter(0, c_rarg0);
@ -307,98 +309,161 @@ void ZBarrierSetAssembler::generate_c1_load_barrier_runtime_stub(StubAssembler*
__ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2);
__ pop(saveRegs, sp);
__ pop(save_regs, sp);
for (int i = 30; i >0; i -=2) {
__ ldpd(as_FloatRegister(i), as_FloatRegister(i+1), Address(__ post(sp, 16)));
}
for (int i = 30; i >= 0; i -= 2) {
__ ldpd(as_FloatRegister(i), as_FloatRegister(i + 1), Address(__ post(sp, 16)));
}
__ epilogue();
}
#endif // COMPILER1
#ifdef COMPILER2
OptoReg::Name ZBarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) {
if (!OptoReg::is_reg(opto_reg)) {
return OptoReg::Bad;
}
const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
if (vm_reg->is_FloatRegister()) {
return opto_reg & ~1;
}
return opto_reg;
}
#undef __
#define __ cgen->assembler()->
#define __ _masm->
// Generates a register specific stub for calling
// ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded() or
// ZBarrierSetRuntime::load_barrier_on_weak_oop_field_preloaded().
//
// The raddr register serves as both input and output for this stub. When the stub is
// called the raddr register contains the object field address (oop*) where the bad oop
// was loaded from, which caused the slow path to be taken. On return from the stub the
// raddr register contains the good/healed oop returned from
// ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded() or
// ZBarrierSetRuntime::load_barrier_on_weak_oop_field_preloaded().
static address generate_load_barrier_stub(StubCodeGenerator* cgen, Register raddr, DecoratorSet decorators) {
// Don't generate stub for invalid registers
if (raddr == zr || raddr == r29 || raddr == r30) {
return NULL;
class ZSaveLiveRegisters {
private:
MacroAssembler* const _masm;
RegSet _gp_regs;
RegSet _fp_regs;
public:
void initialize(ZLoadBarrierStubC2* stub) {
// Create mask of live registers
RegMask live = stub->live();
// Record registers that need to be saved/restored
while (live.is_NotEmpty()) {
const OptoReg::Name opto_reg = live.find_first_elem();
live.Remove(opto_reg);
if (OptoReg::is_reg(opto_reg)) {
const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
if (vm_reg->is_Register()) {
_gp_regs += RegSet::of(vm_reg->as_Register());
} else if (vm_reg->is_FloatRegister()) {
_fp_regs += RegSet::of((Register)vm_reg->as_FloatRegister());
} else {
fatal("Unknown register type");
}
}
}
// Remove C-ABI SOE registers, scratch regs and _ref register that will be updated
_gp_regs -= RegSet::range(r19, r30) + RegSet::of(r8, r9, stub->ref());
}
// Create stub name
char name[64];
const bool weak = (decorators & ON_WEAK_OOP_REF) != 0;
os::snprintf(name, sizeof(name), "zgc_load_barrier%s_stub_%s", weak ? "_weak" : "", raddr->name());
ZSaveLiveRegisters(MacroAssembler* masm, ZLoadBarrierStubC2* stub) :
_masm(masm),
_gp_regs(),
_fp_regs() {
__ align(CodeEntryAlignment);
StubCodeMark mark(cgen, "StubRoutines", os::strdup(name, mtCode));
address start = __ pc();
// Figure out what registers to save/restore
initialize(stub);
// Save live registers
RegSet savedRegs = RegSet::range(r0,r18) - RegSet::of(raddr);
__ enter();
__ push(savedRegs, sp);
// Setup arguments
if (raddr != c_rarg1) {
__ mov(c_rarg1, raddr);
// Save registers
__ push(_gp_regs, sp);
__ push_fp(_fp_regs, sp);
}
__ ldr(c_rarg0, Address(raddr));
~ZSaveLiveRegisters() {
// Restore registers
__ pop_fp(_fp_regs, sp);
__ pop(_gp_regs, sp);
}
};
// Call barrier function
__ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), c_rarg0, c_rarg1);
#undef __
#define __ _masm->
// Move result returned in r0 to raddr, if needed
if (raddr != r0) {
__ mov(raddr, r0);
class ZSetupArguments {
private:
MacroAssembler* const _masm;
const Register _ref;
const Address _ref_addr;
public:
ZSetupArguments(MacroAssembler* masm, ZLoadBarrierStubC2* stub) :
_masm(masm),
_ref(stub->ref()),
_ref_addr(stub->ref_addr()) {
// Setup arguments
if (_ref_addr.base() == noreg) {
// No self healing
if (_ref != c_rarg0) {
__ mov(c_rarg0, _ref);
}
__ mov(c_rarg1, 0);
} else {
// Self healing
if (_ref == c_rarg0) {
// _ref is already at correct place
__ lea(c_rarg1, _ref_addr);
} else if (_ref != c_rarg1) {
// _ref is in wrong place, but not in c_rarg1, so fix it first
__ lea(c_rarg1, _ref_addr);
__ mov(c_rarg0, _ref);
} else if (_ref_addr.base() != c_rarg0 && _ref_addr.index() != c_rarg0) {
assert(_ref == c_rarg1, "Mov ref first, vacating c_rarg0");
__ mov(c_rarg0, _ref);
__ lea(c_rarg1, _ref_addr);
} else {
assert(_ref == c_rarg1, "Need to vacate c_rarg1 and _ref_addr is using c_rarg0");
if (_ref_addr.base() == c_rarg0 || _ref_addr.index() == c_rarg0) {
__ mov(rscratch2, c_rarg1);
__ lea(c_rarg1, _ref_addr);
__ mov(c_rarg0, rscratch2);
} else {
ShouldNotReachHere();
}
}
}
}
__ pop(savedRegs, sp);
__ leave();
__ ret(lr);
~ZSetupArguments() {
// Transfer result
if (_ref != r0) {
__ mov(_ref, r0);
}
}
};
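The constructor above is pure register shuffling: on exit, c_rarg0 must hold the loaded ref and c_rarg1 the field address (or zero when there is no self healing). A summary sketch of the cases it distinguishes:

// no self healing                        -> mov c_rarg0, ref; mov c_rarg1, 0
// ref == c_rarg0                         -> lea c_rarg1 only, ref already placed
// ref outside c_rarg0/c_rarg1            -> lea c_rarg1 first, then mov c_rarg0
// ref == c_rarg1, addr avoids c_rarg0    -> mov c_rarg0 first, then lea c_rarg1
// ref == c_rarg1, addr uses c_rarg0      -> park ref in rscratch2, lea, then mov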
return start;
#undef __
#define __ masm->
void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, ZLoadBarrierStubC2* stub) const {
BLOCK_COMMENT("ZLoadBarrierStubC2");
// Stub entry
__ bind(*stub->entry());
{
ZSaveLiveRegisters save_live_registers(masm, stub);
ZSetupArguments setup_arguments(masm, stub);
__ mov(rscratch1, stub->slow_path());
__ blr(rscratch1);
}
// Stub exit
__ b(*stub->continuation());
}
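Taken together, every C2 load barrier stub now has the same shape; the two RAII helpers bracket the call, roughly:

// stub_entry:
//   push live GP/FP registers             (ZSaveLiveRegisters ctor)
//   c_rarg0 = ref, c_rarg1 = &field or 0  (ZSetupArguments ctor)
//   blr slow_path                         // ZBarrierSetRuntime::load_barrier_on_*
//   mov ref, r0                           (ZSetupArguments dtor)
//   pop live GP/FP registers              (ZSaveLiveRegisters dtor)
//   b continuation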
#undef __
static void barrier_stubs_init_inner(const char* label, const DecoratorSet decorators, address* stub) {
const int nregs = 28; // Exclude FP, XZR, SP from calculation.
const int code_size = nregs * 254; // Rough estimate of code size
ResourceMark rm;
CodeBuffer buf(BufferBlob::create(label, code_size));
StubCodeGenerator cgen(&buf);
for (int i = 0; i < nregs; i++) {
const Register reg = as_Register(i);
stub[i] = generate_load_barrier_stub(&cgen, reg, decorators);
}
}
void ZBarrierSetAssembler::barrier_stubs_init() {
barrier_stubs_init_inner("zgc_load_barrier_stubs", ON_STRONG_OOP_REF, _load_barrier_slow_stub);
barrier_stubs_init_inner("zgc_load_barrier_weak_stubs", ON_WEAK_OOP_REF, _load_barrier_weak_slow_stub);
}
address ZBarrierSetAssembler::load_barrier_slow_stub(Register reg) {
return _load_barrier_slow_stub[reg->encoding()];
}
address ZBarrierSetAssembler::load_barrier_weak_slow_stub(Register reg) {
return _load_barrier_weak_slow_stub[reg->encoding()];
}
#endif // COMPILER2

src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.hpp

@ -24,6 +24,12 @@
#ifndef CPU_AARCH64_GC_Z_ZBARRIERSETASSEMBLER_AARCH64_HPP
#define CPU_AARCH64_GC_Z_ZBARRIERSETASSEMBLER_AARCH64_HPP
#include "code/vmreg.hpp"
#include "oops/accessDecorators.hpp"
#ifdef COMPILER2
#include "opto/optoreg.hpp"
#endif // COMPILER2
#ifdef COMPILER1
class LIR_Assembler;
class LIR_OprDesc;
@ -32,14 +38,13 @@ class StubAssembler;
class ZLoadBarrierStubC1;
#endif // COMPILER1
#ifdef COMPILER2
class Node;
class ZLoadBarrierStubC2;
#endif // COMPILER2
class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase {
private:
address _load_barrier_slow_stub[RegisterImpl::number_of_registers];
address _load_barrier_weak_slow_stub[RegisterImpl::number_of_registers];
public:
ZBarrierSetAssembler();
virtual void load_at(MacroAssembler* masm,
DecoratorSet decorators,
BasicType type,
@ -83,10 +88,13 @@ public:
DecoratorSet decorators) const;
#endif // COMPILER1
virtual void barrier_stubs_init();
#ifdef COMPILER2
OptoReg::Name refine_register(const Node* node,
OptoReg::Name opto_reg);
address load_barrier_slow_stub(Register reg);
address load_barrier_weak_slow_stub(Register reg);
void generate_c2_load_barrier_stub(MacroAssembler* masm,
ZLoadBarrierStubC2* stub) const;
#endif // COMPILER2
};
#endif // CPU_AARCH64_GC_Z_ZBARRIERSETASSEMBLER_AARCH64_HPP

src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad

@ -24,155 +24,244 @@
source_hpp %{
#include "gc/z/c2/zBarrierSetC2.hpp"
#include "gc/z/zThreadLocalData.hpp"
%}
source %{
#include "gc/z/zBarrierSetAssembler.hpp"
static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) {
ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, weak);
__ ldr(tmp, Address(rthread, ZThreadLocalData::address_bad_mask_offset()));
__ andr(tmp, tmp, ref);
__ cbnz(tmp, *stub->entry());
__ bind(*stub->continuation());
}
static void z_load_barrier_slow_reg(MacroAssembler& _masm, Register dst,
Register base, int index, int scale,
int disp, bool weak) {
const address stub = weak ? ZBarrierSet::assembler()->load_barrier_weak_slow_stub(dst)
: ZBarrierSet::assembler()->load_barrier_slow_stub(dst);
if (index == -1) {
if (disp != 0) {
__ lea(dst, Address(base, disp));
} else {
__ mov(dst, base);
}
} else {
Register index_reg = as_Register(index);
if (disp == 0) {
__ lea(dst, Address(base, index_reg, Address::lsl(scale)));
} else {
__ lea(dst, Address(base, disp));
__ lea(dst, Address(dst, index_reg, Address::lsl(scale)));
}
}
__ far_call(RuntimeAddress(stub));
static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) {
ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, false /* weak */);
__ b(*stub->entry());
__ bind(*stub->continuation());
}
%}
//
// Execute ZGC load barrier (strong) slow path
//
instruct loadBarrierSlowReg(iRegP dst, memory src, rFlagsReg cr,
vRegD_V0 v0, vRegD_V1 v1, vRegD_V2 v2, vRegD_V3 v3, vRegD_V4 v4,
vRegD_V5 v5, vRegD_V6 v6, vRegD_V7 v7, vRegD_V8 v8, vRegD_V9 v9,
vRegD_V10 v10, vRegD_V11 v11, vRegD_V12 v12, vRegD_V13 v13, vRegD_V14 v14,
vRegD_V15 v15, vRegD_V16 v16, vRegD_V17 v17, vRegD_V18 v18, vRegD_V19 v19,
vRegD_V20 v20, vRegD_V21 v21, vRegD_V22 v22, vRegD_V23 v23, vRegD_V24 v24,
vRegD_V25 v25, vRegD_V26 v26, vRegD_V27 v27, vRegD_V28 v28, vRegD_V29 v29,
vRegD_V30 v30, vRegD_V31 v31) %{
match(Set dst (LoadBarrierSlowReg src dst));
predicate(!n->as_LoadBarrierSlowReg()->is_weak());
// Load Pointer
instruct zLoadP(iRegPNoSp dst, memory mem, rFlagsReg cr)
%{
match(Set dst (LoadP mem));
predicate(UseZGC && !needs_acquiring_load(n) && (n->as_Load()->barrier_data() == ZLoadBarrierStrong));
effect(TEMP dst, KILL cr);
effect(KILL cr,
KILL v0, KILL v1, KILL v2, KILL v3, KILL v4, KILL v5, KILL v6, KILL v7,
KILL v8, KILL v9, KILL v10, KILL v11, KILL v12, KILL v13, KILL v14,
KILL v15, KILL v16, KILL v17, KILL v18, KILL v19, KILL v20, KILL v21,
KILL v22, KILL v23, KILL v24, KILL v25, KILL v26, KILL v27, KILL v28,
KILL v29, KILL v30, KILL v31);
ins_cost(4 * INSN_COST);
format %{ "lea $dst, $src\n\t"
"call #ZLoadBarrierSlowPath" %}
format %{ "ldr $dst, $mem" %}
ins_encode %{
z_load_barrier_slow_reg(_masm, $dst$$Register, $src$$base$$Register,
$src$$index, $src$$scale, $src$$disp, false);
const Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
__ ldr($dst$$Register, ref_addr);
if (barrier_data() != ZLoadBarrierElided) {
z_load_barrier(_masm, this, ref_addr, $dst$$Register, rscratch2 /* tmp */, false /* weak */);
}
%}
ins_pipe(pipe_slow);
ins_pipe(iload_reg_mem);
%}
//
// Execute ZGC load barrier (weak) slow path
//
instruct loadBarrierWeakSlowReg(iRegP dst, memory src, rFlagsReg cr,
vRegD_V0 v0, vRegD_V1 v1, vRegD_V2 v2, vRegD_V3 v3, vRegD_V4 v4,
vRegD_V5 v5, vRegD_V6 v6, vRegD_V7 v7, vRegD_V8 v8, vRegD_V9 v9,
vRegD_V10 v10, vRegD_V11 v11, vRegD_V12 v12, vRegD_V13 v13, vRegD_V14 v14,
vRegD_V15 v15, vRegD_V16 v16, vRegD_V17 v17, vRegD_V18 v18, vRegD_V19 v19,
vRegD_V20 v20, vRegD_V21 v21, vRegD_V22 v22, vRegD_V23 v23, vRegD_V24 v24,
vRegD_V25 v25, vRegD_V26 v26, vRegD_V27 v27, vRegD_V28 v28, vRegD_V29 v29,
vRegD_V30 v30, vRegD_V31 v31) %{
match(Set dst (LoadBarrierSlowReg src dst));
predicate(n->as_LoadBarrierSlowReg()->is_weak());
// Load Weak Pointer
instruct zLoadWeakP(iRegPNoSp dst, memory mem, rFlagsReg cr)
%{
match(Set dst (LoadP mem));
predicate(UseZGC && !needs_acquiring_load(n) && (n->as_Load()->barrier_data() == ZLoadBarrierWeak));
effect(TEMP dst, KILL cr);
effect(KILL cr,
KILL v0, KILL v1, KILL v2, KILL v3, KILL v4, KILL v5, KILL v6, KILL v7,
KILL v8, KILL v9, KILL v10, KILL v11, KILL v12, KILL v13, KILL v14,
KILL v15, KILL v16, KILL v17, KILL v18, KILL v19, KILL v20, KILL v21,
KILL v22, KILL v23, KILL v24, KILL v25, KILL v26, KILL v27, KILL v28,
KILL v29, KILL v30, KILL v31);
ins_cost(4 * INSN_COST);
format %{ "lea $dst, $src\n\t"
"call #ZLoadBarrierSlowPath" %}
format %{ "ldr $dst, $mem" %}
ins_encode %{
z_load_barrier_slow_reg(_masm, $dst$$Register, $src$$base$$Register,
$src$$index, $src$$scale, $src$$disp, true);
const Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
__ ldr($dst$$Register, ref_addr);
z_load_barrier(_masm, this, ref_addr, $dst$$Register, rscratch2 /* tmp */, true /* weak */);
%}
ins_pipe(pipe_slow);
ins_pipe(iload_reg_mem);
%}
// Load Pointer Volatile
instruct zLoadPVolatile(iRegPNoSp dst, indirect mem /* sync_memory */, rFlagsReg cr)
%{
match(Set dst (LoadP mem));
predicate(UseZGC && needs_acquiring_load(n) && n->as_Load()->barrier_data() == ZLoadBarrierStrong);
effect(TEMP dst, KILL cr);
// Specialized versions of compareAndExchangeP that add a keepalive that is consumed
// but does not affect the output.
ins_cost(VOLATILE_REF_COST);
format %{ "ldar $dst, $mem\t" %}
ins_encode %{
__ ldar($dst$$Register, $mem$$Register);
if (barrier_data() != ZLoadBarrierElided) {
z_load_barrier(_masm, this, Address($mem$$Register), $dst$$Register, rscratch2 /* tmp */, false /* weak */);
}
%}
ins_pipe(pipe_serial);
%}
instruct zCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
match(Set res (CompareAndSwapP mem (Binary oldval newval)));
match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
predicate(UseZGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong);
effect(KILL cr, TEMP_DEF res);
instruct z_compareAndExchangeP(iRegPNoSp res, indirect mem,
iRegP oldval, iRegP newval, iRegP keepalive,
rFlagsReg cr) %{
match(Set res (ZCompareAndExchangeP (Binary mem keepalive) (Binary oldval newval)));
ins_cost(2 * VOLATILE_REF_COST);
format %{ "cmpxchg $mem, $oldval, $newval\n\t"
"cset $res, EQ" %}
ins_encode %{
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
false /* acquire */, true /* release */, false /* weak */, rscratch2);
__ cset($res$$Register, Assembler::EQ);
if (barrier_data() != ZLoadBarrierElided) {
Label good;
__ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset()));
__ andr(rscratch1, rscratch1, rscratch2);
__ cbz(rscratch1, good);
z_load_barrier_slow_path(_masm, this, Address($mem$$Register), rscratch2 /* ref */, rscratch1 /* tmp */);
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
false /* acquire */, true /* release */, false /* weak */, rscratch2);
__ cset($res$$Register, Assembler::EQ);
__ bind(good);
}
%}
ins_pipe(pipe_slow);
%}
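The encoding above implements heal-and-retry: the first cmpxchg may fail merely because the field still holds a bad (unhealed) version of the expected oop; in that case the found value is healed through the stub and the cmpxchg is retried once. As a sketch (rscratch2 receives the value found in memory):

// cmpxchg mem, oldval, newval   ; rscratch2 = value found in memory
// cset res, EQ
// if ((rscratch2 & bad_mask) == 0) -> done   ; genuine success/failure
// else heal rscratch2 via the stub, retry the cmpxchg, cset res again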
instruct zCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
match(Set res (CompareAndSwapP mem (Binary oldval newval)));
match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
predicate(UseZGC && needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong));
effect(KILL cr, TEMP_DEF res);
ins_cost(2 * VOLATILE_REF_COST);
format %{ "cmpxchg $mem, $oldval, $newval\n\t"
"cset $res, EQ" %}
ins_encode %{
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
true /* acquire */, true /* release */, false /* weak */, rscratch2);
__ cset($res$$Register, Assembler::EQ);
if (barrier_data() != ZLoadBarrierElided) {
Label good;
__ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset()));
__ andr(rscratch1, rscratch1, rscratch2);
__ cbz(rscratch1, good);
z_load_barrier_slow_path(_masm, this, Address($mem$$Register), rscratch2 /* ref */, rscratch1 /* tmp */);
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
true /* acquire */, true /* release */, false /* weak */, rscratch2);
__ cset($res$$Register, Assembler::EQ);
__ bind(good);
}
%}
ins_pipe(pipe_slow);
%}
instruct zCompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
predicate(UseZGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong);
effect(TEMP_DEF res, KILL cr);
format %{
"cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
%}
ins_cost(2 * VOLATILE_REF_COST);
format %{ "cmpxchg $res = $mem, $oldval, $newval" %}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::xword, /*acquire*/ false, /*release*/ true,
/*weak*/ false, $res$$Register);
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
false /* acquire */, true /* release */, false /* weak */, $res$$Register);
if (barrier_data() != ZLoadBarrierElided) {
Label good;
__ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset()));
__ andr(rscratch1, rscratch1, $res$$Register);
__ cbz(rscratch1, good);
z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, rscratch1 /* tmp */);
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
false /* acquire */, true /* release */, false /* weak */, $res$$Register);
__ bind(good);
}
%}
ins_pipe(pipe_slow);
%}
instruct z_compareAndSwapP(iRegINoSp res,
indirect mem,
iRegP oldval, iRegP newval, iRegP keepalive,
rFlagsReg cr) %{
match(Set res (ZCompareAndSwapP (Binary mem keepalive) (Binary oldval newval)));
match(Set res (ZWeakCompareAndSwapP (Binary mem keepalive) (Binary oldval newval)));
ins_cost(2 * VOLATILE_REF_COST);
effect(KILL cr);
format %{
"cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
"cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
%}
ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
aarch64_enc_cset_eq(res));
ins_pipe(pipe_slow);
%}
instruct z_get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev,
iRegP keepalive) %{
match(Set prev (ZGetAndSetP mem (Binary newv keepalive)));
instruct zCompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
predicate(UseZGC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong);
effect(TEMP_DEF res, KILL cr);
ins_cost(2 * VOLATILE_REF_COST);
format %{ "cmpxchg $res = $mem, $oldval, $newval" %}
ins_encode %{
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
true /* acquire */, true /* release */, false /* weak */, $res$$Register);
if (barrier_data() != ZLoadBarrierElided) {
Label good;
__ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset()));
__ andr(rscratch1, rscratch1, $res$$Register);
__ cbz(rscratch1, good);
z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, rscratch1 /* tmp */);
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
true /* acquire */, true /* release */, false /* weak */, $res$$Register);
__ bind(good);
}
%}
ins_pipe(pipe_slow);
%}
instruct zGetAndSetP(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{
match(Set prev (GetAndSetP mem newv));
predicate(UseZGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong);
effect(TEMP_DEF prev, KILL cr);
ins_cost(2 * VOLATILE_REF_COST);
format %{ "atomic_xchg $prev, $newv, [$mem]" %}
ins_encode %{
__ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
__ atomic_xchg($prev$$Register, $newv$$Register, $mem$$Register);
if (barrier_data() != ZLoadBarrierElided) {
z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, rscratch2 /* tmp */, false /* weak */);
}
%}
ins_pipe(pipe_serial);
%}
instruct zGetAndSetPAcq(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{
match(Set prev (GetAndSetP mem newv));
predicate(UseZGC && needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong));
effect(TEMP_DEF prev, KILL cr);
ins_cost(VOLATILE_REF_COST);
format %{ "atomic_xchg_acq $prev, $newv, [$mem]" %}
ins_encode %{
__ atomic_xchgal($prev$$Register, $newv$$Register, $mem$$Register);
if (barrier_data() != ZLoadBarrierElided) {
z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, rscratch2 /* tmp */, false /* weak */);
}
%}
ins_pipe(pipe_serial);
%}

src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp

@ -2132,6 +2132,65 @@ int MacroAssembler::pop(unsigned int bitset, Register stack) {
return count;
}
// Push the FP/SIMD registers in the bit set supplied. Don't push sp.
// Return the number of registers pushed
int MacroAssembler::push_fp(unsigned int bitset, Register stack) {
int words_pushed = 0;
// Scan bitset to accumulate register pairs
unsigned char regs[32];
int count = 0;
for (int reg = 0; reg <= 31; reg++) {
if (1 & bitset)
regs[count++] = reg;
bitset >>= 1;
}
regs[count++] = zr->encoding_nocheck();
count &= ~1; // Only push an even number of regs
// Always pushing full 128 bit registers.
if (count) {
stpq(as_FloatRegister(regs[0]), as_FloatRegister(regs[1]), Address(pre(stack, -count * wordSize * 2)));
words_pushed += 2;
}
for (int i = 2; i < count; i += 2) {
stpq(as_FloatRegister(regs[i]), as_FloatRegister(regs[i+1]), Address(stack, i * wordSize * 2));
words_pushed += 2;
}
assert(words_pushed == count, "oops, pushed != count");
return count;
}
int MacroAssembler::pop_fp(unsigned int bitset, Register stack) {
int words_pushed = 0;
// Scan bitset to accumulate register pairs
unsigned char regs[32];
int count = 0;
for (int reg = 0; reg <= 31; reg++) {
if (1 & bitset)
regs[count++] = reg;
bitset >>= 1;
}
regs[count++] = zr->encoding_nocheck();
count &= ~1;
for (int i = 2; i < count; i += 2) {
ldpq(as_FloatRegister(regs[i]), as_FloatRegister(regs[i+1]), Address(stack, i * wordSize * 2));
words_pushed += 2;
}
if (count) {
ldpq(as_FloatRegister(regs[0]), as_FloatRegister(regs[1]), Address(post(stack, count * wordSize * 2)));
words_pushed += 2;
}
assert(words_pushed == count, "oops, pushed != count");
return count;
}
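A minimal usage sketch of these helpers, matching how the aarch64 ZSaveLiveRegisters uses them (register choices illustrative; assumes a MacroAssembler context where __ is defined):

// RegSet gp_regs = RegSet::of(r0, r2, r10);
// RegSet fp_regs = RegSet::of((Register)v0, (Register)v1);
// __ push(gp_regs, sp);
// __ push_fp(fp_regs, sp);   // stores pairs of full 128-bit q registers
// ... emit the call ...
// __ pop_fp(fp_regs, sp);
// __ pop(gp_regs, sp);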
#ifdef ASSERT
void MacroAssembler::verify_heapbase(const char* msg) {
#if 0

src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp

@ -442,12 +442,18 @@ private:
int push(unsigned int bitset, Register stack);
int pop(unsigned int bitset, Register stack);
int push_fp(unsigned int bitset, Register stack);
int pop_fp(unsigned int bitset, Register stack);
void mov(Register dst, Address a);
public:
void push(RegSet regs, Register stack) { if (regs.bits()) push(regs.bits(), stack); }
void pop(RegSet regs, Register stack) { if (regs.bits()) pop(regs.bits(), stack); }
void push_fp(RegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); }
void pop_fp(RegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); }
// Push and pop everything that might be clobbered by a native
// runtime call except rscratch1 and rscratch2. (They are always
// scratch, so we don't have to protect them.) Only save the lower

src/hotspot/cpu/aarch64/register_aarch64.hpp

@ -230,6 +230,11 @@ public:
return *this;
}
RegSet &operator-=(const RegSet aSet) {
*this = *this - aSet;
return *this;
}
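With operator-= in place, live-set maintenance reads naturally; this is exactly the pattern ZSaveLiveRegisters::initialize() uses in zBarrierSetAssembler_aarch64.cpp:

// RegSet regs = RegSet::range(r0, r28);
// regs -= RegSet::range(r19, r30) + RegSet::of(r8, r9);   // drop SOE + scratch regs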
static RegSet of(Register r1) {
return RegSet(r1);
}

src/hotspot/cpu/x86/gc/z/zArguments_x86.cpp

@ -23,20 +23,7 @@
#include "precompiled.hpp"
#include "gc/z/zArguments.hpp"
#include "runtime/globals.hpp"
#include "runtime/globals_extension.hpp"
#include "utilities/debug.hpp"
void ZArguments::initialize_platform() {
#ifdef COMPILER2
// The C2 barrier slow path expects vector registers to be at least
// 16 bytes wide, which is the minimum width available on all
// x86-64 systems. However, the user could have specified a lower
// number on the command-line, in which case we print a warning
// and raise it to 16.
if (MaxVectorSize < 16) {
warning("ZGC requires MaxVectorSize to be at least 16");
FLAG_SET_DEFAULT(MaxVectorSize, 16);
}
#endif
// Does nothing
}

src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp

@ -24,22 +24,22 @@
#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "code/codeBlob.hpp"
#include "code/vmreg.inline.hpp"
#include "gc/z/zBarrier.inline.hpp"
#include "gc/z/zBarrierSet.hpp"
#include "gc/z/zBarrierSetAssembler.hpp"
#include "gc/z/zBarrierSetRuntime.hpp"
#include "memory/resourceArea.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/sharedRuntime.hpp"
#include "utilities/macros.hpp"
#ifdef COMPILER1
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "gc/z/c1/zBarrierSetC1.hpp"
#endif // COMPILER1
ZBarrierSetAssembler::ZBarrierSetAssembler() :
_load_barrier_slow_stub(),
_load_barrier_weak_slow_stub() {}
#ifdef COMPILER2
#include "gc/z/c2/zBarrierSetC2.hpp"
#endif // COMPILER2
#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
@ -344,137 +344,327 @@ void ZBarrierSetAssembler::generate_c1_load_barrier_runtime_stub(StubAssembler*
#endif // COMPILER1
#ifdef COMPILER2
OptoReg::Name ZBarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) {
if (!OptoReg::is_reg(opto_reg)) {
return OptoReg::Bad;
}
const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
if (vm_reg->is_XMMRegister()) {
opto_reg &= ~15;
switch (node->ideal_reg()) {
case Op_VecX:
opto_reg |= 2;
break;
case Op_VecY:
opto_reg |= 4;
break;
case Op_VecZ:
opto_reg |= 8;
break;
default:
opto_reg |= 1;
break;
}
}
return opto_reg;
}
// We use the vec_spill_helper from the x86.ad file to avoid reinventing this wheel
extern int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
int stack_offset, int reg, uint ireg, outputStream* st);
#undef __
#define __ cgen->assembler()->
#define __ _masm->
// Generates a register specific stub for calling
// ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded() or
// ZBarrierSetRuntime::load_barrier_on_weak_oop_field_preloaded().
//
// The raddr register serves as both input and output for this stub. When the stub is
// called the raddr register contains the object field address (oop*) where the bad oop
// was loaded from, which caused the slow path to be taken. On return from the stub the
// raddr register contains the good/healed oop returned from
// ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded() or
// ZBarrierSetRuntime::load_barrier_on_weak_oop_field_preloaded().
static address generate_load_barrier_stub(StubCodeGenerator* cgen, Register raddr, DecoratorSet decorators) {
// Don't generate stub for invalid registers
if (raddr == rsp || raddr == r15) {
return NULL;
class ZSaveLiveRegisters {
private:
struct XMMRegisterData {
XMMRegister _reg;
int _size;
// Used by GrowableArray::find()
bool operator == (const XMMRegisterData& other) {
return _reg == other._reg;
}
};
MacroAssembler* const _masm;
GrowableArray<Register> _gp_registers;
GrowableArray<XMMRegisterData> _xmm_registers;
int _spill_size;
int _spill_offset;
static int xmm_compare_register_size(XMMRegisterData* left, XMMRegisterData* right) {
if (left->_size == right->_size) {
return 0;
}
return (left->_size < right->_size) ? -1 : 1;
}
// Create stub name
char name[64];
const bool weak = (decorators & ON_WEAK_OOP_REF) != 0;
os::snprintf(name, sizeof(name), "zgc_load_barrier%s_stub_%s", weak ? "_weak" : "", raddr->name());
__ align(CodeEntryAlignment);
StubCodeMark mark(cgen, "StubRoutines", os::strdup(name, mtCode));
address start = __ pc();
// Save live registers
if (raddr != rax) {
__ push(rax);
}
if (raddr != rcx) {
__ push(rcx);
}
if (raddr != rdx) {
__ push(rdx);
}
if (raddr != rsi) {
__ push(rsi);
}
if (raddr != rdi) {
__ push(rdi);
}
if (raddr != r8) {
__ push(r8);
}
if (raddr != r9) {
__ push(r9);
}
if (raddr != r10) {
__ push(r10);
}
if (raddr != r11) {
__ push(r11);
static int xmm_slot_size(OptoReg::Name opto_reg) {
// The low order 4 bytes denote what size of the XMM register is live
return (opto_reg & 15) << 3;
}
// Setup arguments
if (raddr != c_rarg1) {
__ movq(c_rarg1, raddr);
}
__ movq(c_rarg0, Address(raddr, 0));
// Call barrier function
__ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), c_rarg0, c_rarg1);
// Move result returned in rax to raddr, if needed
if (raddr != rax) {
__ movq(raddr, rax);
static uint xmm_ideal_reg_for_size(int reg_size) {
switch (reg_size) {
case 8:
return Op_VecD;
case 16:
return Op_VecX;
case 32:
return Op_VecY;
case 64:
return Op_VecZ;
default:
fatal("Invalid register size %d", reg_size);
return 0;
}
}
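A worked example of the width encoding shared by refine_register() and these helpers:

// Op_VecX live in an XMM register:
//   refine_register():  opto_reg = (opto_reg & ~15) | 2
//   xmm_slot_size():    (2 & 15) << 3  = 16 bytes
//   xmm_ideal_reg_for_size(16)         = Op_VecX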
// Restore saved registers
if (raddr != r11) {
__ pop(r11);
}
if (raddr != r10) {
__ pop(r10);
}
if (raddr != r9) {
__ pop(r9);
}
if (raddr != r8) {
__ pop(r8);
}
if (raddr != rdi) {
__ pop(rdi);
}
if (raddr != rsi) {
__ pop(rsi);
}
if (raddr != rdx) {
__ pop(rdx);
}
if (raddr != rcx) {
__ pop(rcx);
}
if (raddr != rax) {
__ pop(rax);
bool xmm_needs_vzeroupper() const {
return _xmm_registers.is_nonempty() && _xmm_registers.at(0)._size > 16;
}
__ ret(0);
void xmm_register_save(const XMMRegisterData& reg_data) {
const OptoReg::Name opto_reg = OptoReg::as_OptoReg(reg_data._reg->as_VMReg());
const uint ideal_reg = xmm_ideal_reg_for_size(reg_data._size);
_spill_offset -= reg_data._size;
vec_spill_helper(__ code(), false /* do_size */, false /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
}
return start;
void xmm_register_restore(const XMMRegisterData& reg_data) {
const OptoReg::Name opto_reg = OptoReg::as_OptoReg(reg_data._reg->as_VMReg());
const uint ideal_reg = xmm_ideal_reg_for_size(reg_data._size);
vec_spill_helper(__ code(), false /* do_size */, true /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
_spill_offset += reg_data._size;
}
void gp_register_save(Register reg) {
_spill_offset -= 8;
__ movq(Address(rsp, _spill_offset), reg);
}
void gp_register_restore(Register reg) {
__ movq(reg, Address(rsp, _spill_offset));
_spill_offset += 8;
}
void initialize(ZLoadBarrierStubC2* stub) {
// Create mask of caller saved registers that need to
// be saved/restored if live
RegMask caller_saved;
caller_saved.Insert(OptoReg::as_OptoReg(rax->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(rcx->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(rdx->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(rsi->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(rdi->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r8->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r9->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r10->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r11->as_VMReg()));
caller_saved.Remove(OptoReg::as_OptoReg(stub->ref()->as_VMReg()));
// Create mask of live registers
RegMask live = stub->live();
if (stub->tmp() != noreg) {
live.Insert(OptoReg::as_OptoReg(stub->tmp()->as_VMReg()));
}
int gp_spill_size = 0;
int xmm_spill_size = 0;
// Record registers that need to be saved/restored
while (live.is_NotEmpty()) {
const OptoReg::Name opto_reg = live.find_first_elem();
const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
live.Remove(opto_reg);
if (vm_reg->is_Register()) {
if (caller_saved.Member(opto_reg)) {
_gp_registers.append(vm_reg->as_Register());
gp_spill_size += 8;
}
} else if (vm_reg->is_XMMRegister()) {
// We encode in the low order 4 bits of the opto_reg how large a part of the register is live
const VMReg vm_reg_base = OptoReg::as_VMReg(opto_reg & ~15);
const int reg_size = xmm_slot_size(opto_reg);
const XMMRegisterData reg_data = { vm_reg_base->as_XMMRegister(), reg_size };
const int reg_index = _xmm_registers.find(reg_data);
if (reg_index == -1) {
// Not previously appended
_xmm_registers.append(reg_data);
xmm_spill_size += reg_size;
} else {
// Previously appended, update size
const int reg_size_prev = _xmm_registers.at(reg_index)._size;
if (reg_size > reg_size_prev) {
_xmm_registers.at_put(reg_index, reg_data);
xmm_spill_size += reg_size - reg_size_prev;
}
}
} else {
fatal("Unexpected register type");
}
}
// Sort by size, largest first
_xmm_registers.sort(xmm_compare_register_size);
// Stack pointer must be 16 bytes aligned for the call
_spill_offset = _spill_size = align_up(xmm_spill_size + gp_spill_size, 16);
}
public:
ZSaveLiveRegisters(MacroAssembler* masm, ZLoadBarrierStubC2* stub) :
_masm(masm),
_gp_registers(),
_xmm_registers(),
_spill_size(0),
_spill_offset(0) {
//
// Stack layout after registers have been spilled:
//
// | ... | original rsp, 16 bytes aligned
// ------------------
// | zmm0 high |
// | ... |
// | zmm0 low | 16 bytes aligned
// | ... |
// | ymm1 high |
// | ... |
// | ymm1 low | 16 bytes aligned
// | ... |
// | xmmN high |
// | ... |
// | xmmN low | 8 bytes aligned
// | reg0 | 8 bytes aligned
// | reg1 |
// | ... |
// | regN | new rsp, if 16 bytes aligned
// | <padding> | else new rsp, 16 bytes aligned
// ------------------
//
// Figure out what registers to save/restore
initialize(stub);
// Allocate stack space
if (_spill_size > 0) {
__ subptr(rsp, _spill_size);
}
// Save XMM/YMM/ZMM registers
for (int i = 0; i < _xmm_registers.length(); i++) {
xmm_register_save(_xmm_registers.at(i));
}
if (xmm_needs_vzeroupper()) {
__ vzeroupper();
}
// Save general purpose registers
for (int i = 0; i < _gp_registers.length(); i++) {
gp_register_save(_gp_registers.at(i));
}
}
~ZSaveLiveRegisters() {
// Restore general purpose registers
for (int i = _gp_registers.length() - 1; i >= 0; i--) {
gp_register_restore(_gp_registers.at(i));
}
__ vzeroupper();
// Restore XMM/YMM/ZMM registers
for (int i = _xmm_registers.length() - 1; i >= 0; i--) {
xmm_register_restore(_xmm_registers.at(i));
}
// Free stack space
if (_spill_size > 0) {
__ addptr(rsp, _spill_size);
}
}
};
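A worked example of the spill bookkeeping under the layout pictured above: with one live ymm register and two live GP registers, xmm_spill_size = 32 and gp_spill_size = 16, so _spill_size = _spill_offset = align_up(48, 16) = 48; the ymm is spilled at [rsp+16, rsp+48) and the GP registers land at rsp+8 and rsp+0.

// xmm_register_save: _spill_offset 48 -> 16, store 32 bytes at rsp+16
// gp_register_save:  _spill_offset 16 -> 8 -> 0, movq at rsp+8 and rsp+0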
class ZSetupArguments {
private:
MacroAssembler* const _masm;
const Register _ref;
const Address _ref_addr;
public:
ZSetupArguments(MacroAssembler* masm, ZLoadBarrierStubC2* stub) :
_masm(masm),
_ref(stub->ref()),
_ref_addr(stub->ref_addr()) {
// Setup arguments
if (_ref_addr.base() == noreg) {
// No self healing
if (_ref != c_rarg0) {
__ movq(c_rarg0, _ref);
}
__ xorq(c_rarg1, c_rarg1);
} else {
// Self healing
if (_ref == c_rarg0) {
__ lea(c_rarg1, _ref_addr);
} else if (_ref != c_rarg1) {
__ lea(c_rarg1, _ref_addr);
__ movq(c_rarg0, _ref);
} else if (_ref_addr.base() != c_rarg0 && _ref_addr.index() != c_rarg0) {
__ movq(c_rarg0, _ref);
__ lea(c_rarg1, _ref_addr);
} else {
__ xchgq(c_rarg0, c_rarg1);
if (_ref_addr.base() == c_rarg0) {
__ lea(c_rarg1, Address(c_rarg1, _ref_addr.index(), _ref_addr.scale(), _ref_addr.disp()));
} else if (_ref_addr.index() == c_rarg0) {
__ lea(c_rarg1, Address(_ref_addr.base(), c_rarg1, _ref_addr.scale(), _ref_addr.disp()));
} else {
ShouldNotReachHere();
}
}
}
}
~ZSetupArguments() {
// Transfer result
if (_ref != rax) {
__ movq(_ref, rax);
}
}
};
#undef __
#define __ masm->
void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, ZLoadBarrierStubC2* stub) const {
BLOCK_COMMENT("ZLoadBarrierStubC2");
// Stub entry
__ bind(*stub->entry());
{
ZSaveLiveRegisters save_live_registers(masm, stub);
ZSetupArguments setup_arguments(masm, stub);
__ call(RuntimeAddress(stub->slow_path()));
}
// Stub exit
__ jmp(*stub->continuation());
}
#undef __
static void barrier_stubs_init_inner(const char* label, const DecoratorSet decorators, address* stub) {
const int nregs = RegisterImpl::number_of_registers;
const int code_size = nregs * 128; // Rough estimate of code size
ResourceMark rm;
CodeBuffer buf(BufferBlob::create(label, code_size));
StubCodeGenerator cgen(&buf);
for (int i = 0; i < nregs; i++) {
const Register reg = as_Register(i);
stub[i] = generate_load_barrier_stub(&cgen, reg, decorators);
}
}
void ZBarrierSetAssembler::barrier_stubs_init() {
barrier_stubs_init_inner("zgc_load_barrier_stubs", ON_STRONG_OOP_REF, _load_barrier_slow_stub);
barrier_stubs_init_inner("zgc_load_barrier_weak_stubs", ON_WEAK_OOP_REF, _load_barrier_weak_slow_stub);
}
address ZBarrierSetAssembler::load_barrier_slow_stub(Register reg) {
return _load_barrier_slow_stub[reg->encoding()];
}
address ZBarrierSetAssembler::load_barrier_weak_slow_stub(Register reg) {
return _load_barrier_weak_slow_stub[reg->encoding()];
}
#endif // COMPILER2

src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp

@ -24,6 +24,14 @@
#ifndef CPU_X86_GC_Z_ZBARRIERSETASSEMBLER_X86_HPP
#define CPU_X86_GC_Z_ZBARRIERSETASSEMBLER_X86_HPP
#include "code/vmreg.hpp"
#include "oops/accessDecorators.hpp"
#ifdef COMPILER2
#include "opto/optoreg.hpp"
#endif // COMPILER2
class MacroAssembler;
#ifdef COMPILER1
class LIR_Assembler;
class LIR_OprDesc;
@ -32,14 +40,13 @@ class StubAssembler;
class ZLoadBarrierStubC1;
#endif // COMPILER1
#ifdef COMPILER2
class Node;
class ZLoadBarrierStubC2;
#endif // COMPILER2
class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase {
private:
address _load_barrier_slow_stub[RegisterImpl::number_of_registers];
address _load_barrier_weak_slow_stub[RegisterImpl::number_of_registers];
public:
ZBarrierSetAssembler();
virtual void load_at(MacroAssembler* masm,
DecoratorSet decorators,
BasicType type,
@ -82,10 +89,13 @@ public:
DecoratorSet decorators) const;
#endif // COMPILER1
virtual void barrier_stubs_init();
#ifdef COMPILER2
OptoReg::Name refine_register(const Node* node,
OptoReg::Name opto_reg);
address load_barrier_slow_stub(Register reg);
address load_barrier_weak_slow_stub(Register reg);
void generate_c2_load_barrier_stub(MacroAssembler* masm,
ZLoadBarrierStubC2* stub) const;
#endif // COMPILER2
};
#endif // CPU_X86_GC_Z_ZBARRIERSETASSEMBLER_X86_HPP

src/hotspot/cpu/x86/gc/z/z_x86_64.ad

@ -24,190 +24,144 @@
source_hpp %{
#include "gc/z/c2/zBarrierSetC2.hpp"
#include "gc/z/zThreadLocalData.hpp"
%}
source %{
#include "gc/z/zBarrierSetAssembler.hpp"
static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) {
ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, weak);
__ testptr(ref, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset()));
__ jcc(Assembler::notZero, *stub->entry());
__ bind(*stub->continuation());
}
static void z_load_barrier_slow_reg(MacroAssembler& _masm, Register dst, Address src, bool weak) {
assert(dst != rsp, "Invalid register");
assert(dst != r15, "Invalid register");
const address stub = weak ? ZBarrierSet::assembler()->load_barrier_weak_slow_stub(dst)
: ZBarrierSet::assembler()->load_barrier_slow_stub(dst);
__ lea(dst, src);
__ call(RuntimeAddress(stub));
static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) {
ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, false /* weak */);
__ jmp(*stub->entry());
__ bind(*stub->continuation());
}
%}
// For XMM and YMM enabled processors
instruct zLoadBarrierSlowRegXmmAndYmm(rRegP dst, memory src, rFlagsReg cr,
rxmm0 x0, rxmm1 x1, rxmm2 x2, rxmm3 x3,
rxmm4 x4, rxmm5 x5, rxmm6 x6, rxmm7 x7,
rxmm8 x8, rxmm9 x9, rxmm10 x10, rxmm11 x11,
rxmm12 x12, rxmm13 x13, rxmm14 x14, rxmm15 x15) %{
match(Set dst (LoadBarrierSlowReg src dst));
predicate(UseAVX <= 2 && !n->as_LoadBarrierSlowReg()->is_weak());
// Load Pointer
instruct zLoadP(rRegP dst, memory mem, rFlagsReg cr)
%{
predicate(UseZGC && n->as_Load()->barrier_data() == ZLoadBarrierStrong);
match(Set dst (LoadP mem));
effect(KILL cr, TEMP dst);
effect(KILL cr,
KILL x0, KILL x1, KILL x2, KILL x3,
KILL x4, KILL x5, KILL x6, KILL x7,
KILL x8, KILL x9, KILL x10, KILL x11,
KILL x12, KILL x13, KILL x14, KILL x15);
ins_cost(125);
format %{ "lea $dst, $src\n\t"
"call #ZLoadBarrierSlowPath" %}
format %{ "movq $dst, $mem" %}
ins_encode %{
z_load_barrier_slow_reg(_masm, $dst$$Register, $src$$Address, false /* weak */);
__ movptr($dst$$Register, $mem$$Address);
if (barrier_data() != ZLoadBarrierElided) {
z_load_barrier(_masm, this, $mem$$Address, $dst$$Register, noreg /* tmp */, false /* weak */);
}
%}
ins_pipe(pipe_slow);
ins_pipe(ialu_reg_mem);
%}
// For ZMM enabled processors
instruct zLoadBarrierSlowRegZmm(rRegP dst, memory src, rFlagsReg cr,
rxmm0 x0, rxmm1 x1, rxmm2 x2, rxmm3 x3,
rxmm4 x4, rxmm5 x5, rxmm6 x6, rxmm7 x7,
rxmm8 x8, rxmm9 x9, rxmm10 x10, rxmm11 x11,
rxmm12 x12, rxmm13 x13, rxmm14 x14, rxmm15 x15,
rxmm16 x16, rxmm17 x17, rxmm18 x18, rxmm19 x19,
rxmm20 x20, rxmm21 x21, rxmm22 x22, rxmm23 x23,
rxmm24 x24, rxmm25 x25, rxmm26 x26, rxmm27 x27,
rxmm28 x28, rxmm29 x29, rxmm30 x30, rxmm31 x31) %{
// Load Weak Pointer
instruct zLoadWeakP(rRegP dst, memory mem, rFlagsReg cr)
%{
predicate(UseZGC && n->as_Load()->barrier_data() == ZLoadBarrierWeak);
match(Set dst (LoadP mem));
effect(KILL cr, TEMP dst);
match(Set dst (LoadBarrierSlowReg src dst));
predicate(UseAVX == 3 && !n->as_LoadBarrierSlowReg()->is_weak());
ins_cost(125);
effect(KILL cr,
KILL x0, KILL x1, KILL x2, KILL x3,
KILL x4, KILL x5, KILL x6, KILL x7,
KILL x8, KILL x9, KILL x10, KILL x11,
KILL x12, KILL x13, KILL x14, KILL x15,
KILL x16, KILL x17, KILL x18, KILL x19,
KILL x20, KILL x21, KILL x22, KILL x23,
KILL x24, KILL x25, KILL x26, KILL x27,
KILL x28, KILL x29, KILL x30, KILL x31);
format %{ "lea $dst, $src\n\t"
"call #ZLoadBarrierSlowPath" %}
format %{ "movq $dst, $mem" %}
ins_encode %{
z_load_barrier_slow_reg(_masm, $dst$$Register, $src$$Address, false /* weak */);
__ movptr($dst$$Register, $mem$$Address);
z_load_barrier(_masm, this, $mem$$Address, $dst$$Register, noreg /* tmp */, true /* weak */);
%}
ins_pipe(pipe_slow);
ins_pipe(ialu_reg_mem);
%}
// For XMM and YMM enabled processors
instruct zLoadBarrierWeakSlowRegXmmAndYmm(rRegP dst, memory src, rFlagsReg cr,
rxmm0 x0, rxmm1 x1, rxmm2 x2, rxmm3 x3,
rxmm4 x4, rxmm5 x5, rxmm6 x6, rxmm7 x7,
rxmm8 x8, rxmm9 x9, rxmm10 x10, rxmm11 x11,
rxmm12 x12, rxmm13 x13, rxmm14 x14, rxmm15 x15) %{
match(Set dst (LoadBarrierSlowReg src dst));
predicate(UseAVX <= 2 && n->as_LoadBarrierSlowReg()->is_weak());
instruct zCompareAndExchangeP(memory mem, rax_RegP oldval, rRegP newval, rRegP tmp, rFlagsReg cr) %{
match(Set oldval (CompareAndExchangeP mem (Binary oldval newval)));
predicate(UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong);
effect(KILL cr, TEMP tmp);
effect(KILL cr,
KILL x0, KILL x1, KILL x2, KILL x3,
KILL x4, KILL x5, KILL x6, KILL x7,
KILL x8, KILL x9, KILL x10, KILL x11,
KILL x12, KILL x13, KILL x14, KILL x15);
format %{ "lea $dst, $src\n\t"
"call #ZLoadBarrierSlowPath" %}
format %{ "lock\n\t"
"cmpxchgq $newval, $mem" %}
ins_encode %{
z_load_barrier_slow_reg(_masm, $dst$$Register, $src$$Address, true /* weak */);
if (barrier_data() != ZLoadBarrierElided) {
__ movptr($tmp$$Register, $oldval$$Register);
}
__ lock();
__ cmpxchgptr($newval$$Register, $mem$$Address);
if (barrier_data() != ZLoadBarrierElided) {
Label good;
__ testptr($oldval$$Register, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset()));
__ jcc(Assembler::zero, good);
z_load_barrier_slow_path(_masm, this, $mem$$Address, $oldval$$Register, $tmp$$Register);
__ movptr($oldval$$Register, $tmp$$Register);
__ lock();
__ cmpxchgptr($newval$$Register, $mem$$Address);
__ bind(good);
}
%}
ins_pipe(pipe_slow);
ins_pipe(pipe_cmpxchg);
%}
// For ZMM enabled processors
instruct zLoadBarrierWeakSlowRegZmm(rRegP dst, memory src, rFlagsReg cr,
rxmm0 x0, rxmm1 x1, rxmm2 x2, rxmm3 x3,
rxmm4 x4, rxmm5 x5, rxmm6 x6, rxmm7 x7,
rxmm8 x8, rxmm9 x9, rxmm10 x10, rxmm11 x11,
rxmm12 x12, rxmm13 x13, rxmm14 x14, rxmm15 x15,
rxmm16 x16, rxmm17 x17, rxmm18 x18, rxmm19 x19,
rxmm20 x20, rxmm21 x21, rxmm22 x22, rxmm23 x23,
rxmm24 x24, rxmm25 x25, rxmm26 x26, rxmm27 x27,
rxmm28 x28, rxmm29 x29, rxmm30 x30, rxmm31 x31) %{
instruct zCompareAndSwapP(rRegI res, memory mem, rRegP newval, rRegP tmp, rFlagsReg cr, rax_RegP oldval) %{
match(Set res (CompareAndSwapP mem (Binary oldval newval)));
match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
predicate(UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong);
effect(KILL cr, KILL oldval, TEMP tmp);
match(Set dst (LoadBarrierSlowReg src dst));
predicate(UseAVX == 3 && n->as_LoadBarrierSlowReg()->is_weak());
effect(KILL cr,
KILL x0, KILL x1, KILL x2, KILL x3,
KILL x4, KILL x5, KILL x6, KILL x7,
KILL x8, KILL x9, KILL x10, KILL x11,
KILL x12, KILL x13, KILL x14, KILL x15,
KILL x16, KILL x17, KILL x18, KILL x19,
KILL x20, KILL x21, KILL x22, KILL x23,
KILL x24, KILL x25, KILL x26, KILL x27,
KILL x28, KILL x29, KILL x30, KILL x31);
format %{ "lea $dst, $src\n\t"
"call #ZLoadBarrierSlowPath" %}
format %{ "lock\n\t"
"cmpxchgq $newval, $mem\n\t"
"sete $res\n\t"
"movzbl $res, $res" %}
ins_encode %{
z_load_barrier_slow_reg(_masm, $dst$$Register, $src$$Address, true /* weak */);
if (barrier_data() != ZLoadBarrierElided) {
__ movptr($tmp$$Register, $oldval$$Register);
}
__ lock();
__ cmpxchgptr($newval$$Register, $mem$$Address);
if (barrier_data() != ZLoadBarrierElided) {
Label good;
__ testptr($oldval$$Register, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset()));
__ jcc(Assembler::zero, good);
z_load_barrier_slow_path(_masm, this, $mem$$Address, $oldval$$Register, $tmp$$Register);
__ movptr($oldval$$Register, $tmp$$Register);
__ lock();
__ cmpxchgptr($newval$$Register, $mem$$Address);
__ bind(good);
__ cmpptr($tmp$$Register, $oldval$$Register);
}
__ setb(Assembler::equal, $res$$Register);
__ movzbl($res$$Register, $res$$Register);
%}
ins_pipe(pipe_slow);
ins_pipe(pipe_cmpxchg);
%}
// Specialized versions of compareAndExchangeP that add a keepalive that is consumed
// but does not affect the output.
instruct zXChgP(memory mem, rRegP newval, rFlagsReg cr) %{
match(Set newval (GetAndSetP mem newval));
predicate(UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong);
effect(KILL cr);
instruct z_compareAndExchangeP(
memory mem_ptr,
rax_RegP oldval, rRegP newval, rRegP keepalive,
rFlagsReg cr) %{
predicate(VM_Version::supports_cx8());
match(Set oldval (ZCompareAndExchangeP (Binary mem_ptr keepalive) (Binary oldval newval)));
effect(KILL cr);
format %{ "xchgq $newval, $mem" %}
format %{ "cmpxchgq $mem_ptr,$newval\t# "
"If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
opcode(0x0F, 0xB1);
ins_encode(lock_prefix,
REX_reg_mem_wide(newval, mem_ptr),
OpcP, OpcS,
reg_mem(newval, mem_ptr) // lock cmpxchg
);
ins_pipe( pipe_cmpxchg );
%}
instruct z_compareAndSwapP(rRegI res,
memory mem_ptr,
rax_RegP oldval, rRegP newval, rRegP keepalive,
rFlagsReg cr) %{
predicate(VM_Version::supports_cx8());
match(Set res (ZCompareAndSwapP (Binary mem_ptr keepalive) (Binary oldval newval)));
match(Set res (ZWeakCompareAndSwapP (Binary mem_ptr keepalive) (Binary oldval newval)));
effect(KILL cr, KILL oldval);
format %{ "cmpxchgq $mem_ptr,$newval\t# "
"If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
"sete $res\n\t"
"movzbl $res, $res" %}
opcode(0x0F, 0xB1);
ins_encode(lock_prefix,
REX_reg_mem_wide(newval, mem_ptr),
OpcP, OpcS,
reg_mem(newval, mem_ptr),
REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
REX_reg_breg(res, res), // movzbl
Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
ins_pipe( pipe_cmpxchg );
%}
instruct z_xchgP( memory mem, rRegP newval, rRegP keepalive) %{
match(Set newval (ZGetAndSetP mem (Binary newval keepalive)));
format %{ "XCHGQ $newval,[$mem]" %}
ins_encode %{
__ xchgq($newval$$Register, $mem$$Address);
__ xchgptr($newval$$Register, $mem$$Address);
if (barrier_data() != ZLoadBarrierElided) {
z_load_barrier(_masm, this, Address(noreg, 0), $newval$$Register, noreg /* tmp */, false /* weak */);
}
%}
ins_pipe( pipe_cmpxchg );
ins_pipe(pipe_cmpxchg);
%}
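For the exchange, the barrier runs after the swap, on the pointer that comes back out of memory. The z_load_barrier helper used here is the common inline fast path of the new design; a sketch of its likely shape, as an assumption based on the companion gc/z/z_x86_64.ad changes this excerpt omits:

static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr,
                           Register ref, Register tmp, bool weak) {
  // Fast path: a reference with no bad color bits set needs no healing.
  ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, weak);
  __ testptr(ref, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset()));
  __ jcc(Assembler::notZero, *stub->entry());
  __ bind(*stub->continuation());
}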

View File

@ -1097,138 +1097,6 @@ reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0
reg_class_dynamic vectorz_reg(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
reg_class ymm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h);
reg_class zmm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p);
reg_class xmm1_reg(XMM1, XMM1b, XMM1c, XMM1d);
reg_class ymm1_reg(XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h);
reg_class zmm1_reg(XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p);
reg_class xmm2_reg(XMM2, XMM2b, XMM2c, XMM2d);
reg_class ymm2_reg(XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h);
reg_class zmm2_reg(XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p);
reg_class xmm3_reg(XMM3, XMM3b, XMM3c, XMM3d);
reg_class ymm3_reg(XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h);
reg_class zmm3_reg(XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p);
reg_class xmm4_reg(XMM4, XMM4b, XMM4c, XMM4d);
reg_class ymm4_reg(XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h);
reg_class zmm4_reg(XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p);
reg_class xmm5_reg(XMM5, XMM5b, XMM5c, XMM5d);
reg_class ymm5_reg(XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h);
reg_class zmm5_reg(XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p);
reg_class xmm6_reg(XMM6, XMM6b, XMM6c, XMM6d);
reg_class ymm6_reg(XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h);
reg_class zmm6_reg(XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p);
reg_class xmm7_reg(XMM7, XMM7b, XMM7c, XMM7d);
reg_class ymm7_reg(XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h);
reg_class zmm7_reg(XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p);
#ifdef _LP64
reg_class xmm8_reg(XMM8, XMM8b, XMM8c, XMM8d);
reg_class ymm8_reg(XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h);
reg_class zmm8_reg(XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p);
reg_class xmm9_reg(XMM9, XMM9b, XMM9c, XMM9d);
reg_class ymm9_reg(XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h);
reg_class zmm9_reg(XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p);
reg_class xmm10_reg(XMM10, XMM10b, XMM10c, XMM10d);
reg_class ymm10_reg(XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h);
reg_class zmm10_reg(XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p);
reg_class xmm11_reg(XMM11, XMM11b, XMM11c, XMM11d);
reg_class ymm11_reg(XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h);
reg_class zmm11_reg(XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p);
reg_class xmm12_reg(XMM12, XMM12b, XMM12c, XMM12d);
reg_class ymm12_reg(XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h);
reg_class zmm12_reg(XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p);
reg_class xmm13_reg(XMM13, XMM13b, XMM13c, XMM13d);
reg_class ymm13_reg(XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h);
reg_class zmm13_reg(XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p);
reg_class xmm14_reg(XMM14, XMM14b, XMM14c, XMM14d);
reg_class ymm14_reg(XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h);
reg_class zmm14_reg(XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p);
reg_class xmm15_reg(XMM15, XMM15b, XMM15c, XMM15d);
reg_class ymm15_reg(XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
reg_class zmm15_reg(XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
reg_class xmm16_reg(XMM16, XMM16b, XMM16c, XMM16d);
reg_class ymm16_reg(XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h);
reg_class zmm16_reg(XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p);
reg_class xmm17_reg(XMM17, XMM17b, XMM17c, XMM17d);
reg_class ymm17_reg(XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h);
reg_class zmm17_reg(XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p);
reg_class xmm18_reg(XMM18, XMM18b, XMM18c, XMM18d);
reg_class ymm18_reg(XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h);
reg_class zmm18_reg(XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p);
reg_class xmm19_reg(XMM19, XMM19b, XMM19c, XMM19d);
reg_class ymm19_reg(XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h);
reg_class zmm19_reg(XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p);
reg_class xmm20_reg(XMM20, XMM20b, XMM20c, XMM20d);
reg_class ymm20_reg(XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h);
reg_class zmm20_reg(XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p);
reg_class xmm21_reg(XMM21, XMM21b, XMM21c, XMM21d);
reg_class ymm21_reg(XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h);
reg_class zmm21_reg(XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p);
reg_class xmm22_reg(XMM22, XMM22b, XMM22c, XMM22d);
reg_class ymm22_reg(XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h);
reg_class zmm22_reg(XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p);
reg_class xmm23_reg(XMM23, XMM23b, XMM23c, XMM23d);
reg_class ymm23_reg(XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h);
reg_class zmm23_reg(XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p);
reg_class xmm24_reg(XMM24, XMM24b, XMM24c, XMM24d);
reg_class ymm24_reg(XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h);
reg_class zmm24_reg(XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p);
reg_class xmm25_reg(XMM25, XMM25b, XMM25c, XMM25d);
reg_class ymm25_reg(XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h);
reg_class zmm25_reg(XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p);
reg_class xmm26_reg(XMM26, XMM26b, XMM26c, XMM26d);
reg_class ymm26_reg(XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h);
reg_class zmm26_reg(XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p);
reg_class xmm27_reg(XMM27, XMM27b, XMM27c, XMM27d);
reg_class ymm27_reg(XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h);
reg_class zmm27_reg(XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p);
reg_class xmm28_reg(XMM28, XMM28b, XMM28c, XMM28d);
reg_class ymm28_reg(XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h);
reg_class zmm28_reg(XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p);
reg_class xmm29_reg(XMM29, XMM29b, XMM29c, XMM29d);
reg_class ymm29_reg(XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h);
reg_class zmm29_reg(XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p);
reg_class xmm30_reg(XMM30, XMM30b, XMM30c, XMM30d);
reg_class ymm30_reg(XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h);
reg_class zmm30_reg(XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p);
reg_class xmm31_reg(XMM31, XMM31b, XMM31c, XMM31d);
reg_class ymm31_reg(XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
reg_class zmm31_reg(XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
#endif
%}
@ -1800,8 +1668,8 @@ static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo
return (UseAVX > 2) ? 6 : 4;
}
static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
int stack_offset, int reg, uint ireg, outputStream* st) {
int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
int stack_offset, int reg, uint ireg, outputStream* st) {
// In 64-bit VM size calculation is very complex. Emitting instructions
// into scratch buffer is used to get size in 64-bit VM.
LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )

View File

@ -1058,8 +1058,8 @@ static enum RC rc_class(OptoReg::Name reg)
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
int src_hi, int dst_hi, uint ireg, outputStream* st);
static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
int stack_offset, int reg, uint ireg, outputStream* st);
int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
int stack_offset, int reg, uint ireg, outputStream* st);
static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
int dst_offset, uint ireg, outputStream* st) {
@ -4260,200 +4260,6 @@ operand cmpOpUCF2() %{
%}
%}
// Operands for bound floating pointer register arguments
operand rxmm0() %{
constraint(ALLOC_IN_RC(xmm0_reg));
match(VecX);
format%{%}
interface(REG_INTER);
%}
operand rxmm1() %{
constraint(ALLOC_IN_RC(xmm1_reg));
match(VecX);
format%{%}
interface(REG_INTER);
%}
operand rxmm2() %{
constraint(ALLOC_IN_RC(xmm2_reg));
match(VecX);
format%{%}
interface(REG_INTER);
%}
operand rxmm3() %{
constraint(ALLOC_IN_RC(xmm3_reg));
match(VecX);
format%{%}
interface(REG_INTER);
%}
operand rxmm4() %{
constraint(ALLOC_IN_RC(xmm4_reg));
match(VecX);
format%{%}
interface(REG_INTER);
%}
operand rxmm5() %{
constraint(ALLOC_IN_RC(xmm5_reg));
match(VecX);
format%{%}
interface(REG_INTER);
%}
operand rxmm6() %{
constraint(ALLOC_IN_RC(xmm6_reg));
match(VecX);
format%{%}
interface(REG_INTER);
%}
operand rxmm7() %{
constraint(ALLOC_IN_RC(xmm7_reg));
match(VecX);
format%{%}
interface(REG_INTER);
%}
operand rxmm8() %{
constraint(ALLOC_IN_RC(xmm8_reg));
match(VecX);
format%{%}
interface(REG_INTER);
%}
operand rxmm9() %{
constraint(ALLOC_IN_RC(xmm9_reg));
match(VecX);
format%{%}
interface(REG_INTER);
%}
operand rxmm10() %{
constraint(ALLOC_IN_RC(xmm10_reg));
match(VecX);
format%{%}
interface(REG_INTER);
%}
operand rxmm11() %{
constraint(ALLOC_IN_RC(xmm11_reg));
match(VecX);
format%{%}
interface(REG_INTER);
%}
operand rxmm12() %{
constraint(ALLOC_IN_RC(xmm12_reg));
match(VecX);
format%{%}
interface(REG_INTER);
%}
operand rxmm13() %{
constraint(ALLOC_IN_RC(xmm13_reg));
match(VecX);
format%{%}
interface(REG_INTER);
%}
operand rxmm14() %{
constraint(ALLOC_IN_RC(xmm14_reg));
match(VecX);
format%{%}
interface(REG_INTER);
%}
operand rxmm15() %{
constraint(ALLOC_IN_RC(xmm15_reg));
match(VecX);
format%{%}
interface(REG_INTER);
%}
operand rxmm16() %{
constraint(ALLOC_IN_RC(xmm16_reg));
match(VecX);
format%{%}
interface(REG_INTER);
%}
operand rxmm17() %{
constraint(ALLOC_IN_RC(xmm17_reg));
match(VecX);
format%{%}
interface(REG_INTER);
%}
operand rxmm18() %{
constraint(ALLOC_IN_RC(xmm18_reg));
match(VecX);
format%{%}
interface(REG_INTER);
%}
operand rxmm19() %{
constraint(ALLOC_IN_RC(xmm19_reg));
match(VecX);
format%{%}
interface(REG_INTER);
%}
operand rxmm20() %{
constraint(ALLOC_IN_RC(xmm20_reg));
match(VecX);
format%{%}
interface(REG_INTER);
%}
operand rxmm21() %{
constraint(ALLOC_IN_RC(xmm21_reg));
match(VecX);
format%{%}
interface(REG_INTER);
%}
operand rxmm22() %{
constraint(ALLOC_IN_RC(xmm22_reg));
match(VecX);
format%{%}
interface(REG_INTER);
%}
operand rxmm23() %{
constraint(ALLOC_IN_RC(xmm23_reg));
match(VecX);
format%{%}
interface(REG_INTER);
%}
operand rxmm24() %{
constraint(ALLOC_IN_RC(xmm24_reg));
match(VecX);
format%{%}
interface(REG_INTER);
%}
operand rxmm25() %{
constraint(ALLOC_IN_RC(xmm25_reg));
match(VecX);
format%{%}
interface(REG_INTER);
%}
operand rxmm26() %{
constraint(ALLOC_IN_RC(xmm26_reg));
match(VecX);
format%{%}
interface(REG_INTER);
%}
operand rxmm27() %{
constraint(ALLOC_IN_RC(xmm27_reg));
match(VecX);
format%{%}
interface(REG_INTER);
%}
operand rxmm28() %{
constraint(ALLOC_IN_RC(xmm28_reg));
match(VecX);
format%{%}
interface(REG_INTER);
%}
operand rxmm29() %{
constraint(ALLOC_IN_RC(xmm29_reg));
match(VecX);
format%{%}
interface(REG_INTER);
%}
operand rxmm30() %{
constraint(ALLOC_IN_RC(xmm30_reg));
match(VecX);
format%{%}
interface(REG_INTER);
%}
operand rxmm31() %{
constraint(ALLOC_IN_RC(xmm31_reg));
match(VecX);
format%{%}
interface(REG_INTER);
%}
//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used as to simplify
// instruction definitions by not requiring the AD writer to specify separate
@ -5346,6 +5152,7 @@ instruct loadRange(rRegI dst, memory mem)
instruct loadP(rRegP dst, memory mem)
%{
match(Set dst (LoadP mem));
predicate(n->as_Load()->barrier_data() == 0);
ins_cost(125); // XXX
format %{ "movq $dst, $mem\t# ptr" %}
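With barrier_data() == 0 now required here, the generic loadP rule no longer matches ZGC oop loads; a ZGC-specific rule with the complementary predicate picks them up and emits the inline barrier test right after the load. A sketch of that counterpart rule, as an assumption since the new gc/z/z_x86_64.ad file is not shown in this excerpt:

instruct zLoadP(rRegP dst, memory mem, rFlagsReg cr)
%{
  predicate(UseZGC && n->as_Load()->barrier_data() == ZLoadBarrierStrong);
  match(Set dst (LoadP mem));
  effect(KILL cr, TEMP dst);
  ins_cost(125);
  format %{ "movq     $dst, $mem" %}
  ins_encode %{
    __ movptr($dst$$Register, $mem$$Address);
    // Elided barriers are only discovered after matching, so check at emit time,
    // consistent with the zXChgP encoding above.
    if (barrier_data() != ZLoadBarrierElided) {
      z_load_barrier(_masm, this, $mem$$Address, $dst$$Register, noreg /* tmp */, false /* weak */);
    }
  %}
  ins_pipe(ialu_reg_mem);
%}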
@ -7794,6 +7601,7 @@ instruct storePConditional(memory heap_top_ptr,
rax_RegP oldval, rRegP newval,
rFlagsReg cr)
%{
predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) "
@ -7845,7 +7653,7 @@ instruct compareAndSwapP(rRegI res,
rax_RegP oldval, rRegP newval,
rFlagsReg cr)
%{
predicate(VM_Version::supports_cx8());
predicate(VM_Version::supports_cx8() && n->as_LoadStore()->barrier_data() == 0);
match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
effect(KILL cr, KILL oldval);
@ -8087,7 +7895,7 @@ instruct compareAndExchangeP(
rax_RegP oldval, rRegP newval,
rFlagsReg cr)
%{
predicate(VM_Version::supports_cx8());
predicate(VM_Version::supports_cx8() && n->as_LoadStore()->barrier_data() == 0);
match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
effect(KILL cr);
@ -8232,6 +8040,7 @@ instruct xchgL( memory mem, rRegL newval) %{
instruct xchgP( memory mem, rRegP newval) %{
match(Set newval (GetAndSetP mem newval));
predicate(n->as_LoadStore()->barrier_data() == 0);
format %{ "XCHGQ $newval,[$mem]" %}
ins_encode %{
__ xchgq($newval$$Register, $mem$$Address);
@ -11974,6 +11783,7 @@ instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
%{
match(Set cr (CmpP op1 (LoadP op2)));
predicate(n->in(2)->as_Load()->barrier_data() == 0);
ins_cost(500); // XXX
format %{ "cmpq $op1, $op2\t# ptr" %}
@ -11999,7 +11809,8 @@ instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
// and raw pointers have no anti-dependencies.
instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
%{
predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none);
predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
n->in(2)->as_Load()->barrier_data() == 0);
match(Set cr (CmpP op1 (LoadP op2)));
format %{ "cmpq $op1, $op2\t# raw ptr" %}
@ -12024,7 +11835,8 @@ instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
// any compare to a zero should be eq/neq.
instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
%{
predicate(!UseCompressedOops || (CompressedOops::base() != NULL));
predicate((!UseCompressedOops || (CompressedOops::base() != NULL)) &&
n->in(1)->as_Load()->barrier_data() == 0);
match(Set cr (CmpP (LoadP op) zero));
ins_cost(500); // XXX
@ -12037,7 +11849,9 @@ instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
%{
predicate(UseCompressedOops && (CompressedOops::base() == NULL) && (CompressedKlassPointers::base() == NULL));
predicate(UseCompressedOops && (CompressedOops::base() == NULL) &&
(CompressedKlassPointers::base() == NULL) &&
n->in(1)->as_Load()->barrier_data() == 0);
match(Set cr (CmpP (LoadP mem) zero));
format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %}

View File

@ -773,11 +773,6 @@ bool InstructForm::captures_bottom_type(FormDict &globals) const {
!strcmp(_matrule->_rChild->_opType,"CheckCastPP") ||
!strcmp(_matrule->_rChild->_opType,"GetAndSetP") ||
!strcmp(_matrule->_rChild->_opType,"GetAndSetN") ||
#if INCLUDE_ZGC
!strcmp(_matrule->_rChild->_opType,"ZGetAndSetP") ||
!strcmp(_matrule->_rChild->_opType,"ZCompareAndExchangeP") ||
!strcmp(_matrule->_rChild->_opType,"LoadBarrierSlowReg") ||
#endif
#if INCLUDE_SHENANDOAHGC
!strcmp(_matrule->_rChild->_opType,"ShenandoahCompareAndExchangeP") ||
!strcmp(_matrule->_rChild->_opType,"ShenandoahCompareAndExchangeN") ||
@ -3510,9 +3505,6 @@ int MatchNode::needs_ideal_memory_edge(FormDict &globals) const {
"StoreCM",
"GetAndSetB", "GetAndSetS", "GetAndAddI", "GetAndSetI", "GetAndSetP",
"GetAndAddB", "GetAndAddS", "GetAndAddL", "GetAndSetL", "GetAndSetN",
#if INCLUDE_ZGC
"ZGetAndSetP", "ZCompareAndSwapP", "ZCompareAndExchangeP", "ZWeakCompareAndSwapP",
#endif
"ClearArray"
};
int cnt = sizeof(needs_ideal_memory_list)/sizeof(char*);

View File

@ -66,8 +66,7 @@ NOT_PRODUCT(cflags(TraceOptoOutput, bool, TraceOptoOutput, TraceOptoOutput))
cflags(VectorizeDebug, uintx, 0, VectorizeDebug) \
cflags(CloneMapDebug, bool, false, CloneMapDebug) \
cflags(IGVPrintLevel, intx, PrintIdealGraphLevel, IGVPrintLevel) \
cflags(MaxNodeLimit, intx, MaxNodeLimit, MaxNodeLimit) \
ZGC_ONLY(cflags(ZTraceLoadBarriers, bool, false, ZTraceLoadBarriers))
cflags(MaxNodeLimit, intx, MaxNodeLimit, MaxNodeLimit)
#else
#define compilerdirectives_c2_flags(cflags)
#endif

View File

@ -264,7 +264,7 @@ public:
virtual void clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const;
// Support for GC barriers emitted during parsing
virtual bool has_load_barriers() const { return false; }
virtual bool has_load_barrier_nodes() const { return false; }
virtual bool is_gc_barrier_node(Node* node) const { return false; }
virtual Node* step_over_gc_barrier(Node* c) const { return c; }
virtual Node* step_over_gc_barrier_ctrl(Node* c) const { return c; }
@ -287,13 +287,9 @@ public:
virtual bool is_gc_specific_loop_opts_pass(LoopOptsMode mode) const { return false; }
virtual bool has_special_unique_user(const Node* node) const { return false; }
virtual bool needs_anti_dependence_check(const Node* node) const { return true; }
virtual void barrier_insertion_phase(Compile* C, PhaseIterGVN &igvn) const { }
enum CompilePhase {
BeforeOptimize,
BeforeLateInsertion,
BeforeMacroExpand,
BeforeCodeGen
};
@ -320,6 +316,10 @@ public:
virtual Node* split_if_pre(PhaseIdealLoop* phase, Node* n) const { return NULL; }
virtual bool build_loop_late_post(PhaseIdealLoop* phase, Node* n) const { return false; }
virtual bool sink_node(PhaseIdealLoop* phase, Node* n, Node* x, Node* x_ctrl, Node* n_ctrl) const { return false; }
virtual void late_barrier_analysis() const { }
virtual int estimate_stub_size() const { return 0; }
virtual void emit_stubs(CodeBuffer& cb) const { }
};
#endif // SHARE_GC_SHARED_C2_BARRIERSETC2_HPP

View File

@ -103,7 +103,7 @@ public:
static const TypeFunc* write_ref_field_pre_entry_Type();
static const TypeFunc* shenandoah_clone_barrier_Type();
static const TypeFunc* shenandoah_load_reference_barrier_Type();
virtual bool has_load_barriers() const { return true; }
virtual bool has_load_barrier_nodes() const { return true; }
// This is the entry-point for the backend to perform accesses through the Access API.
virtual void clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const;

File diff suppressed because it is too large

View File

@ -29,134 +29,38 @@
#include "opto/node.hpp"
#include "utilities/growableArray.hpp"
class ZCompareAndSwapPNode : public CompareAndSwapPNode {
public:
ZCompareAndSwapPNode(Node* c, Node *mem, Node *adr, Node *val, Node *ex, MemNode::MemOrd mem_ord) : CompareAndSwapPNode(c, mem, adr, val, ex, mem_ord) { }
virtual int Opcode() const;
};
class ZWeakCompareAndSwapPNode : public WeakCompareAndSwapPNode {
public:
ZWeakCompareAndSwapPNode(Node* c, Node *mem, Node *adr, Node *val, Node *ex, MemNode::MemOrd mem_ord) : WeakCompareAndSwapPNode(c, mem, adr, val, ex, mem_ord) { }
virtual int Opcode() const;
};
class ZCompareAndExchangePNode : public CompareAndExchangePNode {
public:
ZCompareAndExchangePNode(Node* c, Node *mem, Node *adr, Node *val, Node *ex, const TypePtr* at, const Type* t, MemNode::MemOrd mem_ord) : CompareAndExchangePNode(c, mem, adr, val, ex, at, t, mem_ord) { }
virtual int Opcode() const;
};
class ZGetAndSetPNode : public GetAndSetPNode {
public:
ZGetAndSetPNode(Node* c, Node *mem, Node *adr, Node *val, const TypePtr* at, const Type* t) : GetAndSetPNode(c, mem, adr, val, at, t) { }
virtual int Opcode() const;
};
const uint8_t ZLoadBarrierStrong = 1;
const uint8_t ZLoadBarrierWeak = 2;
const uint8_t ZLoadBarrierElided = 3;
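These constants are the values carried in the new barrier_data() bit field (see the memnode.hpp hunk further down): ZLoadBarrierStrong and ZLoadBarrierWeak are attached to oop loads during parsing, and ZLoadBarrierElided marks accesses that the dominating-barrier analysis later proves safe. A sketch of the tagging side, as an assumption modeled on the new zBarrierSetC2.cpp; the guard condition is hypothetical:

Node* ZBarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const {
  Node* const load = BarrierSetC2::load_at_resolved(access, val_type);
  if (load->is_Load() && access.type() == T_OBJECT) {  // hypothetical guard
    const bool weak = (access.decorators() & ON_WEAK_OOP_REF) != 0;
    load->as_Load()->set_barrier_data(weak ? ZLoadBarrierWeak : ZLoadBarrierStrong);
  }
  return load;
}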
class LoadBarrierNode : public MultiNode {
private:
bool _weak; // On strong or weak oop reference
static bool is_dominator(PhaseIdealLoop* phase, bool linear_only, Node *d, Node *n);
void push_dominated_barriers(PhaseIterGVN* igvn) const;
public:
enum {
Control,
Memory,
Oop,
Address,
Number_of_Outputs = Address,
Similar,
Number_of_Inputs
};
LoadBarrierNode(Compile* C,
Node* c,
Node* mem,
Node* val,
Node* adr,
bool weak);
virtual int Opcode() const;
virtual uint size_of() const;
virtual bool cmp(const Node& n) const;
virtual const Type *bottom_type() const;
virtual const TypePtr* adr_type() const;
virtual const Type *Value(PhaseGVN *phase) const;
virtual Node *Identity(PhaseGVN *phase);
virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
virtual uint match_edge(uint idx) const;
LoadBarrierNode* has_dominating_barrier(PhaseIdealLoop* phase,
bool linear_only,
bool look_for_similar);
void fix_similar_in_uses(PhaseIterGVN* igvn);
bool has_true_uses() const;
bool can_be_eliminated() const {
return !in(Similar)->is_top();
}
bool is_weak() const {
return _weak;
}
};
class LoadBarrierSlowRegNode : public TypeNode {
private:
bool _is_weak;
public:
LoadBarrierSlowRegNode(Node *c,
Node *adr,
Node *src,
const TypePtr* t,
bool weak) :
TypeNode(t, 3), _is_weak(weak) {
init_req(1, adr);
init_req(2, src);
init_class_id(Class_LoadBarrierSlowReg);
}
virtual uint size_of() const {
return sizeof(*this);
}
virtual const char * name() {
return "LoadBarrierSlowRegNode";
}
virtual Node *Ideal(PhaseGVN *phase, bool can_reshape) {
return NULL;
}
virtual int Opcode() const;
bool is_weak() { return _is_weak; }
};
class ZBarrierSetC2State : public ResourceObj {
private:
// List of load barrier nodes which need to be expanded before matching
GrowableArray<LoadBarrierNode*>* _load_barrier_nodes;
public:
ZBarrierSetC2State(Arena* comp_arena);
int load_barrier_count() const;
void add_load_barrier_node(LoadBarrierNode* n);
void remove_load_barrier_node(LoadBarrierNode* n);
LoadBarrierNode* load_barrier_node(int idx) const;
};
class ZLoadBarrierStubC2 : public ResourceObj {
private:
const MachNode* _node;
const Address _ref_addr;
const Register _ref;
const Register _tmp;
const bool _weak;
Label _entry;
Label _continuation;
ZLoadBarrierStubC2(const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak);
public:
static ZLoadBarrierStubC2* create(const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak);
Address ref_addr() const;
Register ref() const;
Register tmp() const;
address slow_path() const;
RegMask& live() const;
Label* entry();
Label* continuation();
};
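Each ZLoadBarrierStubC2 is created while an instruction's ins_encode runs, records which registers are live at that point (filled in by late_barrier_analysis), and is materialized out-of-line at the end of code emission. A sketch of the emission loop, as an assumption abridged from the new zBarrierSetC2.cpp; the stubs() accessor on the compilation state is assumed:

void ZBarrierSetC2::emit_stubs(CodeBuffer& cb) const {
  MacroAssembler masm(&cb);
  GrowableArray<ZLoadBarrierStubC2*>* const stubs = barrier_set_state()->stubs();
  for (int i = 0; i < stubs->length(); i++) {
    // Platform code binds stub->entry(), spills stub->live(), calls
    // stub->slow_path() to heal stub->ref(), and jumps to stub->continuation().
    ZBarrierSet::assembler()->generate_c2_load_barrier_stub(&masm, stubs->at(i));
  }
  masm.flush();
}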
class ZBarrierSetC2 : public BarrierSetC2 {
private:
ZBarrierSetC2State* state() const;
void expand_loadbarrier_node(PhaseMacroExpand* phase, LoadBarrierNode* barrier) const;
#ifdef ASSERT
void verify_gc_barriers(bool post_parse) const;
#endif
void compute_liveness_at_stubs() const;
void analyze_dominating_barriers() const;
protected:
virtual Node* load_at_resolved(C2Access& access, const Type* val_type) const;
@ -174,43 +78,14 @@ protected:
public:
virtual void* create_barrier_state(Arena* comp_arena) const;
virtual bool array_copy_requires_gc_barriers(bool tightly_coupled_alloc,
BasicType type,
bool is_clone,
ArrayCopyPhase phase) const;
virtual bool has_load_barriers() const { return true; }
virtual bool is_gc_barrier_node(Node* node) const;
virtual Node* step_over_gc_barrier(Node* c) const;
virtual Node* step_over_gc_barrier_ctrl(Node* c) const;
virtual void register_potential_barrier_node(Node* node) const;
virtual void unregister_potential_barrier_node(Node* node) const;
virtual void eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const { }
virtual void enqueue_useful_gc_barrier(PhaseIterGVN* igvn, Node* node) const;
virtual void eliminate_useless_gc_barriers(Unique_Node_List &useful, Compile* C) const;
virtual bool array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type, bool is_clone, ArrayCopyPhase phase) const;
virtual bool expand_barriers(Compile* C, PhaseIterGVN& igvn) const;
virtual bool final_graph_reshaping(Compile* compile, Node* n, uint opcode) const;
virtual bool matcher_find_shared_visit(Matcher* matcher, Matcher::MStack& mstack, Node* n, uint opcode, bool& mem_op, int& mem_addr_idx) const;
virtual bool matcher_find_shared_post_visit(Matcher* matcher, Node* n, uint opcode) const;
virtual bool needs_anti_dependence_check(const Node* node) const;
#ifdef ASSERT
virtual void verify_gc_barriers(Compile* compile, CompilePhase phase) const;
#endif
// Load barrier insertion and expansion external
virtual void barrier_insertion_phase(Compile* C, PhaseIterGVN &igvn) const;
virtual bool optimize_loops(PhaseIdealLoop* phase, LoopOptsMode mode, VectorSet& visited, Node_Stack& nstack, Node_List& worklist) const;
virtual bool is_gc_specific_loop_opts_pass(LoopOptsMode mode) const { return (mode == LoopOptsZBarrierInsertion); }
virtual bool strip_mined_loops_expanded(LoopOptsMode mode) const { return mode == LoopOptsZBarrierInsertion; }
private:
// Load barrier insertion and expansion internal
void insert_barriers_on_unsafe(PhaseIdealLoop* phase) const;
void clean_catch_blocks(PhaseIdealLoop* phase, bool verify = false) const;
void insert_load_barriers(PhaseIdealLoop* phase) const;
LoadNode* insert_one_loadbarrier(PhaseIdealLoop* phase, LoadNode* load, Node* ctrl) const;
void insert_one_loadbarrier_inner(PhaseIdealLoop* phase, LoadNode* load, Node* ctrl, VectorSet visited) const;
virtual void late_barrier_analysis() const;
virtual int estimate_stub_size() const;
virtual void emit_stubs(CodeBuffer& cb) const;
};
#endif // SHARE_GC_Z_C2_ZBARRIERSETC2_HPP

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -24,10 +24,7 @@
#ifndef SHARE_GC_Z_ZBARRIERSETASSEMBLER_HPP
#define SHARE_GC_Z_ZBARRIERSETASSEMBLER_HPP
#include "asm/macroAssembler.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "oops/accessDecorators.hpp"
#include "utilities/globalDefinitions.hpp"
#include "utilities/macros.hpp"
class ZBarrierSetAssemblerBase : public BarrierSetAssembler {

View File

@ -48,9 +48,6 @@
#include "opto/subnode.hpp"
#include "opto/vectornode.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_ZGC
#include "gc/z/c2/zBarrierSetC2.hpp"
#endif
#if INCLUDE_SHENANDOAHGC
#include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
#endif

View File

@ -193,17 +193,6 @@ macro(LoadP)
macro(LoadN)
macro(LoadRange)
macro(LoadS)
#if INCLUDE_ZGC
#define zgcmacro(x) macro(x)
#else
#define zgcmacro(x) optionalmacro(x)
#endif
zgcmacro(LoadBarrier)
zgcmacro(LoadBarrierSlowReg)
zgcmacro(ZCompareAndSwapP)
zgcmacro(ZWeakCompareAndSwapP)
zgcmacro(ZCompareAndExchangeP)
zgcmacro(ZGetAndSetP)
macro(Lock)
macro(Loop)
macro(LoopLimit)

View File

@ -76,9 +76,6 @@
#include "utilities/align.hpp"
#include "utilities/copy.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_ZGC
#include "gc/z/c2/zBarrierSetC2.hpp"
#endif
// -------------------- Compile::mach_constant_base_node -----------------------
@ -990,6 +987,7 @@ Compile::Compile( ciEnv* ci_env,
_has_method_handle_invokes(false),
_clinit_barrier_on_entry(false),
_comp_arena(mtCompiler),
_barrier_set_state(BarrierSet::barrier_set()->barrier_set_c2()->create_barrier_state(comp_arena())),
_env(ci_env),
_directive(directive),
_log(ci_env->log()),
@ -2412,13 +2410,6 @@ void Compile::Optimize() {
print_method(PHASE_MACRO_EXPANSION, 2);
}
#ifdef ASSERT
bs->verify_gc_barriers(this, BarrierSetC2::BeforeLateInsertion);
#endif
bs->barrier_insertion_phase(C, igvn);
if (failing()) return;
{
TracePhase tp("barrierExpand", &timers[_t_barrierExpand]);
if (bs->expand_barriers(this, igvn)) {

View File

@ -55,7 +55,6 @@ class ConnectionGraph;
class IdealGraphPrinter;
class InlineTree;
class Int_Array;
class LoadBarrierNode;
class Matcher;
class MachConstantNode;
class MachConstantBaseNode;
@ -96,7 +95,6 @@ enum LoopOptsMode {
LoopOptsNone,
LoopOptsShenandoahExpand,
LoopOptsShenandoahPostExpand,
LoopOptsZBarrierInsertion,
LoopOptsSkipSplitIf,
LoopOptsVerify
};
@ -1186,11 +1184,7 @@ class Compile : public Phase {
bool in_scratch_emit_size() const { return _in_scratch_emit_size; }
enum ScratchBufferBlob {
#if defined(PPC64)
MAX_inst_size = 2048,
#else
MAX_inst_size = 1024,
#endif
MAX_locs_size = 128, // number of relocInfo elements
MAX_const_size = 128,
MAX_stubs_size = 128
@ -1265,14 +1259,30 @@ class Compile : public Phase {
// Process an OopMap Element while emitting nodes
void Process_OopMap_Node(MachNode *mach, int code_offset);
class BufferSizingData {
public:
int _stub;
int _code;
int _const;
int _reloc;
BufferSizingData() :
_stub(0),
_code(0),
_const(0),
_reloc(0)
{ };
};
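BufferSizingData replaces the three by-reference size outputs that shorten_branches() and init_buffer() used to exchange. The flow in the new Compile::Output(), visible in the output.cpp hunk further down, becomes roughly:

BufferSizingData buf_sizes;              // zero-initialized estimates
estimate_buffer_size(buf_sizes._const);  // constant table sizing, scratch blob setup
shorten_branches(blk_starts, buf_sizes); // fills _code, _reloc and _stub
ScheduleAndBundle();
// late_barrier_analysis() may add stubs, so init_buffer() adds
// bs->estimate_stub_size() to the stub estimate before allocating.
CodeBuffer* cb = init_buffer(buf_sizes);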
// Initialize code buffer
CodeBuffer* init_buffer(uint* blk_starts);
void estimate_buffer_size(int& const_req);
CodeBuffer* init_buffer(BufferSizingData& buf_sizes);
// Write out basic block data to code buffer
void fill_buffer(CodeBuffer* cb, uint* blk_starts);
// Determine which variable sized branches can be shortened
void shorten_branches(uint* blk_starts, int& code_size, int& reloc_size, int& stub_size);
void shorten_branches(uint* blk_starts, BufferSizingData& buf_sizes);
// Compute the size of first NumberOfLoopInstrToAlign instructions
// at the head of a loop.

View File

@ -993,18 +993,6 @@ void LoopNode::verify_strip_mined(int expect_skeleton) const {
}
}
if (UseZGC && !inner_out->in(0)->is_CountedLoopEnd()) {
// In some very special cases there can be a load that has no other uses than the
// counted loop safepoint. Then its loadbarrier will be placed between the inner
// loop exit and the safepoint. This is very rare
Node* ifnode = inner_out->in(1)->in(0);
// Region->IfTrue->If == Region->Iffalse->If
if (ifnode == inner_out->in(2)->in(0)) {
inner_out = ifnode->in(0);
}
}
CountedLoopEndNode* cle = inner_out->in(0)->as_CountedLoopEnd();
assert(cle == inner->loopexit_or_null(), "mismatch");
bool has_skeleton = outer_le->in(1)->bottom_type()->singleton() && outer_le->in(1)->bottom_type()->is_int()->get_con() == 0;

View File

@ -41,9 +41,6 @@
#include "opto/rootnode.hpp"
#include "opto/subnode.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_ZGC
#include "gc/z/c2/zBarrierSetC2.hpp"
#endif
//=============================================================================
//------------------------------split_thru_phi---------------------------------

View File

@ -197,7 +197,7 @@ public:
// ADLC inherit from this class.
class MachNode : public Node {
public:
MachNode() : Node((uint)0), _num_opnds(0), _opnds(NULL) {
MachNode() : Node((uint)0), _barrier(0), _num_opnds(0), _opnds(NULL) {
init_class_id(Class_Mach);
}
// Required boilerplate
@ -211,6 +211,9 @@ public:
// no constant base node input.
virtual uint mach_constant_base_node_input() const { return (uint)-1; }
uint8_t barrier_data() const { return _barrier; }
void set_barrier_data(uint data) { _barrier = data; }
// Copy inputs and operands to new node of instruction.
// Called from cisc_version() and short_branch_version().
// !!!! The method's body is defined in ad_<arch>.cpp file.
@ -255,6 +258,9 @@ public:
// output have choices - but they must use the same choice.
virtual uint two_adr( ) const { return 0; }
// The GC might require some barrier metadata for machine code emission.
uint8_t _barrier;
// Array of complex operand pointers. Each corresponds to zero or
// more leafs. Must be set by MachNode constructor to point to an
// internal array of MachOpers. The MachOper array is sized by

View File

@ -1751,6 +1751,13 @@ MachNode *Matcher::ReduceInst( State *s, int rule, Node *&mem ) {
_shared_nodes.map(leaf->_idx, ex);
}
// Have mach nodes inherit GC barrier data
if (leaf->is_LoadStore()) {
mach->set_barrier_data(leaf->as_LoadStore()->barrier_data());
} else if (leaf->is_Mem()) {
mach->set_barrier_data(leaf->as_Mem()->barrier_data());
}
return ex;
}
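This is the hand-off point of the new design: the tag set on the ideal load or load-store node during parsing survives matching by being copied onto the MachNode, so the platform ad-file encodings can simply query it at emit time, as in the zXChgP encoding above:

// Inside an ins_encode block:
if (barrier_data() != ZLoadBarrierElided) {
  z_load_barrier(_masm, this, Address(noreg, 0), $newval$$Register, noreg /* tmp */, false /* weak */);
}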

View File

@ -49,9 +49,6 @@
#include "utilities/copy.hpp"
#include "utilities/macros.hpp"
#include "utilities/vmError.hpp"
#if INCLUDE_ZGC
#include "gc/z/c2/zBarrierSetC2.hpp"
#endif
// Portions of code courtesy of Clifford Click
@ -2851,7 +2848,7 @@ LoadStoreNode::LoadStoreNode( Node *c, Node *mem, Node *adr, Node *val, const Ty
: Node(required),
_type(rt),
_adr_type(at),
_has_barrier(false)
_barrier(0)
{
init_req(MemNode::Control, c );
init_req(MemNode::Memory , mem);

View File

@ -43,6 +43,8 @@ private:
bool _unaligned_access; // Unaligned access from unsafe
bool _mismatched_access; // Mismatched access from unsafe: byte read in integer array for instance
bool _unsafe_access; // Access of unsafe origin.
uint8_t _barrier; // Bit field with barrier information
protected:
#ifdef ASSERT
const TypePtr* _adr_type; // What kind of memory is being addressed?
@ -62,18 +64,30 @@ public:
unset // The memory ordering is not set (used for testing)
} MemOrd;
protected:
MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at )
: Node(c0,c1,c2 ), _unaligned_access(false), _mismatched_access(false), _unsafe_access(false) {
MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at ) :
Node(c0,c1,c2),
_unaligned_access(false),
_mismatched_access(false),
_unsafe_access(false),
_barrier(0) {
init_class_id(Class_Mem);
debug_only(_adr_type=at; adr_type();)
}
MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at, Node *c3 )
: Node(c0,c1,c2,c3), _unaligned_access(false), _mismatched_access(false), _unsafe_access(false) {
MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at, Node *c3 ) :
Node(c0,c1,c2,c3),
_unaligned_access(false),
_mismatched_access(false),
_unsafe_access(false),
_barrier(0) {
init_class_id(Class_Mem);
debug_only(_adr_type=at; adr_type();)
}
MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at, Node *c3, Node *c4)
: Node(c0,c1,c2,c3,c4), _unaligned_access(false), _mismatched_access(false), _unsafe_access(false) {
MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at, Node *c3, Node *c4) :
Node(c0,c1,c2,c3,c4),
_unaligned_access(false),
_mismatched_access(false),
_unsafe_access(false),
_barrier(0) {
init_class_id(Class_Mem);
debug_only(_adr_type=at; adr_type();)
}
@ -125,6 +139,9 @@ public:
#endif
}
uint8_t barrier_data() { return _barrier; }
void set_barrier_data(uint8_t barrier_data) { _barrier = barrier_data; }
// Search through memory states which precede this node (load or store).
// Look for an exact match for the address, with no intervening
// aliased stores.
@ -181,8 +198,6 @@ private:
// this field.
const MemOrd _mo;
uint _barrier; // Bit field with barrier information
AllocateNode* is_new_object_mark_load(PhaseGVN *phase) const;
protected:
@ -196,7 +211,7 @@ protected:
public:
LoadNode(Node *c, Node *mem, Node *adr, const TypePtr* at, const Type *rt, MemOrd mo, ControlDependency control_dependency)
: MemNode(c,mem,adr,at), _control_dependency(control_dependency), _mo(mo), _barrier(0), _type(rt) {
: MemNode(c,mem,adr,at), _control_dependency(control_dependency), _mo(mo), _type(rt) {
init_class_id(Class_Load);
}
inline bool is_unordered() const { return !is_acquire(); }
@ -265,10 +280,6 @@ public:
Node* convert_to_unsigned_load(PhaseGVN& gvn);
Node* convert_to_signed_load(PhaseGVN& gvn);
void copy_barrier_info(const Node* src) { _barrier = src->as_Load()->_barrier; }
uint barrier_data() { return _barrier; }
void set_barrier_data(uint barrier_data) { _barrier |= barrier_data; }
void pin() { _control_dependency = Pinned; }
bool has_unknown_control_dependency() const { return _control_dependency == UnknownControl; }
@ -820,7 +831,7 @@ class LoadStoreNode : public Node {
private:
const Type* const _type; // What kind of value is loaded?
const TypePtr* _adr_type; // What kind of memory is being addressed?
bool _has_barrier;
uint8_t _barrier; // Bit field with barrier information
virtual uint size_of() const; // Size is bigger
public:
LoadStoreNode( Node *c, Node *mem, Node *adr, Node *val, const TypePtr* at, const Type* rt, uint required );
@ -833,8 +844,9 @@ public:
bool result_not_used() const;
MemBarNode* trailing_membar() const;
void set_has_barrier() { _has_barrier = true; };
bool has_barrier() const { return _has_barrier; };
uint8_t barrier_data() { return _barrier; }
void set_barrier_data(uint8_t barrier_data) { _barrier = barrier_data; }
};
class LoadStoreConditionalNode : public LoadStoreNode {
@ -886,6 +898,7 @@ public:
MemNode::MemOrd order() const {
return _mem_ord;
}
virtual uint size_of() const { return sizeof(*this); }
};
class CompareAndExchangeNode : public LoadStoreNode {
@ -903,6 +916,7 @@ public:
MemNode::MemOrd order() const {
return _mem_ord;
}
virtual uint size_of() const { return sizeof(*this); }
};
//------------------------------CompareAndSwapBNode---------------------------

View File

@ -546,9 +546,6 @@ Node *Node::clone() const {
if (n->is_SafePoint()) {
n->as_SafePoint()->clone_replaced_nodes();
}
if (n->is_Load()) {
n->as_Load()->copy_barrier_info(this);
}
return n; // Return the clone
}
@ -1473,10 +1470,6 @@ bool Node::needs_anti_dependence_check() const {
if (req() < 2 || (_flags & Flag_needs_anti_dependence_check) == 0) {
return false;
}
BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
if (!bs->needs_anti_dependence_check(this)) {
return false;
}
return in(1)->bottom_type()->has_memory();
}

View File

@ -83,8 +83,6 @@ class JVMState;
class JumpNode;
class JumpProjNode;
class LoadNode;
class LoadBarrierNode;
class LoadBarrierSlowRegNode;
class LoadStoreNode;
class LoadStoreConditionalNode;
class LockNode;
@ -642,7 +640,6 @@ public:
DEFINE_CLASS_ID(MemBar, Multi, 3)
DEFINE_CLASS_ID(Initialize, MemBar, 0)
DEFINE_CLASS_ID(MemBarStoreStore, MemBar, 1)
DEFINE_CLASS_ID(LoadBarrier, Multi, 4)
DEFINE_CLASS_ID(Mach, Node, 1)
DEFINE_CLASS_ID(MachReturn, Mach, 0)
@ -679,7 +676,6 @@ public:
DEFINE_CLASS_ID(EncodeNarrowPtr, Type, 6)
DEFINE_CLASS_ID(EncodeP, EncodeNarrowPtr, 0)
DEFINE_CLASS_ID(EncodePKlass, EncodeNarrowPtr, 1)
DEFINE_CLASS_ID(LoadBarrierSlowReg, Type, 7)
DEFINE_CLASS_ID(Proj, Node, 3)
DEFINE_CLASS_ID(CatchProj, Proj, 0)
@ -836,8 +832,6 @@ public:
DEFINE_CLASS_QUERY(Load)
DEFINE_CLASS_QUERY(LoadStore)
DEFINE_CLASS_QUERY(LoadStoreConditional)
DEFINE_CLASS_QUERY(LoadBarrier)
DEFINE_CLASS_QUERY(LoadBarrierSlowReg)
DEFINE_CLASS_QUERY(Lock)
DEFINE_CLASS_QUERY(Loop)
DEFINE_CLASS_QUERY(Mach)

View File

@ -31,6 +31,8 @@
#include "compiler/compileBroker.hpp"
#include "compiler/compilerDirectives.hpp"
#include "compiler/oopMap.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/c2/barrierSetC2.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/ad.hpp"
#include "opto/callnode.hpp"
@ -114,35 +116,33 @@ void Compile::Output() {
}
}
// Keeper of sizing aspects
BufferSizingData buf_sizes = BufferSizingData();
// Initialize code buffer
estimate_buffer_size(buf_sizes._const);
if (failing()) return;
// Pre-compute the length of blocks and replace
// long branches with short if machine supports it.
// Must be done before ScheduleAndBundle due to SPARC delay slots
uint* blk_starts = NEW_RESOURCE_ARRAY(uint, _cfg->number_of_blocks() + 1);
blk_starts[0] = 0;
shorten_branches(blk_starts, buf_sizes);
ScheduleAndBundle();
if (failing()) {
return;
}
// Late barrier analysis must be done after schedule and bundle
// Otherwise liveness based spilling will fail
BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
bs->late_barrier_analysis();
// Complete sizing of codebuffer
CodeBuffer* cb = init_buffer(buf_sizes);
if (cb == NULL || failing()) {
return;
}
// Initialize code buffer and process short branches.
CodeBuffer* cb = init_buffer(blk_starts);
if (cb == NULL || failing()) {
return;
}
#ifndef PRODUCT
if (trace_opto_output()) {
tty->print("\n---- After ScheduleAndBundle ----\n");
for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
tty->print("\nBB#%03d:\n", i);
Block* block = _cfg->get_block(i);
for (uint j = 0; j < block->number_of_nodes(); j++) {
Node* n = block->get_node(j);
OptoReg::Name reg = _regalloc->get_reg_first(n);
tty->print(" %-6s ", reg >= 0 && reg < REG_COUNT ? Matcher::regName[reg] : "");
n->dump();
}
}
}
#endif
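The ordering constraint in the comment above is the crux of the late-expansion scheme: a barrier stub must spill exactly the registers that are live across its slow call, and liveness is only meaningful once ScheduleAndBundle() has fixed the final instruction order. A minimal sketch of the per-block backwards walk, as an assumption modeled on the new ZBarrierSetC2::compute_liveness_at_stubs(); the stub lookup is hypothetical:

// Per block, bottom-up: kill defs, gen uses, snapshot at barrier nodes.
RegMask live;
for (uint i = block->number_of_nodes(); i-- > 0; ) {
  Node* const n = block->get_node(i);
  if (!n->is_Mach()) continue;
  MachNode* const node = n->as_Mach();
  // A definition ends the live range; remove it before recording uses.
  live.Remove(regalloc->get_reg_first(node));
  for (uint j = node->oper_input_base(); j < node->req(); j++) {
    live.Insert(regalloc->get_reg_first(node->in(j)));
  }
  if (node->barrier_data() != 0) {
    barrier_set_state()->stub_for(node)->live().OR(live);  // hypothetical lookup
  }
}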
@ -223,7 +223,7 @@ void Compile::compute_loop_first_inst_sizes() {
// The architecture description provides short branch variants for some long
// branch instructions. Replace eligible long branches with short branches.
void Compile::shorten_branches(uint* blk_starts, int& code_size, int& reloc_size, int& stub_size) {
void Compile::shorten_branches(uint* blk_starts, BufferSizingData& buf_sizes) {
// Compute size of each block, method size, and relocation information size
uint nblocks = _cfg->number_of_blocks();
@ -241,11 +241,11 @@ void Compile::shorten_branches(uint* blk_starts, int& code_size, int& reloc_size
bool has_short_branch_candidate = false;
// Initialize the sizes to 0
code_size = 0; // Size in bytes of generated code
stub_size = 0; // Size in bytes of all stub entries
int code_size = 0; // Size in bytes of generated code
int stub_size = 0; // Size in bytes of all stub entries
// Size in bytes of all relocation entries, including those in local stubs.
// Start with 2-bytes of reloc info for the unvalidated entry point
reloc_size = 1; // Number of relocation entries
int reloc_size = 1; // Number of relocation entries
// Make three passes. The first computes pessimistic blk_starts,
// relative jmp_offset and reloc_size information. The second performs
@ -479,6 +479,10 @@ void Compile::shorten_branches(uint* blk_starts, int& code_size, int& reloc_size
// a relocation index.
// The CodeBuffer will expand the locs array if this estimate is too low.
reloc_size *= 10 / sizeof(relocInfo);
buf_sizes._reloc = reloc_size;
buf_sizes._code = code_size;
buf_sizes._stub = stub_size;
}
//------------------------------FillLocArray-----------------------------------
@ -490,8 +494,8 @@ static LocationValue *new_loc_value( PhaseRegAlloc *ra, OptoReg::Name regnum, Lo
// This should never have accepted Bad before
assert(OptoReg::is_valid(regnum), "location must be valid");
return (OptoReg::is_reg(regnum))
? new LocationValue(Location::new_reg_loc(l_type, OptoReg::as_VMReg(regnum)) )
: new LocationValue(Location::new_stk_loc(l_type, ra->reg2offset(regnum)));
? new LocationValue(Location::new_reg_loc(l_type, OptoReg::as_VMReg(regnum)) )
: new LocationValue(Location::new_stk_loc(l_type, ra->reg2offset(regnum)));
}
@ -610,12 +614,12 @@ void Compile::FillLocArray( int idx, MachSafePointNode* sfpt, Node *local,
}
#endif //_LP64
else if( (t->base() == Type::FloatBot || t->base() == Type::FloatCon) &&
OptoReg::is_reg(regnum) ) {
OptoReg::is_reg(regnum) ) {
array->append(new_loc_value( _regalloc, regnum, Matcher::float_in_double()
? Location::float_in_dbl : Location::normal ));
? Location::float_in_dbl : Location::normal ));
} else if( t->base() == Type::Int && OptoReg::is_reg(regnum) ) {
array->append(new_loc_value( _regalloc, regnum, Matcher::int_in_long
? Location::int_in_long : Location::normal ));
? Location::int_in_long : Location::normal ));
} else if( t->base() == Type::NarrowOop ) {
array->append(new_loc_value( _regalloc, regnum, Location::narrowoop ));
} else {
@ -626,48 +630,48 @@ void Compile::FillLocArray( int idx, MachSafePointNode* sfpt, Node *local,
// No register. It must be constant data.
switch (t->base()) {
case Type::Half: // Second half of a double
ShouldNotReachHere(); // Caller should skip 2nd halves
break;
case Type::AnyPtr:
array->append(new ConstantOopWriteValue(NULL));
break;
case Type::AryPtr:
case Type::InstPtr: // fall through
array->append(new ConstantOopWriteValue(t->isa_oopptr()->const_oop()->constant_encoding()));
break;
case Type::NarrowOop:
if (t == TypeNarrowOop::NULL_PTR) {
case Type::Half: // Second half of a double
ShouldNotReachHere(); // Caller should skip 2nd halves
break;
case Type::AnyPtr:
array->append(new ConstantOopWriteValue(NULL));
} else {
array->append(new ConstantOopWriteValue(t->make_ptr()->isa_oopptr()->const_oop()->constant_encoding()));
}
break;
case Type::Int:
array->append(new ConstantIntValue(t->is_int()->get_con()));
break;
case Type::RawPtr:
// A return address (T_ADDRESS).
assert((intptr_t)t->is_ptr()->get_con() < (intptr_t)0x10000, "must be a valid BCI");
break;
case Type::AryPtr:
case Type::InstPtr: // fall through
array->append(new ConstantOopWriteValue(t->isa_oopptr()->const_oop()->constant_encoding()));
break;
case Type::NarrowOop:
if (t == TypeNarrowOop::NULL_PTR) {
array->append(new ConstantOopWriteValue(NULL));
} else {
array->append(new ConstantOopWriteValue(t->make_ptr()->isa_oopptr()->const_oop()->constant_encoding()));
}
break;
case Type::Int:
array->append(new ConstantIntValue(t->is_int()->get_con()));
break;
case Type::RawPtr:
// A return address (T_ADDRESS).
assert((intptr_t)t->is_ptr()->get_con() < (intptr_t)0x10000, "must be a valid BCI");
#ifdef _LP64
// Must be restored to the full-width 64-bit stack slot.
array->append(new ConstantLongValue(t->is_ptr()->get_con()));
// Must be restored to the full-width 64-bit stack slot.
array->append(new ConstantLongValue(t->is_ptr()->get_con()));
#else
array->append(new ConstantIntValue(t->is_ptr()->get_con()));
array->append(new ConstantIntValue(t->is_ptr()->get_con()));
#endif
break;
case Type::FloatCon: {
float f = t->is_float_constant()->getf();
array->append(new ConstantIntValue(jint_cast(f)));
break;
}
case Type::DoubleCon: {
jdouble d = t->is_double_constant()->getd();
break;
case Type::FloatCon: {
float f = t->is_float_constant()->getf();
array->append(new ConstantIntValue(jint_cast(f)));
break;
}
case Type::DoubleCon: {
jdouble d = t->is_double_constant()->getd();
#ifdef _LP64
array->append(new ConstantIntValue((jint)0));
array->append(new ConstantDoubleValue(d));
array->append(new ConstantIntValue((jint)0));
array->append(new ConstantDoubleValue(d));
#else
// Repack the double as two jints.
// Repack the double as two jints.
// The convention the interpreter uses is that the second local
// holds the first raw word of the native double representation.
// This is actually reasonable, since locals and stack arrays
@ -679,15 +683,15 @@ void Compile::FillLocArray( int idx, MachSafePointNode* sfpt, Node *local,
array->append(new ConstantIntValue(acc.words[1]));
array->append(new ConstantIntValue(acc.words[0]));
#endif
break;
}
case Type::Long: {
jlong d = t->is_long()->get_con();
break;
}
case Type::Long: {
jlong d = t->is_long()->get_con();
#ifdef _LP64
array->append(new ConstantIntValue((jint)0));
array->append(new ConstantLongValue(d));
array->append(new ConstantIntValue((jint)0));
array->append(new ConstantLongValue(d));
#else
// Repack the long as two jints.
// Repack the long as two jints.
// The convention the interpreter uses is that the second local
// holds the first raw word of the native double representation.
// This is actually reasonable, since locals and stack arrays
@ -699,14 +703,14 @@ void Compile::FillLocArray( int idx, MachSafePointNode* sfpt, Node *local,
array->append(new ConstantIntValue(acc.words[1]));
array->append(new ConstantIntValue(acc.words[0]));
#endif
break;
}
case Type::Top: // Add an illegal value here
array->append(new LocationValue(Location()));
break;
default:
ShouldNotReachHere();
break;
break;
}
case Type::Top: // Add an illegal value here
array->append(new LocationValue(Location()));
break;
default:
ShouldNotReachHere();
break;
}
}
@ -871,58 +875,58 @@ void Compile::Process_OopMap_Node(MachNode *mach, int current_offset) {
// A simplified version of Process_OopMap_Node, to handle non-safepoints.
class NonSafepointEmitter {
Compile* C;
JVMState* _pending_jvms;
int _pending_offset;
Compile* C;
JVMState* _pending_jvms;
int _pending_offset;
void emit_non_safepoint();
void emit_non_safepoint();
public:
NonSafepointEmitter(Compile* compile) {
this->C = compile;
_pending_jvms = NULL;
_pending_offset = 0;
}
NonSafepointEmitter(Compile* compile) {
this->C = compile;
_pending_jvms = NULL;
_pending_offset = 0;
}
void observe_instruction(Node* n, int pc_offset) {
if (!C->debug_info()->recording_non_safepoints()) return;
void observe_instruction(Node* n, int pc_offset) {
if (!C->debug_info()->recording_non_safepoints()) return;
Node_Notes* nn = C->node_notes_at(n->_idx);
if (nn == NULL || nn->jvms() == NULL) return;
if (_pending_jvms != NULL &&
_pending_jvms->same_calls_as(nn->jvms())) {
// Repeated JVMS? Stretch it up here.
_pending_offset = pc_offset;
} else {
Node_Notes* nn = C->node_notes_at(n->_idx);
if (nn == NULL || nn->jvms() == NULL) return;
if (_pending_jvms != NULL &&
_pending_jvms->same_calls_as(nn->jvms())) {
// Repeated JVMS? Stretch it up here.
_pending_offset = pc_offset;
} else {
if (_pending_jvms != NULL &&
_pending_offset < pc_offset) {
emit_non_safepoint();
}
_pending_jvms = NULL;
if (pc_offset > C->debug_info()->last_pc_offset()) {
// This is the only way _pending_jvms can become non-NULL:
_pending_jvms = nn->jvms();
_pending_offset = pc_offset;
}
}
}
// Stay out of the way of real safepoints:
void observe_safepoint(JVMState* jvms, int pc_offset) {
if (_pending_jvms != NULL &&
!_pending_jvms->same_calls_as(jvms) &&
_pending_offset < pc_offset) {
emit_non_safepoint();
}
_pending_jvms = NULL;
if (pc_offset > C->debug_info()->last_pc_offset()) {
// This is the only way _pending_jvms can become non-NULL:
_pending_jvms = nn->jvms();
_pending_offset = pc_offset;
}
void flush_at_end() {
if (_pending_jvms != NULL) {
emit_non_safepoint();
}
_pending_jvms = NULL;
}
}
// Stay out of the way of real safepoints:
void observe_safepoint(JVMState* jvms, int pc_offset) {
if (_pending_jvms != NULL &&
!_pending_jvms->same_calls_as(jvms) &&
_pending_offset < pc_offset) {
emit_non_safepoint();
}
_pending_jvms = NULL;
}
void flush_at_end() {
if (_pending_jvms != NULL) {
emit_non_safepoint();
}
_pending_jvms = NULL;
}
};
void NonSafepointEmitter::emit_non_safepoint() {
@ -952,15 +956,11 @@ void NonSafepointEmitter::emit_non_safepoint() {
}
//------------------------------init_buffer------------------------------------
CodeBuffer* Compile::init_buffer(uint* blk_starts) {
void Compile::estimate_buffer_size(int& const_req) {
// Set the initially allocated size
int code_req = initial_code_capacity;
int locs_req = initial_locs_capacity;
int stub_req = initial_stub_capacity;
int const_req = initial_const_capacity;
const_req = initial_const_capacity;
int pad_req = NativeCall::instruction_size;
// The extra spacing after the code is necessary on some platforms.
// Sometimes we need to patch in a jump after the last instruction,
// if the nmethod has been deoptimized. (See 4932387, 4894843.)
@ -972,7 +972,7 @@ CodeBuffer* Compile::init_buffer(uint* blk_starts) {
// Compute prolog code size
_method_size = 0;
_frame_slots = OptoReg::reg2stack(_matcher->_old_SP)+_regalloc->_framesize;
_frame_slots = OptoReg::reg2stack(_matcher->_old_SP) + _regalloc->_framesize;
#if defined(IA64) && !defined(AIX)
if (save_argument_registers()) {
// 4815101: this is a stub with implicit and unknown precision fp args.
@ -1021,11 +1021,18 @@ CodeBuffer* Compile::init_buffer(uint* blk_starts) {
// Initialize the space for the BufferBlob used to find and verify
// instruction size in MachNode::emit_size()
init_scratch_buffer_blob(const_req);
if (failing()) return NULL; // Out of memory
}
// Pre-compute the length of blocks and replace
// long branches with short if machine supports it.
shorten_branches(blk_starts, code_req, locs_req, stub_req);
CodeBuffer* Compile::init_buffer(BufferSizingData& buf_sizes) {
int stub_req = buf_sizes._stub;
int code_req = buf_sizes._code;
int const_req = buf_sizes._const;
int pad_req = NativeCall::instruction_size;
BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
stub_req += bs->estimate_stub_size();
// nmethod and CodeBuffer count stubs & constants as part of method's code.
// class HandlerImpl is platform-specific and defined in the *.ad files.
@ -1038,18 +1045,18 @@ CodeBuffer* Compile::init_buffer(uint* blk_starts) {
code_req = const_req = stub_req = exception_handler_req = deopt_handler_req = 0x10; // force expansion
int total_req =
const_req +
code_req +
pad_req +
stub_req +
exception_handler_req +
deopt_handler_req; // deopt handler
const_req +
code_req +
pad_req +
stub_req +
exception_handler_req +
deopt_handler_req; // deopt handler
if (has_method_handle_invokes())
total_req += deopt_handler_req; // deopt MH handler
CodeBuffer* cb = code_buffer();
cb->initialize(total_req, locs_req);
cb->initialize(total_req, buf_sizes._reloc);
// Have we run out of code space?
if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) {
@ -1268,12 +1275,12 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
Process_OopMap_Node(mach, current_offset);
} // End if safepoint
// If this is a null check, then add the start of the previous instruction to the list
// If this is a null check, then add the start of the previous instruction to the list
else if( mach->is_MachNullCheck() ) {
inct_starts[inct_cnt++] = previous_offset;
}
// If this is a branch, then fill in the label with the target BB's label
// If this is a branch, then fill in the label with the target BB's label
else if (mach->is_MachBranch()) {
// This requires the TRUE branch target be in succs[0]
uint block_num = block->non_connector_successor(0)->_pre_order;
@ -1284,8 +1291,8 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
bool delay_slot_is_used = valid_bundle_info(n) &&
node_bundling(n)->use_unconditional_delay();
if (!delay_slot_is_used && mach->may_be_short_branch()) {
assert(delay_slot == NULL, "not expecting delay slot node");
int br_size = n->size(_regalloc);
assert(delay_slot == NULL, "not expecting delay slot node");
int br_size = n->size(_regalloc);
int offset = blk_starts[block_num] - current_offset;
if (block_num >= i) {
// Current and following block's offset are not
@ -1343,7 +1350,7 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
}
}
#ifdef ASSERT
// Check that oop-store precedes the card-mark
// Check that oop-store precedes the card-mark
else if (mach->ideal_Opcode() == Op_StoreCM) {
uint storeCM_idx = j;
int count = 0;
@ -1514,6 +1521,10 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
}
#endif
BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
bs->emit_stubs(*cb);
if (failing()) return;
#ifndef PRODUCT
// Information on the size of the method, without the extraneous code
Scheduling::increment_method_size(cb->insts_size());
@ -1688,20 +1699,20 @@ uint Scheduling::_total_instructions_per_bundle[Pipeline::_max_instrs_per_cycle+
// Initializer for class Scheduling
Scheduling::Scheduling(Arena *arena, Compile &compile)
: _arena(arena),
_cfg(compile.cfg()),
_regalloc(compile.regalloc()),
_scheduled(arena),
_available(arena),
_reg_node(arena),
_pinch_free_list(arena),
_next_node(NULL),
_bundle_instr_count(0),
_bundle_cycle_number(0),
_bundle_use(0, 0, resource_count, &_bundle_use_elements[0])
: _arena(arena),
_cfg(compile.cfg()),
_regalloc(compile.regalloc()),
_scheduled(arena),
_available(arena),
_reg_node(arena),
_pinch_free_list(arena),
_next_node(NULL),
_bundle_instr_count(0),
_bundle_cycle_number(0),
_bundle_use(0, 0, resource_count, &_bundle_use_elements[0])
#ifndef PRODUCT
, _branches(0)
, _unconditional_delays(0)
, _branches(0)
, _unconditional_delays(0)
#endif
{
// Create a MachNopNode
@ -1782,8 +1793,8 @@ void Scheduling::step_and_clear() {
_bundle_use.reset();
memcpy(_bundle_use_elements,
Pipeline_Use::elaborated_elements,
sizeof(Pipeline_Use::elaborated_elements));
Pipeline_Use::elaborated_elements,
sizeof(Pipeline_Use::elaborated_elements));
}
// Perform instruction scheduling and bundling over the sequence of
@ -1810,6 +1821,22 @@ void Compile::ScheduleAndBundle() {
// Walk backwards over each basic block, computing the needed alignment
// Walk over all the basic blocks
scheduling.DoScheduling();
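// Non-product tracing: when trace_opto_output() is enabled, dump every
// block after scheduling, prefixing each node with its first assigned
// register.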
#ifndef PRODUCT
if (trace_opto_output()) {
tty->print("\n---- After ScheduleAndBundle ----\n");
for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
tty->print("\nBB#%03d:\n", i);
Block* block = _cfg->get_block(i);
for (uint j = 0; j < block->number_of_nodes(); j++) {
Node* n = block->get_node(j);
OptoReg::Name reg = _regalloc->get_reg_first(n);
tty->print(" %-6s ", reg >= 0 && reg < REG_COUNT ? Matcher::regName[reg] : "");
n->dump();
}
}
}
#endif
}
// Compute the latency of all the instructions. This is fairly simple,
@ -1878,7 +1905,7 @@ bool Scheduling::NodeFitsInBundle(Node *n) {
#ifndef PRODUCT
if (_cfg->C->trace_opto_output())
tty->print("# NodeFitsInBundle [%4d]: FALSE; latency %4d > %d\n",
n->_idx, _current_latency[n_idx], _bundle_cycle_number);
n->_idx, _current_latency[n_idx], _bundle_cycle_number);
#endif
return (false);
}
@ -1895,7 +1922,7 @@ bool Scheduling::NodeFitsInBundle(Node *n) {
#ifndef PRODUCT
if (_cfg->C->trace_opto_output())
tty->print("# NodeFitsInBundle [%4d]: FALSE; too many instructions: %d > %d\n",
n->_idx, _bundle_instr_count + instruction_count, Pipeline::_max_instrs_per_cycle);
n->_idx, _bundle_instr_count + instruction_count, Pipeline::_max_instrs_per_cycle);
#endif
return (false);
}
@ -2103,12 +2130,12 @@ void Scheduling::AddNodeToBundle(Node *n, const Block *bb) {
// Don't allow safepoints in the branch shadow, that will
// cause a number of difficulties
if ( avail_pipeline->instructionCount() == 1 &&
!avail_pipeline->hasMultipleBundles() &&
!avail_pipeline->hasBranchDelay() &&
Pipeline::instr_has_unit_size() &&
d->size(_regalloc) == Pipeline::instr_unit_size() &&
NodeFitsInBundle(d) &&
!node_bundling(d)->used_in_delay()) {
!avail_pipeline->hasMultipleBundles() &&
!avail_pipeline->hasBranchDelay() &&
Pipeline::instr_has_unit_size() &&
d->size(_regalloc) == Pipeline::instr_unit_size() &&
NodeFitsInBundle(d) &&
!node_bundling(d)->used_in_delay()) {
if (d->is_Mach() && !d->is_MachSafePoint()) {
// A node that fits in the delay slot was found, so we need to
@ -2153,13 +2180,13 @@ void Scheduling::AddNodeToBundle(Node *n, const Block *bb) {
// step of the bundles
if (!NodeFitsInBundle(n)) {
#ifndef PRODUCT
if (_cfg->C->trace_opto_output())
tty->print("# *** STEP(branch won't fit) ***\n");
if (_cfg->C->trace_opto_output())
tty->print("# *** STEP(branch won't fit) ***\n");
#endif
// Update the state information
_bundle_instr_count = 0;
_bundle_cycle_number += 1;
_bundle_use.step(1);
// Update the state information
_bundle_instr_count = 0;
_bundle_cycle_number += 1;
_bundle_use.step(1);
}
}
@ -2205,8 +2232,8 @@ void Scheduling::AddNodeToBundle(Node *n, const Block *bb) {
#ifndef PRODUCT
if (_cfg->C->trace_opto_output())
tty->print("# *** STEP(%d >= %d instructions) ***\n",
instruction_count + _bundle_instr_count,
Pipeline::_max_instrs_per_cycle);
instruction_count + _bundle_instr_count,
Pipeline::_max_instrs_per_cycle);
#endif
step(1);
}
@ -2412,7 +2439,7 @@ void Scheduling::DoScheduling() {
}
assert(!last->is_Mach() || last->as_Mach()->ideal_Opcode() != Op_Con, "");
if( last->is_Catch() ||
(last->is_Mach() && last->as_Mach()->ideal_Opcode() == Op_Halt) ) {
(last->is_Mach() && last->as_Mach()->ideal_Opcode() == Op_Halt) ) {
// There might be a prior call. Skip it.
while (_bb_start < _bb_end && bb->get_node(--_bb_end)->is_MachProj());
} else if( last->is_MachNullCheck() ) {
@ -2482,7 +2509,7 @@ void Scheduling::DoScheduling() {
}
#endif
#ifdef ASSERT
verify_good_schedule(bb,"after block local scheduling");
verify_good_schedule(bb,"after block local scheduling");
#endif
}
@ -2830,31 +2857,31 @@ void Scheduling::ComputeRegisterAntidependencies(Block *b) {
//
void Scheduling::garbage_collect_pinch_nodes() {
#ifndef PRODUCT
if (_cfg->C->trace_opto_output()) tty->print("Reclaimed pinch nodes:");
if (_cfg->C->trace_opto_output()) tty->print("Reclaimed pinch nodes:");
#endif
int trace_cnt = 0;
for (uint k = 0; k < _reg_node.Size(); k++) {
Node* pinch = _reg_node[k];
if ((pinch != NULL) && pinch->Opcode() == Op_Node &&
// no precedence input edges
(pinch->req() == pinch->len() || pinch->in(pinch->req()) == NULL) ) {
cleanup_pinch(pinch);
_pinch_free_list.push(pinch);
_reg_node.map(k, NULL);
int trace_cnt = 0;
for (uint k = 0; k < _reg_node.Size(); k++) {
Node* pinch = _reg_node[k];
if ((pinch != NULL) && pinch->Opcode() == Op_Node &&
// no precedence input edges
(pinch->req() == pinch->len() || pinch->in(pinch->req()) == NULL) ) {
cleanup_pinch(pinch);
_pinch_free_list.push(pinch);
_reg_node.map(k, NULL);
#ifndef PRODUCT
if (_cfg->C->trace_opto_output()) {
trace_cnt++;
if (trace_cnt > 40) {
tty->print("\n");
trace_cnt = 0;
}
tty->print(" %d", pinch->_idx);
if (_cfg->C->trace_opto_output()) {
trace_cnt++;
if (trace_cnt > 40) {
tty->print("\n");
trace_cnt = 0;
}
#endif
tty->print(" %d", pinch->_idx);
}
#endif
}
}
#ifndef PRODUCT
if (_cfg->C->trace_opto_output()) tty->print("\n");
if (_cfg->C->trace_opto_output()) tty->print("\n");
#endif
}
@ -2891,19 +2918,19 @@ void Scheduling::dump_available() const {
void Scheduling::print_statistics() {
// Print the size added by nops for bundling
tty->print("Nops added %d bytes to total of %d bytes",
_total_nop_size, _total_method_size);
_total_nop_size, _total_method_size);
if (_total_method_size > 0)
tty->print(", for %.2f%%",
((double)_total_nop_size) / ((double) _total_method_size) * 100.0);
((double)_total_nop_size) / ((double) _total_method_size) * 100.0);
tty->print("\n");
// Print the number of branch shadows filled
if (Pipeline::_branch_has_delay_slot) {
tty->print("Of %d branches, %d had unconditional delay slots filled",
_total_branches, _total_unconditional_delays);
_total_branches, _total_unconditional_delays);
if (_total_branches > 0)
tty->print(", for %.2f%%",
((double)_total_unconditional_delays) / ((double)_total_branches) * 100.0);
((double)_total_unconditional_delays) / ((double)_total_branches) * 100.0);
tty->print("\n");
}
@ -2917,6 +2944,6 @@ void Scheduling::print_statistics() {
if (total_bundles > 0)
tty->print("Average ILP (excluding nops) is %.2f\n",
((double)total_instructions) / ((double)total_bundles));
((double)total_instructions) / ((double)total_bundles));
}
#endif

View File

@ -40,7 +40,6 @@ class PhaseCFG;
class PhaseChaitin;
class Pipeline_Use_Element;
class Pipeline_Use;
#ifndef PRODUCT
#define DEBUG_ARG(x) , x
#else
@ -49,10 +48,7 @@ class Pipeline_Use;
// Define the initial sizes for allocation of the resizable code buffer
enum {
initial_code_capacity = 16 * 1024,
initial_stub_capacity = 4 * 1024,
initial_const_capacity = 4 * 1024,
initial_locs_capacity = 3 * 1024
initial_const_capacity = 4 * 1024
};
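// initial_locs_capacity is gone: relocation-info space is no longer grown
// from a fixed default but computed per compile and handed to
// CodeBuffer::initialize() through BufferSizingData::_reloc.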
//------------------------------Scheduling----------------------------------

View File

@ -1648,14 +1648,14 @@ void PhaseIterGVN::add_users_to_worklist( Node *n ) {
// of the mirror load depends on the type of 'n'. See LoadNode::Value().
// LoadBarrier?(LoadP(LoadP(AddP(foo:Klass, #java_mirror))))
BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
bool has_load_barriers = bs->has_load_barriers();
bool has_load_barrier_nodes = bs->has_load_barrier_nodes();
if (use_op == Op_LoadP && use->bottom_type()->isa_rawptr()) {
for (DUIterator_Fast i2max, i2 = use->fast_outs(i2max); i2 < i2max; i2++) {
Node* u = use->fast_out(i2);
const Type* ut = u->bottom_type();
if (u->Opcode() == Op_LoadP && ut->isa_instptr()) {
if (has_load_barriers) {
if (has_load_barrier_nodes) {
// Search for load barriers behind the load
for (DUIterator_Fast i3max, i3 = u->fast_outs(i3max); i3 < i3max; i3++) {
Node* b = u->fast_out(i3);
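// The rename from has_load_barriers() to has_load_barrier_nodes() matches
// the new design: with barriers expanded at the MachNode level there are
// (presumably) no LoadBarrier nodes left in the ideal graph, so the extra
// worklist walk behind the load can be skipped.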
@ -1808,14 +1808,14 @@ void PhaseCCP::analyze() {
// Loading the java mirror from a Klass requires two loads and the type
// of the mirror load depends on the type of 'n'. See LoadNode::Value().
BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
bool has_load_barriers = bs->has_load_barriers();
bool has_load_barrier_nodes = bs->has_load_barrier_nodes();
if (m_op == Op_LoadP && m->bottom_type()->isa_rawptr()) {
for (DUIterator_Fast i2max, i2 = m->fast_outs(i2max); i2 < i2max; i2++) {
Node* u = m->fast_out(i2);
const Type* ut = u->bottom_type();
if (u->Opcode() == Op_LoadP && ut->isa_instptr() && ut != type(u)) {
if (has_load_barriers) {
if (has_load_barrier_nodes) {
// Search for load barriers behind the load
for (DUIterator_Fast i3max, i3 = u->fast_outs(i3max); i3 < i3max; i3++) {
Node* b = u->fast_out(i3);