8230565: ZGC: Redesign C2 load barrier to expand on the MachNode level
Co-authored-by: Per Liden <per.liden@oracle.com>
Co-authored-by: Stefan Karlsson <stefan.karlsson@oracle.com>
Co-authored-by: Nils Eliasson <nils.eliasson@oracle.com>
Reviewed-by: pliden, stefank, neliasso

parent dcc9cc3fdd
commit 42885307f6
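The core of the redesign: instead of expanding ZGC load barriers as separate ideal-graph nodes (LoadBarrierSlowReg) before matching, the ordinary LoadP/CAS/GetAndSet matcher rules now carry barrier_data() and emit the barrier fast path themselves at the MachNode level, branching to an out-of-line ZLoadBarrierStubC2 only when the loaded reference fails the bad-mask test. A minimal plain-C++ model of that test follows (illustrative only; the mask constant and function name are stand-ins for the real z_load_barrier() helpers shown in the .ad changes below):

#include <cstdint>

// A loaded reference needs the slow path iff it shares a bit with the
// per-thread address bad mask (ZThreadLocalData::address_bad_mask_offset()
// in the patch); good references fall straight through.
inline bool needs_load_barrier_slow_path(uintptr_t ref, uintptr_t address_bad_mask) {
  return (ref & address_bad_mask) != 0;  // "andr tmp, tmp, ref; cbnz tmp, stub" on AArch64
}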
@@ -2513,17 +2513,8 @@ void Compile::reshape_address(AddPNode* addp) {
__ INSN(REG, as_Register(BASE)); \
}

typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
MacroAssembler::SIMD_RegVariant T, const Address &adr);

// Used for all non-volatile memory accesses. The use of
// $mem->opcode() to discover whether this pattern uses sign-extended
// offsets is something of a kludge.
static void loadStore(MacroAssembler masm, mem_insn insn,
Register reg, int opcode,
Register base, int index, int size, int disp)
static Address mem2address(int opcode, Register base, int index, int size, int disp)
{
Address::extend scale;

@@ -2542,16 +2533,34 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
}

if (index == -1) {
(masm.*insn)(reg, Address(base, disp));
return Address(base, disp);
} else {
assert(disp == 0, "unsupported address mode: disp = %d", disp);
(masm.*insn)(reg, Address(base, as_Register(index), scale));
return Address(base, as_Register(index), scale);
}
}


typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
typedef void (MacroAssembler::* mem_insn2)(Register Rt, Register adr);
typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
MacroAssembler::SIMD_RegVariant T, const Address &adr);

// Used for all non-volatile memory accesses. The use of
// $mem->opcode() to discover whether this pattern uses sign-extended
// offsets is something of a kludge.
static void loadStore(MacroAssembler masm, mem_insn insn,
Register reg, int opcode,
Register base, int index, int size, int disp)
{
Address addr = mem2address(opcode, base, index, size, disp);
(masm.*insn)(reg, addr);
}

static void loadStore(MacroAssembler masm, mem_float_insn insn,
FloatRegister reg, int opcode,
Register base, int index, int size, int disp)
FloatRegister reg, int opcode,
Register base, int index, int size, int disp)
{
Address::extend scale;

@@ -2573,8 +2582,8 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
}

static void loadStore(MacroAssembler masm, mem_vector_insn insn,
FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
int opcode, Register base, int index, int size, int disp)
FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
int opcode, Register base, int index, int size, int disp)
{
if (index == -1) {
(masm.*insn)(reg, T, Address(base, disp));
@@ -3791,7 +3800,7 @@ frame %{
static const int hi[Op_RegL + 1] = { // enum name
0, // Op_Node
0, // Op_Set
OptoReg::Bad, // Op_RegN
OptoReg::Bad, // Op_RegN
OptoReg::Bad, // Op_RegI
R0_H_num, // Op_RegP
OptoReg::Bad, // Op_RegF
@@ -6923,7 +6932,7 @@ instruct loadRange(iRegINoSp dst, memory mem)
instruct loadP(iRegPNoSp dst, memory mem)
%{
match(Set dst (LoadP mem));
predicate(!needs_acquiring_load(n));
predicate(!needs_acquiring_load(n) && (n->as_Load()->barrier_data() == 0));

ins_cost(4 * INSN_COST);
format %{ "ldr $dst, $mem\t# ptr" %}
@@ -7616,6 +7625,7 @@ instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
%{
match(Set dst (LoadP mem));
predicate(n->as_Load()->barrier_data() == 0);

ins_cost(VOLATILE_REF_COST);
format %{ "ldar $dst, $mem\t# ptr" %}
@@ -8552,6 +8562,7 @@ instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoS
instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

match(Set res (CompareAndSwapP mem (Binary oldval newval)));
predicate(n->as_LoadStore()->barrier_data() == 0);
ins_cost(2 * VOLATILE_REF_COST);

effect(KILL cr);
@@ -8665,7 +8676,7 @@ instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegL

instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

predicate(needs_acquiring_load_exclusive(n));
predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));
match(Set res (CompareAndSwapP mem (Binary oldval newval)));
ins_cost(VOLATILE_REF_COST);

@@ -8796,6 +8807,7 @@ instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN ne
%}

instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
ins_cost(2 * VOLATILE_REF_COST);
effect(TEMP_DEF res, KILL cr);
@@ -8895,7 +8907,7 @@ instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN
%}

instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
predicate(needs_acquiring_load_exclusive(n));
predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));
match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
ins_cost(VOLATILE_REF_COST);
effect(TEMP_DEF res, KILL cr);
@@ -8996,6 +9008,7 @@ instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN ne
%}

instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
ins_cost(2 * VOLATILE_REF_COST);
effect(KILL cr);
@@ -9103,8 +9116,8 @@ instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN
%}

instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
predicate(needs_acquiring_load_exclusive(n));
match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));
ins_cost(VOLATILE_REF_COST);
effect(KILL cr);
format %{
@@ -9154,6 +9167,7 @@ instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) %{
%}

instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) %{
predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set prev (GetAndSetP mem newv));
ins_cost(2 * VOLATILE_REF_COST);
format %{ "atomic_xchg $prev, $newv, [$mem]" %}
@@ -9197,7 +9211,7 @@ instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) %{
%}

instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) %{
predicate(needs_acquiring_load_exclusive(n));
predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));
match(Set prev (GetAndSetP mem newv));
ins_cost(VOLATILE_REF_COST);
format %{ "atomic_xchg_acq $prev, $newv, [$mem]" %}
@@ -24,22 +24,23 @@
#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "code/codeBlob.hpp"
#include "code/vmreg.inline.hpp"
#include "gc/z/zBarrier.inline.hpp"
#include "gc/z/zBarrierSet.hpp"
#include "gc/z/zBarrierSetAssembler.hpp"
#include "gc/z/zBarrierSetRuntime.hpp"
#include "gc/z/zThreadLocalData.hpp"
#include "memory/resourceArea.hpp"
#include "runtime/sharedRuntime.hpp"
#include "utilities/macros.hpp"
#ifdef COMPILER1
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "gc/z/c1/zBarrierSetC1.hpp"
#endif // COMPILER1

#include "gc/z/zThreadLocalData.hpp"

ZBarrierSetAssembler::ZBarrierSetAssembler() :
_load_barrier_slow_stub(),
_load_barrier_weak_slow_stub() {}
#ifdef COMPILER2
#include "gc/z/c2/zBarrierSetC2.hpp"
#endif // COMPILER2

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
@@ -66,7 +67,7 @@ void ZBarrierSetAssembler::load_at(MacroAssembler* masm,
assert_different_registers(rscratch1, rscratch2, src.base());
assert_different_registers(rscratch1, rscratch2, dst);

RegSet savedRegs = RegSet::range(r0,r28) - RegSet::of(dst, rscratch1, rscratch2);
RegSet savedRegs = RegSet::range(r0, r28) - RegSet::of(dst, rscratch1, rscratch2);

Label done;

@@ -206,7 +207,8 @@ void ZBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm,

// The Address offset is too large to direct load - -784. Our range is +127, -128.
__ mov(tmp, (long int)(in_bytes(ZThreadLocalData::address_bad_mask_offset()) -
in_bytes(JavaThread::jni_environment_offset())));
in_bytes(JavaThread::jni_environment_offset())));

// Load address bad mask
__ add(tmp, jni_env, tmp);
__ ldr(tmp, Address(tmp));
@@ -294,12 +296,12 @@ void ZBarrierSetAssembler::generate_c1_load_barrier_runtime_stub(StubAssembler*
__ prologue("zgc_load_barrier stub", false);

// We don't use push/pop_clobbered_registers() - we need to pull out the result from r0.
for (int i = 0; i < 32; i +=2) {
__ stpd(as_FloatRegister(i), as_FloatRegister(i+1), Address(__ pre(sp,-16)));
for (int i = 0; i < 32; i += 2) {
__ stpd(as_FloatRegister(i), as_FloatRegister(i + 1), Address(__ pre(sp,-16)));
}

RegSet saveRegs = RegSet::range(r0,r28) - RegSet::of(r0);
__ push(saveRegs, sp);
const RegSet save_regs = RegSet::range(r1, r28);
__ push(save_regs, sp);

// Setup arguments
__ load_parameter(0, c_rarg0);
@@ -307,98 +309,161 @@ void ZBarrierSetAssembler::generate_c1_load_barrier_runtime_stub(StubAssembler*

__ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2);

__ pop(saveRegs, sp);
__ pop(save_regs, sp);

for (int i = 30; i >0; i -=2) {
__ ldpd(as_FloatRegister(i), as_FloatRegister(i+1), Address(__ post(sp, 16)));
}
for (int i = 30; i >= 0; i -= 2) {
__ ldpd(as_FloatRegister(i), as_FloatRegister(i + 1), Address(__ post(sp, 16)));
}

__ epilogue();
}
#endif // COMPILER1

#ifdef COMPILER2

OptoReg::Name ZBarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) {
if (!OptoReg::is_reg(opto_reg)) {
return OptoReg::Bad;
}

const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
if (vm_reg->is_FloatRegister()) {
return opto_reg & ~1;
}

return opto_reg;
}

#undef __
#define __ cgen->assembler()->
#define __ _masm->

// Generates a register specific stub for calling
// ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded() or
// ZBarrierSetRuntime::load_barrier_on_weak_oop_field_preloaded().
//
// The raddr register serves as both input and output for this stub. When the stub is
// called the raddr register contains the object field address (oop*) where the bad oop
// was loaded from, which caused the slow path to be taken. On return from the stub the
// raddr register contains the good/healed oop returned from
// ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded() or
// ZBarrierSetRuntime::load_barrier_on_weak_oop_field_preloaded().
static address generate_load_barrier_stub(StubCodeGenerator* cgen, Register raddr, DecoratorSet decorators) {
// Don't generate stub for invalid registers
if (raddr == zr || raddr == r29 || raddr == r30) {
return NULL;
class ZSaveLiveRegisters {
private:
MacroAssembler* const _masm;
RegSet _gp_regs;
RegSet _fp_regs;

public:
void initialize(ZLoadBarrierStubC2* stub) {
// Create mask of live registers
RegMask live = stub->live();

// Record registers that needs to be saved/restored
while (live.is_NotEmpty()) {
const OptoReg::Name opto_reg = live.find_first_elem();
live.Remove(opto_reg);
if (OptoReg::is_reg(opto_reg)) {
const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
if (vm_reg->is_Register()) {
_gp_regs += RegSet::of(vm_reg->as_Register());
} else if (vm_reg->is_FloatRegister()) {
_fp_regs += RegSet::of((Register)vm_reg->as_FloatRegister());
} else {
fatal("Unknown register type");
}
}
}

// Remove C-ABI SOE registers, scratch regs and _ref register that will be updated
_gp_regs -= RegSet::range(r19, r30) + RegSet::of(r8, r9, stub->ref());
}

// Create stub name
char name[64];
const bool weak = (decorators & ON_WEAK_OOP_REF) != 0;
os::snprintf(name, sizeof(name), "zgc_load_barrier%s_stub_%s", weak ? "_weak" : "", raddr->name());
ZSaveLiveRegisters(MacroAssembler* masm, ZLoadBarrierStubC2* stub) :
_masm(masm),
_gp_regs(),
_fp_regs() {

__ align(CodeEntryAlignment);
StubCodeMark mark(cgen, "StubRoutines", os::strdup(name, mtCode));
address start = __ pc();
// Figure out what registers to save/restore
initialize(stub);

// Save live registers
RegSet savedRegs = RegSet::range(r0,r18) - RegSet::of(raddr);

__ enter();
__ push(savedRegs, sp);

// Setup arguments
if (raddr != c_rarg1) {
__ mov(c_rarg1, raddr);
// Save registers
__ push(_gp_regs, sp);
__ push_fp(_fp_regs, sp);
}

__ ldr(c_rarg0, Address(raddr));
~ZSaveLiveRegisters() {
// Restore registers
__ pop_fp(_fp_regs, sp);
__ pop(_gp_regs, sp);
}
};

// Call barrier function
__ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), c_rarg0, c_rarg1);
#undef __
#define __ _masm->

// Move result returned in r0 to raddr, if needed
if (raddr != r0) {
__ mov(raddr, r0);
class ZSetupArguments {
private:
MacroAssembler* const _masm;
const Register _ref;
const Address _ref_addr;

public:
ZSetupArguments(MacroAssembler* masm, ZLoadBarrierStubC2* stub) :
_masm(masm),
_ref(stub->ref()),
_ref_addr(stub->ref_addr()) {

// Setup arguments
if (_ref_addr.base() == noreg) {
// No self healing
if (_ref != c_rarg0) {
__ mov(c_rarg0, _ref);
}
__ mov(c_rarg1, 0);
} else {
// Self healing
if (_ref == c_rarg0) {
// _ref is already at correct place
__ lea(c_rarg1, _ref_addr);
} else if (_ref != c_rarg1) {
// _ref is in wrong place, but not in c_rarg1, so fix it first
__ lea(c_rarg1, _ref_addr);
__ mov(c_rarg0, _ref);
} else if (_ref_addr.base() != c_rarg0 && _ref_addr.index() != c_rarg0) {
assert(_ref == c_rarg1, "Mov ref first, vacating c_rarg0");
__ mov(c_rarg0, _ref);
__ lea(c_rarg1, _ref_addr);
} else {
assert(_ref == c_rarg1, "Need to vacate c_rarg1 and _ref_addr is using c_rarg0");
if (_ref_addr.base() == c_rarg0 || _ref_addr.index() == c_rarg0) {
__ mov(rscratch2, c_rarg1);
__ lea(c_rarg1, _ref_addr);
__ mov(c_rarg0, rscratch2);
} else {
ShouldNotReachHere();
}
}
}
}

__ pop(savedRegs, sp);
__ leave();
__ ret(lr);
~ZSetupArguments() {
// Transfer result
if (_ref != r0) {
__ mov(_ref, r0);
}
}
};

return start;
#undef __
#define __ masm->

void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, ZLoadBarrierStubC2* stub) const {
BLOCK_COMMENT("ZLoadBarrierStubC2");

// Stub entry
__ bind(*stub->entry());

{
ZSaveLiveRegisters save_live_registers(masm, stub);
ZSetupArguments setup_arguments(masm, stub);
__ mov(rscratch1, stub->slow_path());
__ blr(rscratch1);
}

// Stub exit
__ b(*stub->continuation());
}

#undef __

static void barrier_stubs_init_inner(const char* label, const DecoratorSet decorators, address* stub) {
const int nregs = 28; // Exclude FP, XZR, SP from calculation.
const int code_size = nregs * 254; // Rough estimate of code size

ResourceMark rm;

CodeBuffer buf(BufferBlob::create(label, code_size));
StubCodeGenerator cgen(&buf);

for (int i = 0; i < nregs; i++) {
const Register reg = as_Register(i);
stub[i] = generate_load_barrier_stub(&cgen, reg, decorators);
}
}

void ZBarrierSetAssembler::barrier_stubs_init() {
barrier_stubs_init_inner("zgc_load_barrier_stubs", ON_STRONG_OOP_REF, _load_barrier_slow_stub);
barrier_stubs_init_inner("zgc_load_barrier_weak_stubs", ON_WEAK_OOP_REF, _load_barrier_weak_slow_stub);
}

address ZBarrierSetAssembler::load_barrier_slow_stub(Register reg) {
return _load_barrier_slow_stub[reg->encoding()];
}

address ZBarrierSetAssembler::load_barrier_weak_slow_stub(Register reg) {
return _load_barrier_weak_slow_stub[reg->encoding()];
}
#endif // COMPILER2
@@ -24,6 +24,12 @@
#ifndef CPU_AARCH64_GC_Z_ZBARRIERSETASSEMBLER_AARCH64_HPP
#define CPU_AARCH64_GC_Z_ZBARRIERSETASSEMBLER_AARCH64_HPP

#include "code/vmreg.hpp"
#include "oops/accessDecorators.hpp"
#ifdef COMPILER2
#include "opto/optoreg.hpp"
#endif // COMPILER2

#ifdef COMPILER1
class LIR_Assembler;
class LIR_OprDesc;
@@ -32,14 +38,13 @@ class StubAssembler;
class ZLoadBarrierStubC1;
#endif // COMPILER1

#ifdef COMPILER2
class Node;
class ZLoadBarrierStubC2;
#endif // COMPILER2

class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase {
private:
address _load_barrier_slow_stub[RegisterImpl::number_of_registers];
address _load_barrier_weak_slow_stub[RegisterImpl::number_of_registers];

public:
ZBarrierSetAssembler();

virtual void load_at(MacroAssembler* masm,
DecoratorSet decorators,
BasicType type,
@@ -83,10 +88,13 @@ public:
DecoratorSet decorators) const;
#endif // COMPILER1

virtual void barrier_stubs_init();
#ifdef COMPILER2
OptoReg::Name refine_register(const Node* node,
OptoReg::Name opto_reg);

address load_barrier_slow_stub(Register reg);
address load_barrier_weak_slow_stub(Register reg);
void generate_c2_load_barrier_stub(MacroAssembler* masm,
ZLoadBarrierStubC2* stub) const;
#endif // COMPILER2
};

#endif // CPU_AARCH64_GC_Z_ZBARRIERSETASSEMBLER_AARCH64_HPP
@@ -24,155 +24,244 @@
source_hpp %{

#include "gc/z/c2/zBarrierSetC2.hpp"
#include "gc/z/zThreadLocalData.hpp"

%}

source %{

#include "gc/z/zBarrierSetAssembler.hpp"
static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) {
ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, weak);
__ ldr(tmp, Address(rthread, ZThreadLocalData::address_bad_mask_offset()));
__ andr(tmp, tmp, ref);
__ cbnz(tmp, *stub->entry());
__ bind(*stub->continuation());
}

static void z_load_barrier_slow_reg(MacroAssembler& _masm, Register dst,
Register base, int index, int scale,
int disp, bool weak) {
const address stub = weak ? ZBarrierSet::assembler()->load_barrier_weak_slow_stub(dst)
: ZBarrierSet::assembler()->load_barrier_slow_stub(dst);

if (index == -1) {
if (disp != 0) {
__ lea(dst, Address(base, disp));
} else {
__ mov(dst, base);
}
} else {
Register index_reg = as_Register(index);
if (disp == 0) {
__ lea(dst, Address(base, index_reg, Address::lsl(scale)));
} else {
__ lea(dst, Address(base, disp));
__ lea(dst, Address(dst, index_reg, Address::lsl(scale)));
}
}

__ far_call(RuntimeAddress(stub));
static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) {
ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, false /* weak */);
__ b(*stub->entry());
__ bind(*stub->continuation());
}

%}

//
// Execute ZGC load barrier (strong) slow path
//
instruct loadBarrierSlowReg(iRegP dst, memory src, rFlagsReg cr,
vRegD_V0 v0, vRegD_V1 v1, vRegD_V2 v2, vRegD_V3 v3, vRegD_V4 v4,
vRegD_V5 v5, vRegD_V6 v6, vRegD_V7 v7, vRegD_V8 v8, vRegD_V9 v9,
vRegD_V10 v10, vRegD_V11 v11, vRegD_V12 v12, vRegD_V13 v13, vRegD_V14 v14,
vRegD_V15 v15, vRegD_V16 v16, vRegD_V17 v17, vRegD_V18 v18, vRegD_V19 v19,
vRegD_V20 v20, vRegD_V21 v21, vRegD_V22 v22, vRegD_V23 v23, vRegD_V24 v24,
vRegD_V25 v25, vRegD_V26 v26, vRegD_V27 v27, vRegD_V28 v28, vRegD_V29 v29,
vRegD_V30 v30, vRegD_V31 v31) %{
match(Set dst (LoadBarrierSlowReg src dst));
predicate(!n->as_LoadBarrierSlowReg()->is_weak());
// Load Pointer
instruct zLoadP(iRegPNoSp dst, memory mem, rFlagsReg cr)
%{
match(Set dst (LoadP mem));
predicate(UseZGC && !needs_acquiring_load(n) && (n->as_Load()->barrier_data() == ZLoadBarrierStrong));
effect(TEMP dst, KILL cr);

effect(KILL cr,
KILL v0, KILL v1, KILL v2, KILL v3, KILL v4, KILL v5, KILL v6, KILL v7,
KILL v8, KILL v9, KILL v10, KILL v11, KILL v12, KILL v13, KILL v14,
KILL v15, KILL v16, KILL v17, KILL v18, KILL v19, KILL v20, KILL v21,
KILL v22, KILL v23, KILL v24, KILL v25, KILL v26, KILL v27, KILL v28,
KILL v29, KILL v30, KILL v31);
ins_cost(4 * INSN_COST);

format %{ "lea $dst, $src\n\t"
"call #ZLoadBarrierSlowPath" %}
format %{ "ldr $dst, $mem" %}

ins_encode %{
z_load_barrier_slow_reg(_masm, $dst$$Register, $src$$base$$Register,
$src$$index, $src$$scale, $src$$disp, false);
const Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
__ ldr($dst$$Register, ref_addr);
if (barrier_data() != ZLoadBarrierElided) {
z_load_barrier(_masm, this, ref_addr, $dst$$Register, rscratch2 /* tmp */, false /* weak */);
}
%}
ins_pipe(pipe_slow);

ins_pipe(iload_reg_mem);
%}

//
// Execute ZGC load barrier (weak) slow path
//
instruct loadBarrierWeakSlowReg(iRegP dst, memory src, rFlagsReg cr,
vRegD_V0 v0, vRegD_V1 v1, vRegD_V2 v2, vRegD_V3 v3, vRegD_V4 v4,
vRegD_V5 v5, vRegD_V6 v6, vRegD_V7 v7, vRegD_V8 v8, vRegD_V9 v9,
vRegD_V10 v10, vRegD_V11 v11, vRegD_V12 v12, vRegD_V13 v13, vRegD_V14 v14,
vRegD_V15 v15, vRegD_V16 v16, vRegD_V17 v17, vRegD_V18 v18, vRegD_V19 v19,
vRegD_V20 v20, vRegD_V21 v21, vRegD_V22 v22, vRegD_V23 v23, vRegD_V24 v24,
vRegD_V25 v25, vRegD_V26 v26, vRegD_V27 v27, vRegD_V28 v28, vRegD_V29 v29,
vRegD_V30 v30, vRegD_V31 v31) %{
match(Set dst (LoadBarrierSlowReg src dst));
predicate(n->as_LoadBarrierSlowReg()->is_weak());
// Load Weak Pointer
instruct zLoadWeakP(iRegPNoSp dst, memory mem, rFlagsReg cr)
%{
match(Set dst (LoadP mem));
predicate(UseZGC && !needs_acquiring_load(n) && (n->as_Load()->barrier_data() == ZLoadBarrierWeak));
effect(TEMP dst, KILL cr);

effect(KILL cr,
KILL v0, KILL v1, KILL v2, KILL v3, KILL v4, KILL v5, KILL v6, KILL v7,
KILL v8, KILL v9, KILL v10, KILL v11, KILL v12, KILL v13, KILL v14,
KILL v15, KILL v16, KILL v17, KILL v18, KILL v19, KILL v20, KILL v21,
KILL v22, KILL v23, KILL v24, KILL v25, KILL v26, KILL v27, KILL v28,
KILL v29, KILL v30, KILL v31);
ins_cost(4 * INSN_COST);

format %{ "lea $dst, $src\n\t"
"call #ZLoadBarrierSlowPath" %}
format %{ "ldr $dst, $mem" %}

ins_encode %{
z_load_barrier_slow_reg(_masm, $dst$$Register, $src$$base$$Register,
$src$$index, $src$$scale, $src$$disp, true);
const Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
__ ldr($dst$$Register, ref_addr);
z_load_barrier(_masm, this, ref_addr, $dst$$Register, rscratch2 /* tmp */, true /* weak */);
%}
ins_pipe(pipe_slow);

ins_pipe(iload_reg_mem);
%}

// Load Pointer Volatile
instruct zLoadPVolatile(iRegPNoSp dst, indirect mem /* sync_memory */, rFlagsReg cr)
%{
match(Set dst (LoadP mem));
predicate(UseZGC && needs_acquiring_load(n) && n->as_Load()->barrier_data() == ZLoadBarrierStrong);
effect(TEMP dst, KILL cr);

// Specialized versions of compareAndExchangeP that adds a keepalive that is consumed
// but doesn't affect output.
ins_cost(VOLATILE_REF_COST);

format %{ "ldar $dst, $mem\t" %}

ins_encode %{
__ ldar($dst$$Register, $mem$$Register);
if (barrier_data() != ZLoadBarrierElided) {
z_load_barrier(_masm, this, Address($mem$$Register), $dst$$Register, rscratch2 /* tmp */, false /* weak */);
}
%}

ins_pipe(pipe_serial);
%}

instruct zCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
match(Set res (CompareAndSwapP mem (Binary oldval newval)));
match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
predicate(UseZGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong);
effect(KILL cr, TEMP_DEF res);

instruct z_compareAndExchangeP(iRegPNoSp res, indirect mem,
iRegP oldval, iRegP newval, iRegP keepalive,
rFlagsReg cr) %{
match(Set res (ZCompareAndExchangeP (Binary mem keepalive) (Binary oldval newval)));
ins_cost(2 * VOLATILE_REF_COST);

format %{ "cmpxchg $mem, $oldval, $newval\n\t"
"cset $res, EQ" %}

ins_encode %{
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
false /* acquire */, true /* release */, false /* weak */, rscratch2);
__ cset($res$$Register, Assembler::EQ);
if (barrier_data() != ZLoadBarrierElided) {
Label good;
__ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset()));
__ andr(rscratch1, rscratch1, rscratch2);
__ cbz(rscratch1, good);
z_load_barrier_slow_path(_masm, this, Address($mem$$Register), rscratch2 /* ref */, rscratch1 /* tmp */);
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
false /* acquire */, true /* release */, false /* weak */, rscratch2);
__ cset($res$$Register, Assembler::EQ);
__ bind(good);
}
%}

ins_pipe(pipe_slow);
%}

instruct zCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
match(Set res (CompareAndSwapP mem (Binary oldval newval)));
match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
predicate(UseZGC && needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong));
effect(KILL cr, TEMP_DEF res);

ins_cost(2 * VOLATILE_REF_COST);

format %{ "cmpxchg $mem, $oldval, $newval\n\t"
"cset $res, EQ" %}

ins_encode %{
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
true /* acquire */, true /* release */, false /* weak */, rscratch2);
__ cset($res$$Register, Assembler::EQ);
if (barrier_data() != ZLoadBarrierElided) {
Label good;
__ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset()));
__ andr(rscratch1, rscratch1, rscratch2);
__ cbz(rscratch1, good);
z_load_barrier_slow_path(_masm, this, Address($mem$$Register), rscratch2 /* ref */, rscratch1 /* tmp */ );
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
true /* acquire */, true /* release */, false /* weak */, rscratch2);
__ cset($res$$Register, Assembler::EQ);
__ bind(good);
}
%}

ins_pipe(pipe_slow);
%}

instruct zCompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
predicate(UseZGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong);
effect(TEMP_DEF res, KILL cr);
format %{
"cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
%}

ins_cost(2 * VOLATILE_REF_COST);

format %{ "cmpxchg $res = $mem, $oldval, $newval" %}

ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::xword, /*acquire*/ false, /*release*/ true,
/*weak*/ false, $res$$Register);
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
false /* acquire */, true /* release */, false /* weak */, $res$$Register);
if (barrier_data() != ZLoadBarrierElided) {
Label good;
__ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset()));
__ andr(rscratch1, rscratch1, $res$$Register);
__ cbz(rscratch1, good);
z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, rscratch1 /* tmp */);
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
false /* acquire */, true /* release */, false /* weak */, $res$$Register);
__ bind(good);
}
%}
ins_pipe(pipe_slow);
%}

instruct z_compareAndSwapP(iRegINoSp res,
indirect mem,
iRegP oldval, iRegP newval, iRegP keepalive,
rFlagsReg cr) %{

match(Set res (ZCompareAndSwapP (Binary mem keepalive) (Binary oldval newval)));
match(Set res (ZWeakCompareAndSwapP (Binary mem keepalive) (Binary oldval newval)));

ins_cost(2 * VOLATILE_REF_COST);

effect(KILL cr);

format %{
"cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
"cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
%}

ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
aarch64_enc_cset_eq(res));

ins_pipe(pipe_slow);
%}


instruct z_get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev,
iRegP keepalive) %{
match(Set prev (ZGetAndSetP mem (Binary newv keepalive)));
instruct zCompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
predicate(UseZGC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong);
effect(TEMP_DEF res, KILL cr);

ins_cost(2 * VOLATILE_REF_COST);

format %{ "cmpxchg $res = $mem, $oldval, $newval" %}

ins_encode %{
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
true /* acquire */, true /* release */, false /* weak */, $res$$Register);
if (barrier_data() != ZLoadBarrierElided) {
Label good;
__ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset()));
__ andr(rscratch1, rscratch1, $res$$Register);
__ cbz(rscratch1, good);
z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, rscratch1 /* tmp */);
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword,
true /* acquire */, true /* release */, false /* weak */, $res$$Register);
__ bind(good);
}
%}

ins_pipe(pipe_slow);
%}

instruct zGetAndSetP(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{
match(Set prev (GetAndSetP mem newv));
predicate(UseZGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong);
effect(TEMP_DEF prev, KILL cr);

ins_cost(2 * VOLATILE_REF_COST);

format %{ "atomic_xchg $prev, $newv, [$mem]" %}

ins_encode %{
__ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
__ atomic_xchg($prev$$Register, $newv$$Register, $mem$$Register);
if (barrier_data() != ZLoadBarrierElided) {
z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, rscratch2 /* tmp */, false /* weak */);
}
%}

ins_pipe(pipe_serial);
%}

instruct zGetAndSetPAcq(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{
match(Set prev (GetAndSetP mem newv));
predicate(UseZGC && needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong));
effect(TEMP_DEF prev, KILL cr);

ins_cost(VOLATILE_REF_COST);

format %{ "atomic_xchg_acq $prev, $newv, [$mem]" %}

ins_encode %{
__ atomic_xchgal($prev$$Register, $newv$$Register, $mem$$Register);
if (barrier_data() != ZLoadBarrierElided) {
z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, rscratch2 /* tmp */, false /* weak */);
}
%}
ins_pipe(pipe_serial);
%}
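The cmpxchg-style rules above share a common shape: perform the compare-and-swap, and only if the value it observed is a stale ("bad") reference, take the slow path (which heals the field) and retry the operation once. A compact single-threaded C++ model of that control flow, with an assumed mask value and a stand-in heal() in place of the real barrier stub:

#include <atomic>
#include <cstdint>

constexpr uintptr_t kBadMask = uintptr_t(1) << 47;  // assumed value, for the model only

// Stand-in for the barrier slow path: returns the healed view of a stale reference.
inline uintptr_t heal(uintptr_t bad_ref) {
  return bad_ref & ~kBadMask;
}

bool cas_oop_field(std::atomic<uintptr_t>& field, uintptr_t oldval, uintptr_t newval) {
  uintptr_t observed = oldval;
  if (field.compare_exchange_strong(observed, newval)) {
    return true;                                           // fast path: no barrier taken
  }
  if ((observed & kBadMask) == 0) {
    return false;                                          // genuine mismatch
  }
  uintptr_t good = heal(observed);
  field.compare_exchange_strong(observed, good);           // self-heal the field
  observed = oldval;
  return field.compare_exchange_strong(observed, newval);  // single retry
}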
@@ -2132,6 +2132,65 @@ int MacroAssembler::pop(unsigned int bitset, Register stack) {

return count;
}

// Push lots of registers in the bit set supplied. Don't push sp.
// Return the number of words pushed
int MacroAssembler::push_fp(unsigned int bitset, Register stack) {
int words_pushed = 0;

// Scan bitset to accumulate register pairs
unsigned char regs[32];
int count = 0;
for (int reg = 0; reg <= 31; reg++) {
if (1 & bitset)
regs[count++] = reg;
bitset >>= 1;
}
regs[count++] = zr->encoding_nocheck();
count &= ~1; // Only push an even number of regs

// Always pushing full 128 bit registers.
if (count) {
stpq(as_FloatRegister(regs[0]), as_FloatRegister(regs[1]), Address(pre(stack, -count * wordSize * 2)));
words_pushed += 2;
}
for (int i = 2; i < count; i += 2) {
stpq(as_FloatRegister(regs[i]), as_FloatRegister(regs[i+1]), Address(stack, i * wordSize * 2));
words_pushed += 2;
}

assert(words_pushed == count, "oops, pushed != count");
return count;
}

int MacroAssembler::pop_fp(unsigned int bitset, Register stack) {
int words_pushed = 0;

// Scan bitset to accumulate register pairs
unsigned char regs[32];
int count = 0;
for (int reg = 0; reg <= 31; reg++) {
if (1 & bitset)
regs[count++] = reg;
bitset >>= 1;
}
regs[count++] = zr->encoding_nocheck();
count &= ~1;

for (int i = 2; i < count; i += 2) {
ldpq(as_FloatRegister(regs[i]), as_FloatRegister(regs[i+1]), Address(stack, i * wordSize * 2));
words_pushed += 2;
}
if (count) {
ldpq(as_FloatRegister(regs[0]), as_FloatRegister(regs[1]), Address(post(stack, count * wordSize * 2)));
words_pushed += 2;
}

assert(words_pushed == count, "oops, pushed != count");

return count;
}

#ifdef ASSERT
void MacroAssembler::verify_heapbase(const char* msg) {
#if 0
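push_fp()/pop_fp() are new MacroAssembler helpers added so the C2 barrier stub can spill exactly the live FP/SIMD registers. The essential trick is turning the bitset into an even number of register slots so everything can be moved with stpq/ldpq pairs; a small illustrative C++ model of that pairing logic (plain integers instead of FloatRegisters):

#include <cassert>
#include <cstddef>
#include <vector>

// Model of the bitset scan in push_fp()/pop_fp(): collect the set registers,
// append a dummy slot (zr in the real code), then truncate to an even count.
std::vector<int> pair_up(unsigned int bitset) {
  std::vector<int> regs;
  for (int reg = 0; reg <= 31; reg++) {
    if (bitset & 1) {
      regs.push_back(reg);
    }
    bitset >>= 1;
  }
  regs.push_back(31);                                       // zr->encoding_nocheck() stand-in
  regs.resize(regs.size() & ~static_cast<std::size_t>(1));  // only keep an even number of regs
  return regs;
}

int main() {
  assert(pair_up(0b0111).size() == 4);  // v0, v1, v2 plus the dummy pad slot
  assert(pair_up(0b0011).size() == 2);  // v0, v1; the dummy is dropped again
  return 0;
}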
@@ -442,12 +442,18 @@ private:
int push(unsigned int bitset, Register stack);
int pop(unsigned int bitset, Register stack);

int push_fp(unsigned int bitset, Register stack);
int pop_fp(unsigned int bitset, Register stack);

void mov(Register dst, Address a);

public:
void push(RegSet regs, Register stack) { if (regs.bits()) push(regs.bits(), stack); }
void pop(RegSet regs, Register stack) { if (regs.bits()) pop(regs.bits(), stack); }

void push_fp(RegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); }
void pop_fp(RegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); }

// Push and pop everything that might be clobbered by a native
// runtime call except rscratch1 and rscratch2. (They are always
// scratch, so we don't have to protect them.) Only save the lower
@@ -230,6 +230,11 @@ public:
return *this;
}

RegSet &operator-=(const RegSet aSet) {
*this = *this - aSet;
return *this;
}

static RegSet of(Register r1) {
return RegSet(r1);
}
@@ -23,20 +23,7 @@

#include "precompiled.hpp"
#include "gc/z/zArguments.hpp"
#include "runtime/globals.hpp"
#include "runtime/globals_extension.hpp"
#include "utilities/debug.hpp"

void ZArguments::initialize_platform() {
#ifdef COMPILER2
// The C2 barrier slow path expects vector registers to be least
// 16 bytes wide, which is the minimum width available on all
// x86-64 systems. However, the user could have speficied a lower
// number on the command-line, in which case we print a warning
// and raise it to 16.
if (MaxVectorSize < 16) {
warning("ZGC requires MaxVectorSize to be at least 16");
FLAG_SET_DEFAULT(MaxVectorSize, 16);
}
#endif
// Does nothing
}
@@ -24,22 +24,22 @@
#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "code/codeBlob.hpp"
#include "code/vmreg.inline.hpp"
#include "gc/z/zBarrier.inline.hpp"
#include "gc/z/zBarrierSet.hpp"
#include "gc/z/zBarrierSetAssembler.hpp"
#include "gc/z/zBarrierSetRuntime.hpp"
#include "memory/resourceArea.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/sharedRuntime.hpp"
#include "utilities/macros.hpp"
#ifdef COMPILER1
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "gc/z/c1/zBarrierSetC1.hpp"
#endif // COMPILER1

ZBarrierSetAssembler::ZBarrierSetAssembler() :
_load_barrier_slow_stub(),
_load_barrier_weak_slow_stub() {}
#ifdef COMPILER2
#include "gc/z/c2/zBarrierSetC2.hpp"
#endif // COMPILER2

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
@@ -344,137 +344,327 @@ void ZBarrierSetAssembler::generate_c1_load_barrier_runtime_stub(StubAssembler*

#endif // COMPILER1

#ifdef COMPILER2

OptoReg::Name ZBarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) {
if (!OptoReg::is_reg(opto_reg)) {
return OptoReg::Bad;
}

const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
if (vm_reg->is_XMMRegister()) {
opto_reg &= ~15;
switch (node->ideal_reg()) {
case Op_VecX:
opto_reg |= 2;
break;
case Op_VecY:
opto_reg |= 4;
break;
case Op_VecZ:
opto_reg |= 8;
break;
default:
opto_reg |= 1;
break;
}
}

return opto_reg;
}
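On x86, refine_register() records how much of an XMM/YMM/ZMM register is live by stashing a width code in the low four bits of the OptoReg name, and xmm_slot_size() later turns that code back into a byte count. A tiny self-contained model of the round trip (the OptoReg number used here is made up for the example):

#include <cassert>

int refine(int opto_reg, int width_code) {      // width_code: 1, 2, 4 or 8 slots of 8 bytes
  return (opto_reg & ~15) | width_code;
}

int slot_size_in_bytes(int opto_reg) {          // mirrors xmm_slot_size()
  return (opto_reg & 15) << 3;
}

int main() {
  const int xmm0 = 16;                          // hypothetical OptoReg number
  assert(slot_size_in_bytes(refine(xmm0, 2)) == 16);  // Op_VecX -> 16 bytes
  assert(slot_size_in_bytes(refine(xmm0, 4)) == 32);  // Op_VecY -> 32 bytes
  assert(slot_size_in_bytes(refine(xmm0, 8)) == 64);  // Op_VecZ -> 64 bytes
  return 0;
}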
// We use the vec_spill_helper from the x86.ad file to avoid reinventing this wheel
extern int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
int stack_offset, int reg, uint ireg, outputStream* st);

#undef __
#define __ cgen->assembler()->
#define __ _masm->

// Generates a register specific stub for calling
// ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded() or
// ZBarrierSetRuntime::load_barrier_on_weak_oop_field_preloaded().
//
// The raddr register serves as both input and output for this stub. When the stub is
// called the raddr register contains the object field address (oop*) where the bad oop
// was loaded from, which caused the slow path to be taken. On return from the stub the
// raddr register contains the good/healed oop returned from
// ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded() or
// ZBarrierSetRuntime::load_barrier_on_weak_oop_field_preloaded().
static address generate_load_barrier_stub(StubCodeGenerator* cgen, Register raddr, DecoratorSet decorators) {
// Don't generate stub for invalid registers
if (raddr == rsp || raddr == r15) {
return NULL;
class ZSaveLiveRegisters {
private:
struct XMMRegisterData {
XMMRegister _reg;
int _size;

// Used by GrowableArray::find()
bool operator == (const XMMRegisterData& other) {
return _reg == other._reg;
}
};

MacroAssembler* const _masm;
GrowableArray<Register> _gp_registers;
GrowableArray<XMMRegisterData> _xmm_registers;
int _spill_size;
int _spill_offset;

static int xmm_compare_register_size(XMMRegisterData* left, XMMRegisterData* right) {
if (left->_size == right->_size) {
return 0;
}

return (left->_size < right->_size) ? -1 : 1;
}

// Create stub name
char name[64];
const bool weak = (decorators & ON_WEAK_OOP_REF) != 0;
os::snprintf(name, sizeof(name), "zgc_load_barrier%s_stub_%s", weak ? "_weak" : "", raddr->name());

__ align(CodeEntryAlignment);
StubCodeMark mark(cgen, "StubRoutines", os::strdup(name, mtCode));
address start = __ pc();

// Save live registers
if (raddr != rax) {
__ push(rax);
}
if (raddr != rcx) {
__ push(rcx);
}
if (raddr != rdx) {
__ push(rdx);
}
if (raddr != rsi) {
__ push(rsi);
}
if (raddr != rdi) {
__ push(rdi);
}
if (raddr != r8) {
__ push(r8);
}
if (raddr != r9) {
__ push(r9);
}
if (raddr != r10) {
__ push(r10);
}
if (raddr != r11) {
__ push(r11);
static int xmm_slot_size(OptoReg::Name opto_reg) {
// The low order 4 bytes denote what size of the XMM register is live
return (opto_reg & 15) << 3;
}

// Setup arguments
if (raddr != c_rarg1) {
__ movq(c_rarg1, raddr);
}
__ movq(c_rarg0, Address(raddr, 0));

// Call barrier function
__ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), c_rarg0, c_rarg1);

// Move result returned in rax to raddr, if needed
if (raddr != rax) {
__ movq(raddr, rax);
static uint xmm_ideal_reg_for_size(int reg_size) {
switch (reg_size) {
case 8:
return Op_VecD;
case 16:
return Op_VecX;
case 32:
return Op_VecY;
case 64:
return Op_VecZ;
default:
fatal("Invalid register size %d", reg_size);
return 0;
}
}

// Restore saved registers
if (raddr != r11) {
__ pop(r11);
}
if (raddr != r10) {
__ pop(r10);
}
if (raddr != r9) {
__ pop(r9);
}
if (raddr != r8) {
__ pop(r8);
}
if (raddr != rdi) {
__ pop(rdi);
}
if (raddr != rsi) {
__ pop(rsi);
}
if (raddr != rdx) {
__ pop(rdx);
}
if (raddr != rcx) {
__ pop(rcx);
}
if (raddr != rax) {
__ pop(rax);
bool xmm_needs_vzeroupper() const {
return _xmm_registers.is_nonempty() && _xmm_registers.at(0)._size > 16;
}

__ ret(0);
void xmm_register_save(const XMMRegisterData& reg_data) {
const OptoReg::Name opto_reg = OptoReg::as_OptoReg(reg_data._reg->as_VMReg());
const uint ideal_reg = xmm_ideal_reg_for_size(reg_data._size);
_spill_offset -= reg_data._size;
vec_spill_helper(__ code(), false /* do_size */, false /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
}

return start;
void xmm_register_restore(const XMMRegisterData& reg_data) {
const OptoReg::Name opto_reg = OptoReg::as_OptoReg(reg_data._reg->as_VMReg());
const uint ideal_reg = xmm_ideal_reg_for_size(reg_data._size);
vec_spill_helper(__ code(), false /* do_size */, true /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
_spill_offset += reg_data._size;
}

void gp_register_save(Register reg) {
_spill_offset -= 8;
__ movq(Address(rsp, _spill_offset), reg);
}

void gp_register_restore(Register reg) {
__ movq(reg, Address(rsp, _spill_offset));
_spill_offset += 8;
}

void initialize(ZLoadBarrierStubC2* stub) {
// Create mask of caller saved registers that need to
// be saved/restored if live
RegMask caller_saved;
caller_saved.Insert(OptoReg::as_OptoReg(rax->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(rcx->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(rdx->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(rsi->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(rdi->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r8->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r9->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r10->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r11->as_VMReg()));
caller_saved.Remove(OptoReg::as_OptoReg(stub->ref()->as_VMReg()));

// Create mask of live registers
RegMask live = stub->live();
if (stub->tmp() != noreg) {
live.Insert(OptoReg::as_OptoReg(stub->tmp()->as_VMReg()));
}

int gp_spill_size = 0;
int xmm_spill_size = 0;

// Record registers that needs to be saved/restored
while (live.is_NotEmpty()) {
const OptoReg::Name opto_reg = live.find_first_elem();
const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);

live.Remove(opto_reg);

if (vm_reg->is_Register()) {
if (caller_saved.Member(opto_reg)) {
_gp_registers.append(vm_reg->as_Register());
gp_spill_size += 8;
}
} else if (vm_reg->is_XMMRegister()) {
// We encode in the low order 4 bits of the opto_reg, how large part of the register is live
const VMReg vm_reg_base = OptoReg::as_VMReg(opto_reg & ~15);
const int reg_size = xmm_slot_size(opto_reg);
const XMMRegisterData reg_data = { vm_reg_base->as_XMMRegister(), reg_size };
const int reg_index = _xmm_registers.find(reg_data);
if (reg_index == -1) {
// Not previously appended
_xmm_registers.append(reg_data);
xmm_spill_size += reg_size;
} else {
// Previously appended, update size
const int reg_size_prev = _xmm_registers.at(reg_index)._size;
if (reg_size > reg_size_prev) {
_xmm_registers.at_put(reg_index, reg_data);
xmm_spill_size += reg_size - reg_size_prev;
}
}
} else {
fatal("Unexpected register type");
}
}

// Sort by size, largest first
_xmm_registers.sort(xmm_compare_register_size);

// Stack pointer must be 16 bytes aligned for the call
_spill_offset = _spill_size = align_up(xmm_spill_size + gp_spill_size, 16);
}

public:
ZSaveLiveRegisters(MacroAssembler* masm, ZLoadBarrierStubC2* stub) :
_masm(masm),
_gp_registers(),
_xmm_registers(),
_spill_size(0),
_spill_offset(0) {

//
// Stack layout after registers have been spilled:
//
// | ... | original rsp, 16 bytes aligned
// ------------------
// | zmm0 high |
// | ... |
// | zmm0 low | 16 bytes aligned
// | ... |
// | ymm1 high |
// | ... |
// | ymm1 low | 16 bytes aligned
// | ... |
// | xmmN high |
// | ... |
// | xmmN low | 8 bytes aligned
// | reg0 | 8 bytes aligned
// | reg1 |
// | ... |
// | regN | new rsp, if 16 bytes aligned
// | <padding> | else new rsp, 16 bytes aligned
// ------------------
//

// Figure out what registers to save/restore
initialize(stub);

// Allocate stack space
if (_spill_size > 0) {
__ subptr(rsp, _spill_size);
}

// Save XMM/YMM/ZMM registers
for (int i = 0; i < _xmm_registers.length(); i++) {
xmm_register_save(_xmm_registers.at(i));
}

if (xmm_needs_vzeroupper()) {
__ vzeroupper();
}

// Save general purpose registers
for (int i = 0; i < _gp_registers.length(); i++) {
gp_register_save(_gp_registers.at(i));
}
}

~ZSaveLiveRegisters() {
// Restore general purpose registers
for (int i = _gp_registers.length() - 1; i >= 0; i--) {
gp_register_restore(_gp_registers.at(i));
}

__ vzeroupper();

// Restore XMM/YMM/ZMM registers
for (int i = _xmm_registers.length() - 1; i >= 0; i--) {
xmm_register_restore(_xmm_registers.at(i));
}

// Free stack space
if (_spill_size > 0) {
__ addptr(rsp, _spill_size);
}
}
};

class ZSetupArguments {
private:
MacroAssembler* const _masm;
const Register _ref;
const Address _ref_addr;

public:
ZSetupArguments(MacroAssembler* masm, ZLoadBarrierStubC2* stub) :
_masm(masm),
_ref(stub->ref()),
_ref_addr(stub->ref_addr()) {

// Setup arguments
if (_ref_addr.base() == noreg) {
// No self healing
if (_ref != c_rarg0) {
__ movq(c_rarg0, _ref);
}
__ xorq(c_rarg1, c_rarg1);
} else {
// Self healing
if (_ref == c_rarg0) {
__ lea(c_rarg1, _ref_addr);
} else if (_ref != c_rarg1) {
__ lea(c_rarg1, _ref_addr);
__ movq(c_rarg0, _ref);
} else if (_ref_addr.base() != c_rarg0 && _ref_addr.index() != c_rarg0) {
__ movq(c_rarg0, _ref);
__ lea(c_rarg1, _ref_addr);
} else {
__ xchgq(c_rarg0, c_rarg1);
if (_ref_addr.base() == c_rarg0) {
__ lea(c_rarg1, Address(c_rarg1, _ref_addr.index(), _ref_addr.scale(), _ref_addr.disp()));
} else if (_ref_addr.index() == c_rarg0) {
__ lea(c_rarg1, Address(_ref_addr.base(), c_rarg1, _ref_addr.scale(), _ref_addr.disp()));
} else {
ShouldNotReachHere();
}
}
}
}

~ZSetupArguments() {
// Transfer result
if (_ref != rax) {
__ movq(_ref, rax);
}
}
};

#undef __
#define __ masm->

void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, ZLoadBarrierStubC2* stub) const {
BLOCK_COMMENT("ZLoadBarrierStubC2");

// Stub entry
__ bind(*stub->entry());

{
ZSaveLiveRegisters save_live_registers(masm, stub);
ZSetupArguments setup_arguments(masm, stub);
__ call(RuntimeAddress(stub->slow_path()));
}

// Stub exit
__ jmp(*stub->continuation());
}

#undef __

static void barrier_stubs_init_inner(const char* label, const DecoratorSet decorators, address* stub) {
const int nregs = RegisterImpl::number_of_registers;
const int code_size = nregs * 128; // Rough estimate of code size

ResourceMark rm;

CodeBuffer buf(BufferBlob::create(label, code_size));
StubCodeGenerator cgen(&buf);

for (int i = 0; i < nregs; i++) {
const Register reg = as_Register(i);
stub[i] = generate_load_barrier_stub(&cgen, reg, decorators);
}
}

void ZBarrierSetAssembler::barrier_stubs_init() {
barrier_stubs_init_inner("zgc_load_barrier_stubs", ON_STRONG_OOP_REF, _load_barrier_slow_stub);
barrier_stubs_init_inner("zgc_load_barrier_weak_stubs", ON_WEAK_OOP_REF, _load_barrier_weak_slow_stub);
}

address ZBarrierSetAssembler::load_barrier_slow_stub(Register reg) {
return _load_barrier_slow_stub[reg->encoding()];
}

address ZBarrierSetAssembler::load_barrier_weak_slow_stub(Register reg) {
return _load_barrier_weak_slow_stub[reg->encoding()];
}
#endif // COMPILER2
@@ -24,6 +24,14 @@
#ifndef CPU_X86_GC_Z_ZBARRIERSETASSEMBLER_X86_HPP
#define CPU_X86_GC_Z_ZBARRIERSETASSEMBLER_X86_HPP

#include "code/vmreg.hpp"
#include "oops/accessDecorators.hpp"
#ifdef COMPILER2
#include "opto/optoreg.hpp"
#endif // COMPILER2

class MacroAssembler;

#ifdef COMPILER1
class LIR_Assembler;
class LIR_OprDesc;
@@ -32,14 +40,13 @@ class StubAssembler;
class ZLoadBarrierStubC1;
#endif // COMPILER1

#ifdef COMPILER2
class Node;
class ZLoadBarrierStubC2;
#endif // COMPILER2

class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase {
private:
address _load_barrier_slow_stub[RegisterImpl::number_of_registers];
address _load_barrier_weak_slow_stub[RegisterImpl::number_of_registers];

public:
ZBarrierSetAssembler();

virtual void load_at(MacroAssembler* masm,
DecoratorSet decorators,
BasicType type,
@@ -82,10 +89,13 @@ public:
DecoratorSet decorators) const;
#endif // COMPILER1

virtual void barrier_stubs_init();
#ifdef COMPILER2
OptoReg::Name refine_register(const Node* node,
OptoReg::Name opto_reg);

address load_barrier_slow_stub(Register reg);
address load_barrier_weak_slow_stub(Register reg);
void generate_c2_load_barrier_stub(MacroAssembler* masm,
ZLoadBarrierStubC2* stub) const;
#endif // COMPILER2
};

#endif // CPU_X86_GC_Z_ZBARRIERSETASSEMBLER_X86_HPP
@ -24,190 +24,144 @@
source_hpp %{

#include "gc/z/c2/zBarrierSetC2.hpp"
#include "gc/z/zThreadLocalData.hpp"

%}

source %{

#include "gc/z/zBarrierSetAssembler.hpp"
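
// ZGC load barrier fast path: test the loaded oop against the thread-local address
// bad mask and branch to the out-of-line ZLoadBarrierStubC2 when the test fails; the
// stub's continuation label is bound right after the branch.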
static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) {
  ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, weak);
  __ testptr(ref, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset()));
  __ jcc(Assembler::notZero, *stub->entry());
  __ bind(*stub->continuation());
}

static void z_load_barrier_slow_reg(MacroAssembler& _masm, Register dst, Address src, bool weak) {
  assert(dst != rsp, "Invalid register");
  assert(dst != r15, "Invalid register");

  const address stub = weak ? ZBarrierSet::assembler()->load_barrier_weak_slow_stub(dst)
                            : ZBarrierSet::assembler()->load_barrier_slow_stub(dst);
  __ lea(dst, src);
  __ call(RuntimeAddress(stub));
static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) {
  ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, false /* weak */);
  __ jmp(*stub->entry());
  __ bind(*stub->continuation());
}

%}

// For XMM and YMM enabled processors
instruct zLoadBarrierSlowRegXmmAndYmm(rRegP dst, memory src, rFlagsReg cr,
        rxmm0 x0, rxmm1 x1, rxmm2 x2, rxmm3 x3,
        rxmm4 x4, rxmm5 x5, rxmm6 x6, rxmm7 x7,
        rxmm8 x8, rxmm9 x9, rxmm10 x10, rxmm11 x11,
        rxmm12 x12, rxmm13 x13, rxmm14 x14, rxmm15 x15) %{
  match(Set dst (LoadBarrierSlowReg src dst));
  predicate(UseAVX <= 2 && !n->as_LoadBarrierSlowReg()->is_weak());
// Load Pointer
instruct zLoadP(rRegP dst, memory mem, rFlagsReg cr)
%{
  predicate(UseZGC && n->as_Load()->barrier_data() == ZLoadBarrierStrong);
  match(Set dst (LoadP mem));
  effect(KILL cr, TEMP dst);

  effect(KILL cr,
         KILL x0, KILL x1, KILL x2, KILL x3,
         KILL x4, KILL x5, KILL x6, KILL x7,
         KILL x8, KILL x9, KILL x10, KILL x11,
         KILL x12, KILL x13, KILL x14, KILL x15);
  ins_cost(125);

  format %{ "lea $dst, $src\n\t"
            "call #ZLoadBarrierSlowPath" %}
  format %{ "movq $dst, $mem" %}

  ins_encode %{
    z_load_barrier_slow_reg(_masm, $dst$$Register, $src$$Address, false /* weak */);
    __ movptr($dst$$Register, $mem$$Address);
    if (barrier_data() != ZLoadBarrierElided) {
      z_load_barrier(_masm, this, $mem$$Address, $dst$$Register, noreg /* tmp */, false /* weak */);
    }
  %}
  ins_pipe(pipe_slow);

  ins_pipe(ialu_reg_mem);
%}

// For ZMM enabled processors
instruct zLoadBarrierSlowRegZmm(rRegP dst, memory src, rFlagsReg cr,
        rxmm0 x0, rxmm1 x1, rxmm2 x2, rxmm3 x3,
        rxmm4 x4, rxmm5 x5, rxmm6 x6, rxmm7 x7,
        rxmm8 x8, rxmm9 x9, rxmm10 x10, rxmm11 x11,
        rxmm12 x12, rxmm13 x13, rxmm14 x14, rxmm15 x15,
        rxmm16 x16, rxmm17 x17, rxmm18 x18, rxmm19 x19,
        rxmm20 x20, rxmm21 x21, rxmm22 x22, rxmm23 x23,
        rxmm24 x24, rxmm25 x25, rxmm26 x26, rxmm27 x27,
        rxmm28 x28, rxmm29 x29, rxmm30 x30, rxmm31 x31) %{
// Load Weak Pointer
instruct zLoadWeakP(rRegP dst, memory mem, rFlagsReg cr)
%{
  predicate(UseZGC && n->as_Load()->barrier_data() == ZLoadBarrierWeak);
  match(Set dst (LoadP mem));
  effect(KILL cr, TEMP dst);

  match(Set dst (LoadBarrierSlowReg src dst));
  predicate(UseAVX == 3 && !n->as_LoadBarrierSlowReg()->is_weak());
  ins_cost(125);

  effect(KILL cr,
         KILL x0, KILL x1, KILL x2, KILL x3,
         KILL x4, KILL x5, KILL x6, KILL x7,
         KILL x8, KILL x9, KILL x10, KILL x11,
         KILL x12, KILL x13, KILL x14, KILL x15,
         KILL x16, KILL x17, KILL x18, KILL x19,
         KILL x20, KILL x21, KILL x22, KILL x23,
         KILL x24, KILL x25, KILL x26, KILL x27,
         KILL x28, KILL x29, KILL x30, KILL x31);

  format %{ "lea $dst, $src\n\t"
            "call #ZLoadBarrierSlowPath" %}
  format %{ "movq $dst, $mem" %}

  ins_encode %{
    z_load_barrier_slow_reg(_masm, $dst$$Register, $src$$Address, false /* weak */);
    __ movptr($dst$$Register, $mem$$Address);
    z_load_barrier(_masm, this, $mem$$Address, $dst$$Register, noreg /* tmp */, true /* weak */);
  %}
  ins_pipe(pipe_slow);

  ins_pipe(ialu_reg_mem);
%}

// For XMM and YMM enabled processors
instruct zLoadBarrierWeakSlowRegXmmAndYmm(rRegP dst, memory src, rFlagsReg cr,
        rxmm0 x0, rxmm1 x1, rxmm2 x2, rxmm3 x3,
        rxmm4 x4, rxmm5 x5, rxmm6 x6, rxmm7 x7,
        rxmm8 x8, rxmm9 x9, rxmm10 x10, rxmm11 x11,
        rxmm12 x12, rxmm13 x13, rxmm14 x14, rxmm15 x15) %{
  match(Set dst (LoadBarrierSlowReg src dst));
  predicate(UseAVX <= 2 && n->as_LoadBarrierSlowReg()->is_weak());
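
// Compare-and-exchange of an oop field: if the value returned by cmpxchg fails the
// thread-local bad-mask test, the load barrier slow path heals the field, the expected
// value is restored from tmp and the cmpxchg is retried once.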
instruct zCompareAndExchangeP(memory mem, rax_RegP oldval, rRegP newval, rRegP tmp, rFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeP mem (Binary oldval newval)));
  predicate(UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong);
  effect(KILL cr, TEMP tmp);

  effect(KILL cr,
         KILL x0, KILL x1, KILL x2, KILL x3,
         KILL x4, KILL x5, KILL x6, KILL x7,
         KILL x8, KILL x9, KILL x10, KILL x11,
         KILL x12, KILL x13, KILL x14, KILL x15);

  format %{ "lea $dst, $src\n\t"
            "call #ZLoadBarrierSlowPath" %}
  format %{ "lock\n\t"
            "cmpxchgq $newval, $mem" %}

  ins_encode %{
    z_load_barrier_slow_reg(_masm, $dst$$Register, $src$$Address, true /* weak */);
    if (barrier_data() != ZLoadBarrierElided) {
      __ movptr($tmp$$Register, $oldval$$Register);
    }
    __ lock();
    __ cmpxchgptr($newval$$Register, $mem$$Address);
    if (barrier_data() != ZLoadBarrierElided) {
      Label good;
      __ testptr($oldval$$Register, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset()));
      __ jcc(Assembler::zero, good);
      z_load_barrier_slow_path(_masm, this, $mem$$Address, $oldval$$Register, $tmp$$Register);
      __ movptr($oldval$$Register, $tmp$$Register);
      __ lock();
      __ cmpxchgptr($newval$$Register, $mem$$Address);
      __ bind(good);
    }
  %}
  ins_pipe(pipe_slow);

  ins_pipe(pipe_cmpxchg);
%}

// For ZMM enabled processors
instruct zLoadBarrierWeakSlowRegZmm(rRegP dst, memory src, rFlagsReg cr,
        rxmm0 x0, rxmm1 x1, rxmm2 x2, rxmm3 x3,
        rxmm4 x4, rxmm5 x5, rxmm6 x6, rxmm7 x7,
        rxmm8 x8, rxmm9 x9, rxmm10 x10, rxmm11 x11,
        rxmm12 x12, rxmm13 x13, rxmm14 x14, rxmm15 x15,
        rxmm16 x16, rxmm17 x17, rxmm18 x18, rxmm19 x19,
        rxmm20 x20, rxmm21 x21, rxmm22 x22, rxmm23 x23,
        rxmm24 x24, rxmm25 x25, rxmm26 x26, rxmm27 x27,
        rxmm28 x28, rxmm29 x29, rxmm30 x30, rxmm31 x31) %{
instruct zCompareAndSwapP(rRegI res, memory mem, rRegP newval, rRegP tmp, rFlagsReg cr, rax_RegP oldval) %{
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
  predicate(UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong);
  effect(KILL cr, KILL oldval, TEMP tmp);

  match(Set dst (LoadBarrierSlowReg src dst));
  predicate(UseAVX == 3 && n->as_LoadBarrierSlowReg()->is_weak());

  effect(KILL cr,
         KILL x0, KILL x1, KILL x2, KILL x3,
         KILL x4, KILL x5, KILL x6, KILL x7,
         KILL x8, KILL x9, KILL x10, KILL x11,
         KILL x12, KILL x13, KILL x14, KILL x15,
         KILL x16, KILL x17, KILL x18, KILL x19,
         KILL x20, KILL x21, KILL x22, KILL x23,
         KILL x24, KILL x25, KILL x26, KILL x27,
         KILL x28, KILL x29, KILL x30, KILL x31);

  format %{ "lea $dst, $src\n\t"
            "call #ZLoadBarrierSlowPath" %}
  format %{ "lock\n\t"
            "cmpxchgq $newval, $mem\n\t"
            "sete $res\n\t"
            "movzbl $res, $res" %}

  ins_encode %{
    z_load_barrier_slow_reg(_masm, $dst$$Register, $src$$Address, true /* weak */);
    if (barrier_data() != ZLoadBarrierElided) {
      __ movptr($tmp$$Register, $oldval$$Register);
    }
    __ lock();
    __ cmpxchgptr($newval$$Register, $mem$$Address);
    if (barrier_data() != ZLoadBarrierElided) {
      Label good;
      __ testptr($oldval$$Register, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset()));
      __ jcc(Assembler::zero, good);
      z_load_barrier_slow_path(_masm, this, $mem$$Address, $oldval$$Register, $tmp$$Register);
      __ movptr($oldval$$Register, $tmp$$Register);
      __ lock();
      __ cmpxchgptr($newval$$Register, $mem$$Address);
      __ bind(good);
      __ cmpptr($tmp$$Register, $oldval$$Register);
    }
    __ setb(Assembler::equal, $res$$Register);
    __ movzbl($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);

  ins_pipe(pipe_cmpxchg);
%}

// Specialized versions of compareAndExchangeP that adds a keepalive that is consumed
// but doesn't affect output.
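
// Atomic exchange of an oop: apply the load barrier to the previous value returned by xchg.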
instruct zXChgP(memory mem, rRegP newval, rFlagsReg cr) %{
  match(Set newval (GetAndSetP mem newval));
  predicate(UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong);
  effect(KILL cr);

instruct z_compareAndExchangeP(
        memory mem_ptr,
        rax_RegP oldval, rRegP newval, rRegP keepalive,
        rFlagsReg cr) %{
  predicate(VM_Version::supports_cx8());
  match(Set oldval (ZCompareAndExchangeP (Binary mem_ptr keepalive) (Binary oldval newval)));
  effect(KILL cr);
  format %{ "xchgq $newval, $mem" %}

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  opcode(0x0F, 0xB1);
  ins_encode(lock_prefix,
             REX_reg_mem_wide(newval, mem_ptr),
             OpcP, OpcS,
             reg_mem(newval, mem_ptr) // lock cmpxchg
            );
  ins_pipe( pipe_cmpxchg );
%}

instruct z_compareAndSwapP(rRegI res,
        memory mem_ptr,
        rax_RegP oldval, rRegP newval, rRegP keepalive,
        rFlagsReg cr) %{
  predicate(VM_Version::supports_cx8());
  match(Set res (ZCompareAndSwapP (Binary mem_ptr keepalive) (Binary oldval newval)));
  match(Set res (ZWeakCompareAndSwapP (Binary mem_ptr keepalive) (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "sete $res\n\t"
            "movzbl $res, $res" %}
  opcode(0x0F, 0xB1);
  ins_encode(lock_prefix,
             REX_reg_mem_wide(newval, mem_ptr),
             OpcP, OpcS,
             reg_mem(newval, mem_ptr),
             REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
             REX_reg_breg(res, res), // movzbl
             Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
  ins_pipe( pipe_cmpxchg );
%}

instruct z_xchgP( memory mem, rRegP newval, rRegP keepalive) %{
  match(Set newval (ZGetAndSetP mem (Binary newval keepalive)));
  format %{ "XCHGQ $newval,[$mem]" %}
  ins_encode %{
    __ xchgq($newval$$Register, $mem$$Address);
    __ xchgptr($newval$$Register, $mem$$Address);
    if (barrier_data() != ZLoadBarrierElided) {
      z_load_barrier(_masm, this, Address(noreg, 0), $newval$$Register, noreg /* tmp */, false /* weak */);
    }
  %}
  ins_pipe( pipe_cmpxchg );

  ins_pipe(pipe_cmpxchg);
%}

@ -1097,138 +1097,6 @@ reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0
reg_class_dynamic vectorz_reg(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );

reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
reg_class ymm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h);
reg_class zmm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p);

reg_class xmm1_reg(XMM1, XMM1b, XMM1c, XMM1d);
reg_class ymm1_reg(XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h);
reg_class zmm1_reg(XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p);

reg_class xmm2_reg(XMM2, XMM2b, XMM2c, XMM2d);
reg_class ymm2_reg(XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h);
reg_class zmm2_reg(XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p);

reg_class xmm3_reg(XMM3, XMM3b, XMM3c, XMM3d);
reg_class ymm3_reg(XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h);
reg_class zmm3_reg(XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p);

reg_class xmm4_reg(XMM4, XMM4b, XMM4c, XMM4d);
reg_class ymm4_reg(XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h);
reg_class zmm4_reg(XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p);

reg_class xmm5_reg(XMM5, XMM5b, XMM5c, XMM5d);
reg_class ymm5_reg(XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h);
reg_class zmm5_reg(XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p);

reg_class xmm6_reg(XMM6, XMM6b, XMM6c, XMM6d);
reg_class ymm6_reg(XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h);
reg_class zmm6_reg(XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p);

reg_class xmm7_reg(XMM7, XMM7b, XMM7c, XMM7d);
reg_class ymm7_reg(XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h);
reg_class zmm7_reg(XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p);

#ifdef _LP64

reg_class xmm8_reg(XMM8, XMM8b, XMM8c, XMM8d);
reg_class ymm8_reg(XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h);
reg_class zmm8_reg(XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p);

reg_class xmm9_reg(XMM9, XMM9b, XMM9c, XMM9d);
reg_class ymm9_reg(XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h);
reg_class zmm9_reg(XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p);

reg_class xmm10_reg(XMM10, XMM10b, XMM10c, XMM10d);
reg_class ymm10_reg(XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h);
reg_class zmm10_reg(XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p);

reg_class xmm11_reg(XMM11, XMM11b, XMM11c, XMM11d);
reg_class ymm11_reg(XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h);
reg_class zmm11_reg(XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p);

reg_class xmm12_reg(XMM12, XMM12b, XMM12c, XMM12d);
reg_class ymm12_reg(XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h);
reg_class zmm12_reg(XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p);

reg_class xmm13_reg(XMM13, XMM13b, XMM13c, XMM13d);
reg_class ymm13_reg(XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h);
reg_class zmm13_reg(XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p);

reg_class xmm14_reg(XMM14, XMM14b, XMM14c, XMM14d);
reg_class ymm14_reg(XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h);
reg_class zmm14_reg(XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p);

reg_class xmm15_reg(XMM15, XMM15b, XMM15c, XMM15d);
reg_class ymm15_reg(XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
reg_class zmm15_reg(XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);

reg_class xmm16_reg(XMM16, XMM16b, XMM16c, XMM16d);
reg_class ymm16_reg(XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h);
reg_class zmm16_reg(XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p);

reg_class xmm17_reg(XMM17, XMM17b, XMM17c, XMM17d);
reg_class ymm17_reg(XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h);
reg_class zmm17_reg(XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p);

reg_class xmm18_reg(XMM18, XMM18b, XMM18c, XMM18d);
reg_class ymm18_reg(XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h);
reg_class zmm18_reg(XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p);

reg_class xmm19_reg(XMM19, XMM19b, XMM19c, XMM19d);
reg_class ymm19_reg(XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h);
reg_class zmm19_reg(XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p);

reg_class xmm20_reg(XMM20, XMM20b, XMM20c, XMM20d);
reg_class ymm20_reg(XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h);
reg_class zmm20_reg(XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p);

reg_class xmm21_reg(XMM21, XMM21b, XMM21c, XMM21d);
reg_class ymm21_reg(XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h);
reg_class zmm21_reg(XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p);

reg_class xmm22_reg(XMM22, XMM22b, XMM22c, XMM22d);
reg_class ymm22_reg(XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h);
reg_class zmm22_reg(XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p);

reg_class xmm23_reg(XMM23, XMM23b, XMM23c, XMM23d);
reg_class ymm23_reg(XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h);
reg_class zmm23_reg(XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p);

reg_class xmm24_reg(XMM24, XMM24b, XMM24c, XMM24d);
reg_class ymm24_reg(XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h);
reg_class zmm24_reg(XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p);

reg_class xmm25_reg(XMM25, XMM25b, XMM25c, XMM25d);
reg_class ymm25_reg(XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h);
reg_class zmm25_reg(XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p);

reg_class xmm26_reg(XMM26, XMM26b, XMM26c, XMM26d);
reg_class ymm26_reg(XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h);
reg_class zmm26_reg(XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p);

reg_class xmm27_reg(XMM27, XMM27b, XMM27c, XMM27d);
reg_class ymm27_reg(XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h);
reg_class zmm27_reg(XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p);

reg_class xmm28_reg(XMM28, XMM28b, XMM28c, XMM28d);
reg_class ymm28_reg(XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h);
reg_class zmm28_reg(XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p);

reg_class xmm29_reg(XMM29, XMM29b, XMM29c, XMM29d);
reg_class ymm29_reg(XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h);
reg_class zmm29_reg(XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p);

reg_class xmm30_reg(XMM30, XMM30b, XMM30c, XMM30d);
reg_class ymm30_reg(XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h);
reg_class zmm30_reg(XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p);

reg_class xmm31_reg(XMM31, XMM31b, XMM31c, XMM31d);
reg_class ymm31_reg(XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
reg_class zmm31_reg(XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);

#endif

%}

@ -1800,8 +1668,8 @@ static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo
  return (UseAVX > 2) ? 6 : 4;
}

static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st) {
int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                     int stack_offset, int reg, uint ireg, outputStream* st) {
  // In 64-bit VM size calculation is very complex. Emitting instructions
  // into scratch buffer is used to get size in 64-bit VM.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
@ -1058,8 +1058,8 @@ static enum RC rc_class(OptoReg::Name reg)
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st);

static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st);
int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                     int stack_offset, int reg, uint ireg, outputStream* st);

static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
                                      int dst_offset, uint ireg, outputStream* st) {
@ -4260,200 +4260,6 @@ operand cmpOpUCF2() %{
  %}
%}

// Operands for bound floating pointer register arguments
operand rxmm0() %{
  constraint(ALLOC_IN_RC(xmm0_reg));
  match(VecX);
  format%{%}
  interface(REG_INTER);
%}
operand rxmm1() %{
  constraint(ALLOC_IN_RC(xmm1_reg));
  match(VecX);
  format%{%}
  interface(REG_INTER);
%}
operand rxmm2() %{
  constraint(ALLOC_IN_RC(xmm2_reg));
  match(VecX);
  format%{%}
  interface(REG_INTER);
%}
operand rxmm3() %{
  constraint(ALLOC_IN_RC(xmm3_reg));
  match(VecX);
  format%{%}
  interface(REG_INTER);
%}
operand rxmm4() %{
  constraint(ALLOC_IN_RC(xmm4_reg));
  match(VecX);
  format%{%}
  interface(REG_INTER);
%}
operand rxmm5() %{
  constraint(ALLOC_IN_RC(xmm5_reg));
  match(VecX);
  format%{%}
  interface(REG_INTER);
%}
operand rxmm6() %{
  constraint(ALLOC_IN_RC(xmm6_reg));
  match(VecX);
  format%{%}
  interface(REG_INTER);
%}
operand rxmm7() %{
  constraint(ALLOC_IN_RC(xmm7_reg));
  match(VecX);
  format%{%}
  interface(REG_INTER);
%}
operand rxmm8() %{
  constraint(ALLOC_IN_RC(xmm8_reg));
  match(VecX);
  format%{%}
  interface(REG_INTER);
%}
operand rxmm9() %{
  constraint(ALLOC_IN_RC(xmm9_reg));
  match(VecX);
  format%{%}
  interface(REG_INTER);
%}
operand rxmm10() %{
  constraint(ALLOC_IN_RC(xmm10_reg));
  match(VecX);
  format%{%}
  interface(REG_INTER);
%}
operand rxmm11() %{
  constraint(ALLOC_IN_RC(xmm11_reg));
  match(VecX);
  format%{%}
  interface(REG_INTER);
%}
operand rxmm12() %{
  constraint(ALLOC_IN_RC(xmm12_reg));
  match(VecX);
  format%{%}
  interface(REG_INTER);
%}
operand rxmm13() %{
  constraint(ALLOC_IN_RC(xmm13_reg));
  match(VecX);
  format%{%}
  interface(REG_INTER);
%}
operand rxmm14() %{
  constraint(ALLOC_IN_RC(xmm14_reg));
  match(VecX);
  format%{%}
  interface(REG_INTER);
%}
operand rxmm15() %{
  constraint(ALLOC_IN_RC(xmm15_reg));
  match(VecX);
  format%{%}
  interface(REG_INTER);
%}
operand rxmm16() %{
  constraint(ALLOC_IN_RC(xmm16_reg));
  match(VecX);
  format%{%}
  interface(REG_INTER);
%}
operand rxmm17() %{
  constraint(ALLOC_IN_RC(xmm17_reg));
  match(VecX);
  format%{%}
  interface(REG_INTER);
%}
operand rxmm18() %{
  constraint(ALLOC_IN_RC(xmm18_reg));
  match(VecX);
  format%{%}
  interface(REG_INTER);
%}
operand rxmm19() %{
  constraint(ALLOC_IN_RC(xmm19_reg));
  match(VecX);
  format%{%}
  interface(REG_INTER);
%}
operand rxmm20() %{
  constraint(ALLOC_IN_RC(xmm20_reg));
  match(VecX);
  format%{%}
  interface(REG_INTER);
%}
operand rxmm21() %{
  constraint(ALLOC_IN_RC(xmm21_reg));
  match(VecX);
  format%{%}
  interface(REG_INTER);
%}
operand rxmm22() %{
  constraint(ALLOC_IN_RC(xmm22_reg));
  match(VecX);
  format%{%}
  interface(REG_INTER);
%}
operand rxmm23() %{
  constraint(ALLOC_IN_RC(xmm23_reg));
  match(VecX);
  format%{%}
  interface(REG_INTER);
%}
operand rxmm24() %{
  constraint(ALLOC_IN_RC(xmm24_reg));
  match(VecX);
  format%{%}
  interface(REG_INTER);
%}
operand rxmm25() %{
  constraint(ALLOC_IN_RC(xmm25_reg));
  match(VecX);
  format%{%}
  interface(REG_INTER);
%}
operand rxmm26() %{
  constraint(ALLOC_IN_RC(xmm26_reg));
  match(VecX);
  format%{%}
  interface(REG_INTER);
%}
operand rxmm27() %{
  constraint(ALLOC_IN_RC(xmm27_reg));
  match(VecX);
  format%{%}
  interface(REG_INTER);
%}
operand rxmm28() %{
  constraint(ALLOC_IN_RC(xmm28_reg));
  match(VecX);
  format%{%}
  interface(REG_INTER);
%}
operand rxmm29() %{
  constraint(ALLOC_IN_RC(xmm29_reg));
  match(VecX);
  format%{%}
  interface(REG_INTER);
%}
operand rxmm30() %{
  constraint(ALLOC_IN_RC(xmm30_reg));
  match(VecX);
  format%{%}
  interface(REG_INTER);
%}
operand rxmm31() %{
  constraint(ALLOC_IN_RC(xmm31_reg));
  match(VecX);
  format%{%}
  interface(REG_INTER);
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used as to simplify
// instruction definitions by not requiring the AD writer to specify separate
@ -5346,6 +5152,7 @@ instruct loadRange(rRegI dst, memory mem)
instruct loadP(rRegP dst, memory mem)
%{
  match(Set dst (LoadP mem));
  predicate(n->as_Load()->barrier_data() == 0);

  ins_cost(125); // XXX
  format %{ "movq $dst, $mem\t# ptr" %}
@ -7794,6 +7601,7 @@ instruct storePConditional(memory heap_top_ptr,
                           rax_RegP oldval, rRegP newval,
                           rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));

  format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) "
@ -7845,7 +7653,7 @@ instruct compareAndSwapP(rRegI res,
                         rax_RegP oldval, rRegP newval,
                         rFlagsReg cr)
%{
  predicate(VM_Version::supports_cx8());
  predicate(VM_Version::supports_cx8() && n->as_LoadStore()->barrier_data() == 0);
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
@ -8087,7 +7895,7 @@ instruct compareAndExchangeP(
                             rax_RegP oldval, rRegP newval,
                             rFlagsReg cr)
%{
  predicate(VM_Version::supports_cx8());
  predicate(VM_Version::supports_cx8() && n->as_LoadStore()->barrier_data() == 0);
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  effect(KILL cr);

@ -8232,6 +8040,7 @@ instruct xchgL( memory mem, rRegL newval) %{

instruct xchgP( memory mem, rRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  predicate(n->as_LoadStore()->barrier_data() == 0);
  format %{ "XCHGQ $newval,[$mem]" %}
  ins_encode %{
    __ xchgq($newval$$Register, $mem$$Address);
@ -11974,6 +11783,7 @@ instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
%{
  match(Set cr (CmpP op1 (LoadP op2)));
  predicate(n->in(2)->as_Load()->barrier_data() == 0);

  ins_cost(500); // XXX
  format %{ "cmpq $op1, $op2\t# ptr" %}
@ -11999,7 +11809,8 @@ instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
// and raw pointers have no anti-dependencies.
instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
%{
  predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none);
  predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
            n->in(2)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "cmpq $op1, $op2\t# raw ptr" %}
@ -12024,7 +11835,8 @@ instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
// any compare to a zero should be eq/neq.
instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
%{
  predicate(!UseCompressedOops || (CompressedOops::base() != NULL));
  predicate((!UseCompressedOops || (CompressedOops::base() != NULL)) &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP (LoadP op) zero));

  ins_cost(500); // XXX
@ -12037,7 +11849,9 @@ instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)

instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL) && (CompressedKlassPointers::base() == NULL));
  predicate(UseCompressedOops && (CompressedOops::base() == NULL) &&
            (CompressedKlassPointers::base() == NULL) &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP (LoadP mem) zero));

  format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %}
@ -773,11 +773,6 @@ bool InstructForm::captures_bottom_type(FormDict &globals) const {
            !strcmp(_matrule->_rChild->_opType,"CheckCastPP") ||
            !strcmp(_matrule->_rChild->_opType,"GetAndSetP") ||
            !strcmp(_matrule->_rChild->_opType,"GetAndSetN") ||
#if INCLUDE_ZGC
            !strcmp(_matrule->_rChild->_opType,"ZGetAndSetP") ||
            !strcmp(_matrule->_rChild->_opType,"ZCompareAndExchangeP") ||
            !strcmp(_matrule->_rChild->_opType,"LoadBarrierSlowReg") ||
#endif
#if INCLUDE_SHENANDOAHGC
            !strcmp(_matrule->_rChild->_opType,"ShenandoahCompareAndExchangeP") ||
            !strcmp(_matrule->_rChild->_opType,"ShenandoahCompareAndExchangeN") ||
@ -3510,9 +3505,6 @@ int MatchNode::needs_ideal_memory_edge(FormDict &globals) const {
    "StoreCM",
    "GetAndSetB", "GetAndSetS", "GetAndAddI", "GetAndSetI", "GetAndSetP",
    "GetAndAddB", "GetAndAddS", "GetAndAddL", "GetAndSetL", "GetAndSetN",
#if INCLUDE_ZGC
    "ZGetAndSetP", "ZCompareAndSwapP", "ZCompareAndExchangeP", "ZWeakCompareAndSwapP",
#endif
    "ClearArray"
  };
  int cnt = sizeof(needs_ideal_memory_list)/sizeof(char*);
@ -66,8 +66,7 @@ NOT_PRODUCT(cflags(TraceOptoOutput, bool, TraceOptoOutput, TraceOptoOutput))
  cflags(VectorizeDebug, uintx, 0, VectorizeDebug) \
  cflags(CloneMapDebug, bool, false, CloneMapDebug) \
  cflags(IGVPrintLevel, intx, PrintIdealGraphLevel, IGVPrintLevel) \
  cflags(MaxNodeLimit, intx, MaxNodeLimit, MaxNodeLimit) \
ZGC_ONLY(cflags(ZTraceLoadBarriers, bool, false, ZTraceLoadBarriers))
  cflags(MaxNodeLimit, intx, MaxNodeLimit, MaxNodeLimit)
#else
#define compilerdirectives_c2_flags(cflags)
#endif
@ -264,7 +264,7 @@ public:
  virtual void clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const;

  // Support for GC barriers emitted during parsing
  virtual bool has_load_barriers() const { return false; }
  virtual bool has_load_barrier_nodes() const { return false; }
  virtual bool is_gc_barrier_node(Node* node) const { return false; }
  virtual Node* step_over_gc_barrier(Node* c) const { return c; }
  virtual Node* step_over_gc_barrier_ctrl(Node* c) const { return c; }
@ -287,13 +287,9 @@ public:
  virtual bool is_gc_specific_loop_opts_pass(LoopOptsMode mode) const { return false; }

  virtual bool has_special_unique_user(const Node* node) const { return false; }
  virtual bool needs_anti_dependence_check(const Node* node) const { return true; }

  virtual void barrier_insertion_phase(Compile* C, PhaseIterGVN &igvn) const { }

  enum CompilePhase {
    BeforeOptimize,
    BeforeLateInsertion,
    BeforeMacroExpand,
    BeforeCodeGen
  };
@ -320,6 +316,10 @@ public:
  virtual Node* split_if_pre(PhaseIdealLoop* phase, Node* n) const { return NULL; }
  virtual bool build_loop_late_post(PhaseIdealLoop* phase, Node* n) const { return false; }
  virtual bool sink_node(PhaseIdealLoop* phase, Node* n, Node* x, Node* x_ctrl, Node* n_ctrl) const { return false; }

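  // Hooks for collectors that expand their barriers late, on the MachNode level,
  // during code emission (analysis, stub size estimation and stub emission).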
  virtual void late_barrier_analysis() const { }
  virtual int estimate_stub_size() const { return 0; }
  virtual void emit_stubs(CodeBuffer& cb) const { }
};

#endif // SHARE_GC_SHARED_C2_BARRIERSETC2_HPP
@ -103,7 +103,7 @@ public:
  static const TypeFunc* write_ref_field_pre_entry_Type();
  static const TypeFunc* shenandoah_clone_barrier_Type();
  static const TypeFunc* shenandoah_load_reference_barrier_Type();
  virtual bool has_load_barriers() const { return true; }
  virtual bool has_load_barrier_nodes() const { return true; }

  // This is the entry-point for the backend to perform accesses through the Access API.
  virtual void clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const;
File diff suppressed because it is too large
@ -29,134 +29,38 @@
#include "opto/node.hpp"
#include "utilities/growableArray.hpp"

class ZCompareAndSwapPNode : public CompareAndSwapPNode {
public:
  ZCompareAndSwapPNode(Node* c, Node *mem, Node *adr, Node *val, Node *ex, MemNode::MemOrd mem_ord) : CompareAndSwapPNode(c, mem, adr, val, ex, mem_ord) { }
  virtual int Opcode() const;
};
const uint8_t ZLoadBarrierStrong = 1;
const uint8_t ZLoadBarrierWeak = 2;
const uint8_t ZLoadBarrierElided = 3;

class ZWeakCompareAndSwapPNode : public WeakCompareAndSwapPNode {
public:
  ZWeakCompareAndSwapPNode(Node* c, Node *mem, Node *adr, Node *val, Node *ex, MemNode::MemOrd mem_ord) : WeakCompareAndSwapPNode(c, mem, adr, val, ex, mem_ord) { }
  virtual int Opcode() const;
};

class ZCompareAndExchangePNode : public CompareAndExchangePNode {
public:
  ZCompareAndExchangePNode(Node* c, Node *mem, Node *adr, Node *val, Node *ex, const TypePtr* at, const Type* t, MemNode::MemOrd mem_ord) : CompareAndExchangePNode(c, mem, adr, val, ex, at, t, mem_ord) { }
  virtual int Opcode() const;
};

class ZGetAndSetPNode : public GetAndSetPNode {
public:
  ZGetAndSetPNode(Node* c, Node *mem, Node *adr, Node *val, const TypePtr* at, const Type* t) : GetAndSetPNode(c, mem, adr, val, at, t) { }
  virtual int Opcode() const;
};

class LoadBarrierNode : public MultiNode {
class ZLoadBarrierStubC2 : public ResourceObj {
private:
  bool _weak; // On strong or weak oop reference
  static bool is_dominator(PhaseIdealLoop* phase, bool linear_only, Node *d, Node *n);
  void push_dominated_barriers(PhaseIterGVN* igvn) const;
  const MachNode* _node;
  const Address _ref_addr;
  const Register _ref;
  const Register _tmp;
  const bool _weak;
  Label _entry;
  Label _continuation;

  ZLoadBarrierStubC2(const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak);

public:
  enum {
    Control,
    Memory,
    Oop,
    Address,
    Number_of_Outputs = Address,
    Similar,
    Number_of_Inputs
  };
  static ZLoadBarrierStubC2* create(const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak);

  LoadBarrierNode(Compile* C,
                  Node* c,
                  Node* mem,
                  Node* val,
                  Node* adr,
                  bool weak);

  virtual int Opcode() const;
  virtual uint size_of() const;
  virtual bool cmp(const Node& n) const;
  virtual const Type *bottom_type() const;
  virtual const TypePtr* adr_type() const;
  virtual const Type *Value(PhaseGVN *phase) const;
  virtual Node *Identity(PhaseGVN *phase);
  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
  virtual uint match_edge(uint idx) const;

  LoadBarrierNode* has_dominating_barrier(PhaseIdealLoop* phase,
                                          bool linear_only,
                                          bool look_for_similar);

  void fix_similar_in_uses(PhaseIterGVN* igvn);

  bool has_true_uses() const;

  bool can_be_eliminated() const {
    return !in(Similar)->is_top();
  }

  bool is_weak() const {
    return _weak;
  }
};

class LoadBarrierSlowRegNode : public TypeNode {
private:
  bool _is_weak;
public:
  LoadBarrierSlowRegNode(Node *c,
                         Node *adr,
                         Node *src,
                         const TypePtr* t,
                         bool weak) :
    TypeNode(t, 3), _is_weak(weak) {
    init_req(1, adr);
    init_req(2, src);
    init_class_id(Class_LoadBarrierSlowReg);
  }

  virtual uint size_of() const {
    return sizeof(*this);
  }

  virtual const char * name() {
    return "LoadBarrierSlowRegNode";
  }

  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape) {
    return NULL;
  }

  virtual int Opcode() const;

  bool is_weak() { return _is_weak; }
};

class ZBarrierSetC2State : public ResourceObj {
private:
  // List of load barrier nodes which need to be expanded before matching
  GrowableArray<LoadBarrierNode*>* _load_barrier_nodes;

public:
  ZBarrierSetC2State(Arena* comp_arena);
  int load_barrier_count() const;
  void add_load_barrier_node(LoadBarrierNode* n);
  void remove_load_barrier_node(LoadBarrierNode* n);
  LoadBarrierNode* load_barrier_node(int idx) const;
  Address ref_addr() const;
  Register ref() const;
  Register tmp() const;
  address slow_path() const;
  RegMask& live() const;
  Label* entry();
  Label* continuation();
};

class ZBarrierSetC2 : public BarrierSetC2 {
private:
  ZBarrierSetC2State* state() const;
  void expand_loadbarrier_node(PhaseMacroExpand* phase, LoadBarrierNode* barrier) const;

#ifdef ASSERT
  void verify_gc_barriers(bool post_parse) const;
#endif
  void compute_liveness_at_stubs() const;
  void analyze_dominating_barriers() const;

protected:
  virtual Node* load_at_resolved(C2Access& access, const Type* val_type) const;
@ -174,43 +78,14 @@ protected:

public:
  virtual void* create_barrier_state(Arena* comp_arena) const;
  virtual bool array_copy_requires_gc_barriers(bool tightly_coupled_alloc,
                                               BasicType type,
                                               bool is_clone,
                                               ArrayCopyPhase phase) const;

  virtual bool has_load_barriers() const { return true; }
  virtual bool is_gc_barrier_node(Node* node) const;
  virtual Node* step_over_gc_barrier(Node* c) const;
  virtual Node* step_over_gc_barrier_ctrl(Node* c) const;

  virtual void register_potential_barrier_node(Node* node) const;
  virtual void unregister_potential_barrier_node(Node* node) const;
  virtual void eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const { }
  virtual void enqueue_useful_gc_barrier(PhaseIterGVN* igvn, Node* node) const;
  virtual void eliminate_useless_gc_barriers(Unique_Node_List &useful, Compile* C) const;

  virtual bool array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type, bool is_clone, ArrayCopyPhase phase) const;

  virtual bool expand_barriers(Compile* C, PhaseIterGVN& igvn) const;
  virtual bool final_graph_reshaping(Compile* compile, Node* n, uint opcode) const;
  virtual bool matcher_find_shared_visit(Matcher* matcher, Matcher::MStack& mstack, Node* n, uint opcode, bool& mem_op, int& mem_addr_idx) const;
  virtual bool matcher_find_shared_post_visit(Matcher* matcher, Node* n, uint opcode) const;
  virtual bool needs_anti_dependence_check(const Node* node) const;

#ifdef ASSERT
  virtual void verify_gc_barriers(Compile* compile, CompilePhase phase) const;
#endif

  // Load barrier insertion and expansion external
  virtual void barrier_insertion_phase(Compile* C, PhaseIterGVN &igvn) const;
  virtual bool optimize_loops(PhaseIdealLoop* phase, LoopOptsMode mode, VectorSet& visited, Node_Stack& nstack, Node_List& worklist) const;
  virtual bool is_gc_specific_loop_opts_pass(LoopOptsMode mode) const { return (mode == LoopOptsZBarrierInsertion); }
  virtual bool strip_mined_loops_expanded(LoopOptsMode mode) const { return mode == LoopOptsZBarrierInsertion; }

private:
  // Load barrier insertion and expansion internal
  void insert_barriers_on_unsafe(PhaseIdealLoop* phase) const;
  void clean_catch_blocks(PhaseIdealLoop* phase, bool verify = false) const;
  void insert_load_barriers(PhaseIdealLoop* phase) const;
  LoadNode* insert_one_loadbarrier(PhaseIdealLoop* phase, LoadNode* load, Node* ctrl) const;
  void insert_one_loadbarrier_inner(PhaseIdealLoop* phase, LoadNode* load, Node* ctrl, VectorSet visited) const;
  virtual void late_barrier_analysis() const;
  virtual int estimate_stub_size() const;
  virtual void emit_stubs(CodeBuffer& cb) const;
};

#endif // SHARE_GC_Z_C2_ZBARRIERSETC2_HPP
@ -1,5 +1,5 @@
/*
 * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -24,10 +24,7 @@
#ifndef SHARE_GC_Z_ZBARRIERSETASSEMBLER_HPP
#define SHARE_GC_Z_ZBARRIERSETASSEMBLER_HPP

#include "asm/macroAssembler.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "oops/accessDecorators.hpp"
#include "utilities/globalDefinitions.hpp"
#include "utilities/macros.hpp"

class ZBarrierSetAssemblerBase : public BarrierSetAssembler {
@ -48,9 +48,6 @@
#include "opto/subnode.hpp"
#include "opto/vectornode.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_ZGC
#include "gc/z/c2/zBarrierSetC2.hpp"
#endif
#if INCLUDE_SHENANDOAHGC
#include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
#endif
@ -193,17 +193,6 @@ macro(LoadP)
macro(LoadN)
macro(LoadRange)
macro(LoadS)
#if INCLUDE_ZGC
#define zgcmacro(x) macro(x)
#else
#define zgcmacro(x) optionalmacro(x)
#endif
zgcmacro(LoadBarrier)
zgcmacro(LoadBarrierSlowReg)
zgcmacro(ZCompareAndSwapP)
zgcmacro(ZWeakCompareAndSwapP)
zgcmacro(ZCompareAndExchangeP)
zgcmacro(ZGetAndSetP)
macro(Lock)
macro(Loop)
macro(LoopLimit)
@ -76,9 +76,6 @@
#include "utilities/align.hpp"
#include "utilities/copy.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_ZGC
#include "gc/z/c2/zBarrierSetC2.hpp"
#endif


// -------------------- Compile::mach_constant_base_node -----------------------
@ -990,6 +987,7 @@ Compile::Compile( ciEnv* ci_env,
    _has_method_handle_invokes(false),
    _clinit_barrier_on_entry(false),
    _comp_arena(mtCompiler),
    _barrier_set_state(BarrierSet::barrier_set()->barrier_set_c2()->create_barrier_state(comp_arena())),
    _env(ci_env),
    _directive(directive),
    _log(ci_env->log()),
@ -2412,13 +2410,6 @@ void Compile::Optimize() {
    print_method(PHASE_MACRO_EXPANSION, 2);
  }

#ifdef ASSERT
  bs->verify_gc_barriers(this, BarrierSetC2::BeforeLateInsertion);
#endif

  bs->barrier_insertion_phase(C, igvn);
  if (failing()) return;

  {
    TracePhase tp("barrierExpand", &timers[_t_barrierExpand]);
    if (bs->expand_barriers(this, igvn)) {
@ -55,7 +55,6 @@ class ConnectionGraph;
class IdealGraphPrinter;
class InlineTree;
class Int_Array;
class LoadBarrierNode;
class Matcher;
class MachConstantNode;
class MachConstantBaseNode;
@ -96,7 +95,6 @@ enum LoopOptsMode {
  LoopOptsNone,
  LoopOptsShenandoahExpand,
  LoopOptsShenandoahPostExpand,
  LoopOptsZBarrierInsertion,
  LoopOptsSkipSplitIf,
  LoopOptsVerify
};
@ -1186,11 +1184,7 @@ class Compile : public Phase {
  bool in_scratch_emit_size() const { return _in_scratch_emit_size; }

  enum ScratchBufferBlob {
#if defined(PPC64)
    MAX_inst_size = 2048,
#else
    MAX_inst_size = 1024,
#endif
    MAX_locs_size = 128, // number of relocInfo elements
    MAX_const_size = 128,
    MAX_stubs_size = 128
@ -1265,14 +1259,30 @@ class Compile : public Phase {
  // Process an OopMap Element while emitting nodes
  void Process_OopMap_Node(MachNode *mach, int code_offset);

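  // Per-section size estimates (stub, code, constants, relocations) gathered
  // while sizing the code buffer before emission.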
  class BufferSizingData {
  public:
    int _stub;
    int _code;
    int _const;
    int _reloc;

    BufferSizingData() :
      _stub(0),
      _code(0),
      _const(0),
      _reloc(0)
    { };
  };

  // Initialize code buffer
  CodeBuffer* init_buffer(uint* blk_starts);
  void estimate_buffer_size(int& const_req);
  CodeBuffer* init_buffer(BufferSizingData& buf_sizes);

  // Write out basic block data to code buffer
  void fill_buffer(CodeBuffer* cb, uint* blk_starts);

  // Determine which variable sized branches can be shortened
  void shorten_branches(uint* blk_starts, int& code_size, int& reloc_size, int& stub_size);
  void shorten_branches(uint* blk_starts, BufferSizingData& buf_sizes);

  // Compute the size of first NumberOfLoopInstrToAlign instructions
  // at the head of a loop.
@ -993,18 +993,6 @@ void LoopNode::verify_strip_mined(int expect_skeleton) const {
      }
    }

    if (UseZGC && !inner_out->in(0)->is_CountedLoopEnd()) {
      // In some very special cases there can be a load that has no other uses than the
      // counted loop safepoint. Then its loadbarrier will be placed between the inner
      // loop exit and the safepoint. This is very rare

      Node* ifnode = inner_out->in(1)->in(0);
      // Region->IfTrue->If == Region->Iffalse->If
      if (ifnode == inner_out->in(2)->in(0)) {
        inner_out = ifnode->in(0);
      }
    }

    CountedLoopEndNode* cle = inner_out->in(0)->as_CountedLoopEnd();
    assert(cle == inner->loopexit_or_null(), "mismatch");
    bool has_skeleton = outer_le->in(1)->bottom_type()->singleton() && outer_le->in(1)->bottom_type()->is_int()->get_con() == 0;
@ -41,9 +41,6 @@
#include "opto/rootnode.hpp"
#include "opto/subnode.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_ZGC
#include "gc/z/c2/zBarrierSetC2.hpp"
#endif

//=============================================================================
//------------------------------split_thru_phi---------------------------------
@ -197,7 +197,7 @@ public:
// ADLC inherit from this class.
class MachNode : public Node {
public:
  MachNode() : Node((uint)0), _num_opnds(0), _opnds(NULL) {
  MachNode() : Node((uint)0), _barrier(0), _num_opnds(0), _opnds(NULL) {
    init_class_id(Class_Mach);
  }
  // Required boilerplate
@ -211,6 +211,9 @@ public:
  // no constant base node input.
  virtual uint mach_constant_base_node_input() const { return (uint)-1; }

  uint8_t barrier_data() const { return _barrier; }
  void set_barrier_data(uint data) { _barrier = data; }

  // Copy inputs and operands to new node of instruction.
  // Called from cisc_version() and short_branch_version().
  // !!!! The method's body is defined in ad_<arch>.cpp file.
@ -255,6 +258,9 @@ public:
  // output have choices - but they must use the same choice.
  virtual uint two_adr( ) const { return 0; }

  // The GC might require some barrier metadata for machine code emission.
  uint8_t _barrier;

  // Array of complex operand pointers. Each corresponds to zero or
  // more leafs. Must be set by MachNode constructor to point to an
  // internal array of MachOpers. The MachOper array is sized by
@ -1751,6 +1751,13 @@ MachNode *Matcher::ReduceInst( State *s, int rule, Node *&mem ) {
      _shared_nodes.map(leaf->_idx, ex);
    }

    // Have mach nodes inherit GC barrier data
    if (leaf->is_LoadStore()) {
      mach->set_barrier_data(leaf->as_LoadStore()->barrier_data());
    } else if (leaf->is_Mem()) {
      mach->set_barrier_data(leaf->as_Mem()->barrier_data());
    }

    return ex;
  }

@ -49,9 +49,6 @@
#include "utilities/copy.hpp"
#include "utilities/macros.hpp"
#include "utilities/vmError.hpp"
#if INCLUDE_ZGC
#include "gc/z/c2/zBarrierSetC2.hpp"
#endif

// Portions of code courtesy of Clifford Click

@ -2851,7 +2848,7 @@ LoadStoreNode::LoadStoreNode( Node *c, Node *mem, Node *adr, Node *val, const Ty
  : Node(required),
    _type(rt),
    _adr_type(at),
    _has_barrier(false)
    _barrier(0)
{
  init_req(MemNode::Control, c );
  init_req(MemNode::Memory , mem);
@ -43,6 +43,8 @@ private:
  bool _unaligned_access; // Unaligned access from unsafe
  bool _mismatched_access; // Mismatched access from unsafe: byte read in integer array for instance
  bool _unsafe_access; // Access of unsafe origin.
  uint8_t _barrier; // Bit field with barrier information

protected:
#ifdef ASSERT
  const TypePtr* _adr_type; // What kind of memory is being addressed?
@ -62,18 +64,30 @@ public:
    unset // The memory ordering is not set (used for testing)
  } MemOrd;
protected:
  MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at )
    : Node(c0,c1,c2 ), _unaligned_access(false), _mismatched_access(false), _unsafe_access(false) {
  MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at ) :
    Node(c0,c1,c2),
    _unaligned_access(false),
    _mismatched_access(false),
    _unsafe_access(false),
    _barrier(0) {
    init_class_id(Class_Mem);
    debug_only(_adr_type=at; adr_type();)
  }
  MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at, Node *c3 )
    : Node(c0,c1,c2,c3), _unaligned_access(false), _mismatched_access(false), _unsafe_access(false) {
  MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at, Node *c3 ) :
    Node(c0,c1,c2,c3),
    _unaligned_access(false),
    _mismatched_access(false),
    _unsafe_access(false),
    _barrier(0) {
    init_class_id(Class_Mem);
    debug_only(_adr_type=at; adr_type();)
  }
  MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at, Node *c3, Node *c4)
    : Node(c0,c1,c2,c3,c4), _unaligned_access(false), _mismatched_access(false), _unsafe_access(false) {
  MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at, Node *c3, Node *c4) :
    Node(c0,c1,c2,c3,c4),
    _unaligned_access(false),
    _mismatched_access(false),
    _unsafe_access(false),
    _barrier(0) {
    init_class_id(Class_Mem);
    debug_only(_adr_type=at; adr_type();)
  }
@ -125,6 +139,9 @@ public:
#endif
  }

  uint8_t barrier_data() { return _barrier; }
  void set_barrier_data(uint8_t barrier_data) { _barrier = barrier_data; }

  // Search through memory states which precede this node (load or store).
  // Look for an exact match for the address, with no intervening
  // aliased stores.
@ -181,8 +198,6 @@ private:
  // this field.
  const MemOrd _mo;

  uint _barrier; // Bit field with barrier information

  AllocateNode* is_new_object_mark_load(PhaseGVN *phase) const;

protected:
@ -196,7 +211,7 @@ protected:
public:

  LoadNode(Node *c, Node *mem, Node *adr, const TypePtr* at, const Type *rt, MemOrd mo, ControlDependency control_dependency)
    : MemNode(c,mem,adr,at), _control_dependency(control_dependency), _mo(mo), _barrier(0), _type(rt) {
    : MemNode(c,mem,adr,at), _control_dependency(control_dependency), _mo(mo), _type(rt) {
    init_class_id(Class_Load);
  }
  inline bool is_unordered() const { return !is_acquire(); }
@ -265,10 +280,6 @@ public:
  Node* convert_to_unsigned_load(PhaseGVN& gvn);
  Node* convert_to_signed_load(PhaseGVN& gvn);

  void copy_barrier_info(const Node* src) { _barrier = src->as_Load()->_barrier; }
  uint barrier_data() { return _barrier; }
  void set_barrier_data(uint barrier_data) { _barrier |= barrier_data; }

  void pin() { _control_dependency = Pinned; }
  bool has_unknown_control_dependency() const { return _control_dependency == UnknownControl; }

@ -820,7 +831,7 @@ class LoadStoreNode : public Node {
private:
  const Type* const _type; // What kind of value is loaded?
  const TypePtr* _adr_type; // What kind of memory is being addressed?
  bool _has_barrier;
  uint8_t _barrier; // Bit field with barrier information
  virtual uint size_of() const; // Size is bigger
public:
  LoadStoreNode( Node *c, Node *mem, Node *adr, Node *val, const TypePtr* at, const Type* rt, uint required );
@ -833,8 +844,9 @@ public:

  bool result_not_used() const;
  MemBarNode* trailing_membar() const;
  void set_has_barrier() { _has_barrier = true; };
  bool has_barrier() const { return _has_barrier; };

  uint8_t barrier_data() { return _barrier; }
  void set_barrier_data(uint8_t barrier_data) { _barrier = barrier_data; }
};

class LoadStoreConditionalNode : public LoadStoreNode {
@ -886,6 +898,7 @@ public:
  MemNode::MemOrd order() const {
    return _mem_ord;
  }
  virtual uint size_of() const { return sizeof(*this); }
};

class CompareAndExchangeNode : public LoadStoreNode {
@ -903,6 +916,7 @@ public:
  MemNode::MemOrd order() const {
    return _mem_ord;
  }
  virtual uint size_of() const { return sizeof(*this); }
};

//------------------------------CompareAndSwapBNode---------------------------
@ -546,9 +546,6 @@ Node *Node::clone() const {
|
||||
if (n->is_SafePoint()) {
|
||||
n->as_SafePoint()->clone_replaced_nodes();
|
||||
}
|
||||
if (n->is_Load()) {
|
||||
n->as_Load()->copy_barrier_info(this);
|
||||
}
|
||||
return n; // Return the clone
|
||||
}
|
||||
|
||||
@ -1473,10 +1470,6 @@ bool Node::needs_anti_dependence_check() const {
|
||||
if (req() < 2 || (_flags & Flag_needs_anti_dependence_check) == 0) {
|
||||
return false;
|
||||
}
|
||||
BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
|
||||
if (!bs->needs_anti_dependence_check(this)) {
|
||||
return false;
|
||||
}
|
||||
return in(1)->bottom_type()->has_memory();
|
||||
}
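The removed guard asked the active barrier set whether a node still needs an anti-dependence check before consulting its memory input. As a rough, self-contained sketch of that hook pattern (the classes below are simplified stand-ins, not HotSpot's real declarations):

#include <cstdint>
#include <iostream>

struct FakeNode { uint8_t barrier_data = 0; };            // simplified stand-in for a C2 load node

// Default barrier set: every load keeps its usual anti-dependence check.
struct FakeBarrierSetC2 {
  virtual bool needs_anti_dependence_check(const FakeNode*) const { return true; }
  virtual ~FakeBarrierSetC2() {}
};

// A ZGC-style barrier set could veto the check for loads it manages itself,
// for example loads that carry barrier data and are pinned to their barrier.
struct FakeZBarrierSetC2 : FakeBarrierSetC2 {
  bool needs_anti_dependence_check(const FakeNode* n) const override {
    return n->barrier_data == 0;
  }
};

int main() {
  FakeZBarrierSetC2 bs;
  FakeNode plain, tagged;
  tagged.barrier_data = 1;
  std::cout << bs.needs_anti_dependence_check(&plain) << " "
            << bs.needs_anti_dependence_check(&tagged) << "\n";  // prints "1 0"
}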

@ -83,8 +83,6 @@ class JVMState;
class JumpNode;
class JumpProjNode;
class LoadNode;
class LoadBarrierNode;
class LoadBarrierSlowRegNode;
class LoadStoreNode;
class LoadStoreConditionalNode;
class LockNode;
@ -642,7 +640,6 @@ public:
DEFINE_CLASS_ID(MemBar, Multi, 3)
DEFINE_CLASS_ID(Initialize, MemBar, 0)
DEFINE_CLASS_ID(MemBarStoreStore, MemBar, 1)
DEFINE_CLASS_ID(LoadBarrier, Multi, 4)

DEFINE_CLASS_ID(Mach, Node, 1)
DEFINE_CLASS_ID(MachReturn, Mach, 0)
@ -679,7 +676,6 @@ public:
DEFINE_CLASS_ID(EncodeNarrowPtr, Type, 6)
DEFINE_CLASS_ID(EncodeP, EncodeNarrowPtr, 0)
DEFINE_CLASS_ID(EncodePKlass, EncodeNarrowPtr, 1)
DEFINE_CLASS_ID(LoadBarrierSlowReg, Type, 7)

DEFINE_CLASS_ID(Proj, Node, 3)
DEFINE_CLASS_ID(CatchProj, Proj, 0)
@ -836,8 +832,6 @@ public:
DEFINE_CLASS_QUERY(Load)
DEFINE_CLASS_QUERY(LoadStore)
DEFINE_CLASS_QUERY(LoadStoreConditional)
DEFINE_CLASS_QUERY(LoadBarrier)
DEFINE_CLASS_QUERY(LoadBarrierSlowReg)
DEFINE_CLASS_QUERY(Lock)
DEFINE_CLASS_QUERY(Loop)
DEFINE_CLASS_QUERY(Mach)

@ -31,6 +31,8 @@
#include "compiler/compileBroker.hpp"
#include "compiler/compilerDirectives.hpp"
#include "compiler/oopMap.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/c2/barrierSetC2.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/ad.hpp"
#include "opto/callnode.hpp"
@ -114,35 +116,33 @@ void Compile::Output() {
}
}

// Keeper of sizing aspects
BufferSizingData buf_sizes = BufferSizingData();

// Initialize code buffer
estimate_buffer_size(buf_sizes._const);
if (failing()) return;

// Pre-compute the length of blocks and replace
// long branches with short if machine supports it.
// Must be done before ScheduleAndBundle due to SPARC delay slots
uint* blk_starts = NEW_RESOURCE_ARRAY(uint, _cfg->number_of_blocks() + 1);
blk_starts[0] = 0;
shorten_branches(blk_starts, buf_sizes);

// Initialize code buffer and process short branches.
CodeBuffer* cb = init_buffer(blk_starts);

if (cb == NULL || failing()) {
ScheduleAndBundle();
if (failing()) {
return;
}

ScheduleAndBundle();
// Late barrier analysis must be done after schedule and bundle
// Otherwise liveness based spilling will fail
BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
bs->late_barrier_analysis();

#ifndef PRODUCT
if (trace_opto_output()) {
tty->print("\n---- After ScheduleAndBundle ----\n");
for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
tty->print("\nBB#%03d:\n", i);
Block* block = _cfg->get_block(i);
for (uint j = 0; j < block->number_of_nodes(); j++) {
Node* n = block->get_node(j);
OptoReg::Name reg = _regalloc->get_reg_first(n);
tty->print(" %-6s ", reg >= 0 && reg < REG_COUNT ? Matcher::regName[reg] : "");
n->dump();
}
}
}
#endif

if (failing()) {
// Complete sizing of codebuffer
CodeBuffer* cb = init_buffer(buf_sizes);
if (cb == NULL || failing()) {
return;
}
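Compile::Output() now runs ScheduleAndBundle first, lets the barrier set do its late analysis, and only then sizes and fills the code buffer. For orientation, a self-contained sketch of that call ordering with stand-in hooks (the hook bodies are invented; only the ordering mirrors the code above):

#include <iostream>

// Stand-ins for the BarrierSetC2 hooks used above; real GCs override them.
struct FakeBarrierSetC2 {
  virtual void late_barrier_analysis() {}               // e.g. decide which loads need stubs
  virtual int  estimate_stub_size() const { return 0; } // extra bytes reserved in the buffer
  virtual ~FakeBarrierSetC2() {}
};

void output(FakeBarrierSetC2& bs) {
  std::cout << "estimate_buffer_size\n";
  std::cout << "shorten_branches\n";
  std::cout << "ScheduleAndBundle\n";
  bs.late_barrier_analysis();                           // after scheduling: liveness is final here
  std::cout << "init_buffer (+" << bs.estimate_stub_size() << " stub bytes)\n";
  std::cout << "fill_buffer\n";
}

int main() { FakeBarrierSetC2 bs; output(bs); }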

@ -223,7 +223,7 @@ void Compile::compute_loop_first_inst_sizes() {

// The architecture description provides short branch variants for some long
// branch instructions. Replace eligible long branches with short branches.
void Compile::shorten_branches(uint* blk_starts, int& code_size, int& reloc_size, int& stub_size) {
void Compile::shorten_branches(uint* blk_starts, BufferSizingData& buf_sizes) {
// Compute size of each block, method size, and relocation information size
uint nblocks = _cfg->number_of_blocks();

@ -241,11 +241,11 @@ void Compile::shorten_branches(uint* blk_starts, int& code_size, int& reloc_size
bool has_short_branch_candidate = false;

// Initialize the sizes to 0
code_size = 0; // Size in bytes of generated code
stub_size = 0; // Size in bytes of all stub entries
int code_size = 0; // Size in bytes of generated code
int stub_size = 0; // Size in bytes of all stub entries
// Size in bytes of all relocation entries, including those in local stubs.
// Start with 2-bytes of reloc info for the unvalidated entry point
reloc_size = 1; // Number of relocation entries
int reloc_size = 1; // Number of relocation entries

// Make three passes. The first computes pessimistic blk_starts,
// relative jmp_offset and reloc_size information. The second performs
@ -479,6 +479,10 @@ void Compile::shorten_branches(uint* blk_starts, int& code_size, int& reloc_size
// a relocation index.
// The CodeBuffer will expand the locs array if this estimate is too low.
reloc_size *= 10 / sizeof(relocInfo);

buf_sizes._reloc = reloc_size;
buf_sizes._code = code_size;
buf_sizes._stub = stub_size;
}
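shorten_branches() now reports its size estimates through a small aggregate instead of three reference parameters. The fields written above (_code, _stub, _reloc, plus the _const slot filled by estimate_buffer_size()) suggest a plain data holder along these lines; the exact definition is an assumption, not quoted from the patch:

// Assumed shape of the sizing aggregate; field names follow the usages above.
struct BufferSizingData {
  int _stub;
  int _code;
  int _const;
  int _reloc;

  BufferSizingData() : _stub(0), _code(0), _const(0), _reloc(0) {}
};

// Typical flow, mirroring the calls in Compile::Output() above:
//   BufferSizingData buf_sizes;
//   estimate_buffer_size(buf_sizes._const);
//   shorten_branches(blk_starts, buf_sizes);
//   CodeBuffer* cb = init_buffer(buf_sizes);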

//------------------------------FillLocArray-----------------------------------
@ -490,8 +494,8 @@ static LocationValue *new_loc_value( PhaseRegAlloc *ra, OptoReg::Name regnum, Lo
// This should never have accepted Bad before
assert(OptoReg::is_valid(regnum), "location must be valid");
return (OptoReg::is_reg(regnum))
? new LocationValue(Location::new_reg_loc(l_type, OptoReg::as_VMReg(regnum)) )
: new LocationValue(Location::new_stk_loc(l_type, ra->reg2offset(regnum)));
? new LocationValue(Location::new_reg_loc(l_type, OptoReg::as_VMReg(regnum)) )
: new LocationValue(Location::new_stk_loc(l_type, ra->reg2offset(regnum)));
}

@ -610,12 +614,12 @@ void Compile::FillLocArray( int idx, MachSafePointNode* sfpt, Node *local,
}
#endif //_LP64
else if( (t->base() == Type::FloatBot || t->base() == Type::FloatCon) &&
OptoReg::is_reg(regnum) ) {
OptoReg::is_reg(regnum) ) {
array->append(new_loc_value( _regalloc, regnum, Matcher::float_in_double()
? Location::float_in_dbl : Location::normal ));
? Location::float_in_dbl : Location::normal ));
} else if( t->base() == Type::Int && OptoReg::is_reg(regnum) ) {
array->append(new_loc_value( _regalloc, regnum, Matcher::int_in_long
? Location::int_in_long : Location::normal ));
? Location::int_in_long : Location::normal ));
} else if( t->base() == Type::NarrowOop ) {
array->append(new_loc_value( _regalloc, regnum, Location::narrowoop ));
} else {
@ -626,48 +630,48 @@ void Compile::FillLocArray( int idx, MachSafePointNode* sfpt, Node *local,

// No register. It must be constant data.
switch (t->base()) {
case Type::Half: // Second half of a double
ShouldNotReachHere(); // Caller should skip 2nd halves
break;
case Type::AnyPtr:
array->append(new ConstantOopWriteValue(NULL));
break;
case Type::AryPtr:
case Type::InstPtr: // fall through
array->append(new ConstantOopWriteValue(t->isa_oopptr()->const_oop()->constant_encoding()));
break;
case Type::NarrowOop:
if (t == TypeNarrowOop::NULL_PTR) {
case Type::Half: // Second half of a double
ShouldNotReachHere(); // Caller should skip 2nd halves
break;
case Type::AnyPtr:
array->append(new ConstantOopWriteValue(NULL));
} else {
array->append(new ConstantOopWriteValue(t->make_ptr()->isa_oopptr()->const_oop()->constant_encoding()));
}
break;
case Type::Int:
array->append(new ConstantIntValue(t->is_int()->get_con()));
break;
case Type::RawPtr:
// A return address (T_ADDRESS).
assert((intptr_t)t->is_ptr()->get_con() < (intptr_t)0x10000, "must be a valid BCI");
break;
case Type::AryPtr:
case Type::InstPtr: // fall through
array->append(new ConstantOopWriteValue(t->isa_oopptr()->const_oop()->constant_encoding()));
break;
case Type::NarrowOop:
if (t == TypeNarrowOop::NULL_PTR) {
array->append(new ConstantOopWriteValue(NULL));
} else {
array->append(new ConstantOopWriteValue(t->make_ptr()->isa_oopptr()->const_oop()->constant_encoding()));
}
break;
case Type::Int:
array->append(new ConstantIntValue(t->is_int()->get_con()));
break;
case Type::RawPtr:
// A return address (T_ADDRESS).
assert((intptr_t)t->is_ptr()->get_con() < (intptr_t)0x10000, "must be a valid BCI");
#ifdef _LP64
// Must be restored to the full-width 64-bit stack slot.
array->append(new ConstantLongValue(t->is_ptr()->get_con()));
// Must be restored to the full-width 64-bit stack slot.
array->append(new ConstantLongValue(t->is_ptr()->get_con()));
#else
array->append(new ConstantIntValue(t->is_ptr()->get_con()));
array->append(new ConstantIntValue(t->is_ptr()->get_con()));
#endif
break;
case Type::FloatCon: {
float f = t->is_float_constant()->getf();
array->append(new ConstantIntValue(jint_cast(f)));
break;
}
case Type::DoubleCon: {
jdouble d = t->is_double_constant()->getd();
break;
case Type::FloatCon: {
float f = t->is_float_constant()->getf();
array->append(new ConstantIntValue(jint_cast(f)));
break;
}
case Type::DoubleCon: {
jdouble d = t->is_double_constant()->getd();
#ifdef _LP64
array->append(new ConstantIntValue((jint)0));
array->append(new ConstantDoubleValue(d));
array->append(new ConstantIntValue((jint)0));
array->append(new ConstantDoubleValue(d));
#else
// Repack the double as two jints.
// Repack the double as two jints.
// The convention the interpreter uses is that the second local
// holds the first raw word of the native double representation.
// This is actually reasonable, since locals and stack arrays
@ -679,15 +683,15 @@ void Compile::FillLocArray( int idx, MachSafePointNode* sfpt, Node *local,
array->append(new ConstantIntValue(acc.words[1]));
array->append(new ConstantIntValue(acc.words[0]));
#endif
break;
}
case Type::Long: {
jlong d = t->is_long()->get_con();
break;
}
case Type::Long: {
jlong d = t->is_long()->get_con();
#ifdef _LP64
array->append(new ConstantIntValue((jint)0));
array->append(new ConstantLongValue(d));
array->append(new ConstantIntValue((jint)0));
array->append(new ConstantLongValue(d));
#else
// Repack the long as two jints.
// Repack the long as two jints.
// The convention the interpreter uses is that the second local
// holds the first raw word of the native double representation.
// This is actually reasonable, since locals and stack arrays
@ -699,14 +703,14 @@ void Compile::FillLocArray( int idx, MachSafePointNode* sfpt, Node *local,
array->append(new ConstantIntValue(acc.words[1]));
array->append(new ConstantIntValue(acc.words[0]));
#endif
break;
}
case Type::Top: // Add an illegal value here
array->append(new LocationValue(Location()));
break;
default:
ShouldNotReachHere();
break;
break;
}
case Type::Top: // Add an illegal value here
array->append(new LocationValue(Location()));
break;
default:
ShouldNotReachHere();
break;
}
}

@ -871,58 +875,58 @@ void Compile::Process_OopMap_Node(MachNode *mach, int current_offset) {

// A simplified version of Process_OopMap_Node, to handle non-safepoints.
class NonSafepointEmitter {
Compile* C;
JVMState* _pending_jvms;
int _pending_offset;
Compile* C;
JVMState* _pending_jvms;
int _pending_offset;

void emit_non_safepoint();
void emit_non_safepoint();

public:
NonSafepointEmitter(Compile* compile) {
this->C = compile;
_pending_jvms = NULL;
_pending_offset = 0;
}
NonSafepointEmitter(Compile* compile) {
this->C = compile;
_pending_jvms = NULL;
_pending_offset = 0;
}

void observe_instruction(Node* n, int pc_offset) {
if (!C->debug_info()->recording_non_safepoints()) return;
void observe_instruction(Node* n, int pc_offset) {
if (!C->debug_info()->recording_non_safepoints()) return;

Node_Notes* nn = C->node_notes_at(n->_idx);
if (nn == NULL || nn->jvms() == NULL) return;
if (_pending_jvms != NULL &&
_pending_jvms->same_calls_as(nn->jvms())) {
// Repeated JVMS? Stretch it up here.
_pending_offset = pc_offset;
} else {
Node_Notes* nn = C->node_notes_at(n->_idx);
if (nn == NULL || nn->jvms() == NULL) return;
if (_pending_jvms != NULL &&
_pending_jvms->same_calls_as(nn->jvms())) {
// Repeated JVMS? Stretch it up here.
_pending_offset = pc_offset;
} else {
if (_pending_jvms != NULL &&
_pending_offset < pc_offset) {
emit_non_safepoint();
}
_pending_jvms = NULL;
if (pc_offset > C->debug_info()->last_pc_offset()) {
// This is the only way _pending_jvms can become non-NULL:
_pending_jvms = nn->jvms();
_pending_offset = pc_offset;
}
}
}

// Stay out of the way of real safepoints:
void observe_safepoint(JVMState* jvms, int pc_offset) {
if (_pending_jvms != NULL &&
!_pending_jvms->same_calls_as(jvms) &&
_pending_offset < pc_offset) {
emit_non_safepoint();
}
_pending_jvms = NULL;
if (pc_offset > C->debug_info()->last_pc_offset()) {
// This is the only way _pending_jvms can become non-NULL:
_pending_jvms = nn->jvms();
_pending_offset = pc_offset;
}

void flush_at_end() {
if (_pending_jvms != NULL) {
emit_non_safepoint();
}
_pending_jvms = NULL;
}
}

// Stay out of the way of real safepoints:
void observe_safepoint(JVMState* jvms, int pc_offset) {
if (_pending_jvms != NULL &&
!_pending_jvms->same_calls_as(jvms) &&
_pending_offset < pc_offset) {
emit_non_safepoint();
}
_pending_jvms = NULL;
}

void flush_at_end() {
if (_pending_jvms != NULL) {
emit_non_safepoint();
}
_pending_jvms = NULL;
}
};

void NonSafepointEmitter::emit_non_safepoint() {
@ -952,15 +956,11 @@ void NonSafepointEmitter::emit_non_safepoint() {
}

//------------------------------init_buffer------------------------------------
CodeBuffer* Compile::init_buffer(uint* blk_starts) {
void Compile::estimate_buffer_size(int& const_req) {

// Set the initially allocated size
int code_req = initial_code_capacity;
int locs_req = initial_locs_capacity;
int stub_req = initial_stub_capacity;
int const_req = initial_const_capacity;
const_req = initial_const_capacity;

int pad_req = NativeCall::instruction_size;
// The extra spacing after the code is necessary on some platforms.
// Sometimes we need to patch in a jump after the last instruction,
// if the nmethod has been deoptimized. (See 4932387, 4894843.)
@ -972,7 +972,7 @@ CodeBuffer* Compile::init_buffer(uint* blk_starts) {

// Compute prolog code size
_method_size = 0;
_frame_slots = OptoReg::reg2stack(_matcher->_old_SP)+_regalloc->_framesize;
_frame_slots = OptoReg::reg2stack(_matcher->_old_SP) + _regalloc->_framesize;
#if defined(IA64) && !defined(AIX)
if (save_argument_registers()) {
// 4815101: this is a stub with implicit and unknown precision fp args.
@ -1021,11 +1021,18 @@ CodeBuffer* Compile::init_buffer(uint* blk_starts) {
// Initialize the space for the BufferBlob used to find and verify
// instruction size in MachNode::emit_size()
init_scratch_buffer_blob(const_req);
if (failing()) return NULL; // Out of memory
}

// Pre-compute the length of blocks and replace
// long branches with short if machine supports it.
shorten_branches(blk_starts, code_req, locs_req, stub_req);
CodeBuffer* Compile::init_buffer(BufferSizingData& buf_sizes) {

int stub_req = buf_sizes._stub;
int code_req = buf_sizes._code;
int const_req = buf_sizes._const;

int pad_req = NativeCall::instruction_size;

BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
stub_req += bs->estimate_stub_size();

// nmethod and CodeBuffer count stubs & constants as part of method's code.
// class HandlerImpl is platform-specific and defined in the *.ad files.
@ -1038,18 +1045,18 @@ CodeBuffer* Compile::init_buffer(uint* blk_starts) {
code_req = const_req = stub_req = exception_handler_req = deopt_handler_req = 0x10; // force expansion

int total_req =
const_req +
code_req +
pad_req +
stub_req +
exception_handler_req +
deopt_handler_req; // deopt handler
const_req +
code_req +
pad_req +
stub_req +
exception_handler_req +
deopt_handler_req; // deopt handler

if (has_method_handle_invokes())
total_req += deopt_handler_req; // deopt MH handler

CodeBuffer* cb = code_buffer();
cb->initialize(total_req, locs_req);
cb->initialize(total_req, buf_sizes._reloc);

// Have we run out of code space?
if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) {
@ -1268,12 +1275,12 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
Process_OopMap_Node(mach, current_offset);
} // End if safepoint

// If this is a null check, then add the start of the previous instruction to the list
// If this is a null check, then add the start of the previous instruction to the list
else if( mach->is_MachNullCheck() ) {
inct_starts[inct_cnt++] = previous_offset;
}

// If this is a branch, then fill in the label with the target BB's label
// If this is a branch, then fill in the label with the target BB's label
else if (mach->is_MachBranch()) {
// This requires the TRUE branch target be in succs[0]
uint block_num = block->non_connector_successor(0)->_pre_order;
@ -1284,8 +1291,8 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
bool delay_slot_is_used = valid_bundle_info(n) &&
node_bundling(n)->use_unconditional_delay();
if (!delay_slot_is_used && mach->may_be_short_branch()) {
assert(delay_slot == NULL, "not expecting delay slot node");
int br_size = n->size(_regalloc);
assert(delay_slot == NULL, "not expecting delay slot node");
int br_size = n->size(_regalloc);
int offset = blk_starts[block_num] - current_offset;
if (block_num >= i) {
// Current and following block's offset are not
@ -1343,7 +1350,7 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
}
}
#ifdef ASSERT
// Check that oop-store precedes the card-mark
// Check that oop-store precedes the card-mark
else if (mach->ideal_Opcode() == Op_StoreCM) {
uint storeCM_idx = j;
int count = 0;
@ -1514,6 +1521,10 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
}
#endif

BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
bs->emit_stubs(*cb);
if (failing()) return;

#ifndef PRODUCT
// Information on the size of the method, without the extraneous code
Scheduling::increment_method_size(cb->insts_size());
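Once the main instruction stream has been emitted, the barrier set gets a chance to append its out-of-line stubs to the same CodeBuffer. A self-contained sketch of the queue-then-emit idea (the types and names below are illustrative, not HotSpot's):

#include <functional>
#include <iostream>
#include <vector>

// Illustrative stub queue: barrier expansion registers one slow-path stub per
// tagged access, and everything queued is emitted after the last instruction.
struct FakeStubQueue {
  std::vector<std::function<void()>> stubs;
  void add(std::function<void()> s) { stubs.push_back(std::move(s)); }
  void emit_stubs() {
    for (auto& s : stubs) s();    // each stub appends its code to the buffer here
    stubs.clear();
  }
};

int main() {
  FakeStubQueue q;
  q.add([] { std::cout << "slow-path stub for load #1\n"; });
  q.add([] { std::cout << "slow-path stub for load #2\n"; });
  std::cout << "main code emitted\n";
  q.emit_stubs();                  // mirrors bs->emit_stubs(*cb) running after fill_buffer
}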
@ -1688,20 +1699,20 @@ uint Scheduling::_total_instructions_per_bundle[Pipeline::_max_instrs_per_cycle+
// Initializer for class Scheduling

Scheduling::Scheduling(Arena *arena, Compile &compile)
: _arena(arena),
_cfg(compile.cfg()),
_regalloc(compile.regalloc()),
_scheduled(arena),
_available(arena),
_reg_node(arena),
_pinch_free_list(arena),
_next_node(NULL),
_bundle_instr_count(0),
_bundle_cycle_number(0),
_bundle_use(0, 0, resource_count, &_bundle_use_elements[0])
: _arena(arena),
_cfg(compile.cfg()),
_regalloc(compile.regalloc()),
_scheduled(arena),
_available(arena),
_reg_node(arena),
_pinch_free_list(arena),
_next_node(NULL),
_bundle_instr_count(0),
_bundle_cycle_number(0),
_bundle_use(0, 0, resource_count, &_bundle_use_elements[0])
#ifndef PRODUCT
, _branches(0)
, _unconditional_delays(0)
, _branches(0)
, _unconditional_delays(0)
#endif
{
// Create a MachNopNode
@ -1782,8 +1793,8 @@ void Scheduling::step_and_clear() {
_bundle_use.reset();

memcpy(_bundle_use_elements,
Pipeline_Use::elaborated_elements,
sizeof(Pipeline_Use::elaborated_elements));
Pipeline_Use::elaborated_elements,
sizeof(Pipeline_Use::elaborated_elements));
}

// Perform instruction scheduling and bundling over the sequence of
@ -1810,6 +1821,22 @@ void Compile::ScheduleAndBundle() {
// Walk backwards over each basic block, computing the needed alignment
// Walk over all the basic blocks
scheduling.DoScheduling();

#ifndef PRODUCT
if (trace_opto_output()) {
tty->print("\n---- After ScheduleAndBundle ----\n");
for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
tty->print("\nBB#%03d:\n", i);
Block* block = _cfg->get_block(i);
for (uint j = 0; j < block->number_of_nodes(); j++) {
Node* n = block->get_node(j);
OptoReg::Name reg = _regalloc->get_reg_first(n);
tty->print(" %-6s ", reg >= 0 && reg < REG_COUNT ? Matcher::regName[reg] : "");
n->dump();
}
}
}
#endif
}

// Compute the latency of all the instructions. This is fairly simple,
@ -1878,7 +1905,7 @@ bool Scheduling::NodeFitsInBundle(Node *n) {
#ifndef PRODUCT
if (_cfg->C->trace_opto_output())
tty->print("# NodeFitsInBundle [%4d]: FALSE; latency %4d > %d\n",
n->_idx, _current_latency[n_idx], _bundle_cycle_number);
n->_idx, _current_latency[n_idx], _bundle_cycle_number);
#endif
return (false);
}
@ -1895,7 +1922,7 @@ bool Scheduling::NodeFitsInBundle(Node *n) {
#ifndef PRODUCT
if (_cfg->C->trace_opto_output())
tty->print("# NodeFitsInBundle [%4d]: FALSE; too many instructions: %d > %d\n",
n->_idx, _bundle_instr_count + instruction_count, Pipeline::_max_instrs_per_cycle);
n->_idx, _bundle_instr_count + instruction_count, Pipeline::_max_instrs_per_cycle);
#endif
return (false);
}
@ -2103,12 +2130,12 @@ void Scheduling::AddNodeToBundle(Node *n, const Block *bb) {
// Don't allow safepoints in the branch shadow, that will
// cause a number of difficulties
if ( avail_pipeline->instructionCount() == 1 &&
!avail_pipeline->hasMultipleBundles() &&
!avail_pipeline->hasBranchDelay() &&
Pipeline::instr_has_unit_size() &&
d->size(_regalloc) == Pipeline::instr_unit_size() &&
NodeFitsInBundle(d) &&
!node_bundling(d)->used_in_delay()) {
!avail_pipeline->hasMultipleBundles() &&
!avail_pipeline->hasBranchDelay() &&
Pipeline::instr_has_unit_size() &&
d->size(_regalloc) == Pipeline::instr_unit_size() &&
NodeFitsInBundle(d) &&
!node_bundling(d)->used_in_delay()) {

if (d->is_Mach() && !d->is_MachSafePoint()) {
// A node that fits in the delay slot was found, so we need to
@ -2153,13 +2180,13 @@ void Scheduling::AddNodeToBundle(Node *n, const Block *bb) {
// step of the bundles
if (!NodeFitsInBundle(n)) {
#ifndef PRODUCT
if (_cfg->C->trace_opto_output())
tty->print("# *** STEP(branch won't fit) ***\n");
if (_cfg->C->trace_opto_output())
tty->print("# *** STEP(branch won't fit) ***\n");
#endif
// Update the state information
_bundle_instr_count = 0;
_bundle_cycle_number += 1;
_bundle_use.step(1);
// Update the state information
_bundle_instr_count = 0;
_bundle_cycle_number += 1;
_bundle_use.step(1);
}
}

@ -2205,8 +2232,8 @@ void Scheduling::AddNodeToBundle(Node *n, const Block *bb) {
#ifndef PRODUCT
if (_cfg->C->trace_opto_output())
tty->print("# *** STEP(%d >= %d instructions) ***\n",
instruction_count + _bundle_instr_count,
Pipeline::_max_instrs_per_cycle);
instruction_count + _bundle_instr_count,
Pipeline::_max_instrs_per_cycle);
#endif
step(1);
}
@ -2412,7 +2439,7 @@ void Scheduling::DoScheduling() {
}
assert(!last->is_Mach() || last->as_Mach()->ideal_Opcode() != Op_Con, "");
if( last->is_Catch() ||
(last->is_Mach() && last->as_Mach()->ideal_Opcode() == Op_Halt) ) {
(last->is_Mach() && last->as_Mach()->ideal_Opcode() == Op_Halt) ) {
// There might be a prior call. Skip it.
while (_bb_start < _bb_end && bb->get_node(--_bb_end)->is_MachProj());
} else if( last->is_MachNullCheck() ) {
@ -2482,7 +2509,7 @@ void Scheduling::DoScheduling() {
}
#endif
#ifdef ASSERT
verify_good_schedule(bb,"after block local scheduling");
verify_good_schedule(bb,"after block local scheduling");
#endif
}

@ -2830,31 +2857,31 @@ void Scheduling::ComputeRegisterAntidependencies(Block *b) {
//
void Scheduling::garbage_collect_pinch_nodes() {
#ifndef PRODUCT
if (_cfg->C->trace_opto_output()) tty->print("Reclaimed pinch nodes:");
if (_cfg->C->trace_opto_output()) tty->print("Reclaimed pinch nodes:");
#endif
int trace_cnt = 0;
for (uint k = 0; k < _reg_node.Size(); k++) {
Node* pinch = _reg_node[k];
if ((pinch != NULL) && pinch->Opcode() == Op_Node &&
// no predecence input edges
(pinch->req() == pinch->len() || pinch->in(pinch->req()) == NULL) ) {
cleanup_pinch(pinch);
_pinch_free_list.push(pinch);
_reg_node.map(k, NULL);
int trace_cnt = 0;
for (uint k = 0; k < _reg_node.Size(); k++) {
Node* pinch = _reg_node[k];
if ((pinch != NULL) && pinch->Opcode() == Op_Node &&
// no predecence input edges
(pinch->req() == pinch->len() || pinch->in(pinch->req()) == NULL) ) {
cleanup_pinch(pinch);
_pinch_free_list.push(pinch);
_reg_node.map(k, NULL);

#ifndef PRODUCT
if (_cfg->C->trace_opto_output()) {
trace_cnt++;
if (trace_cnt > 40) {
tty->print("\n");
trace_cnt = 0;
}
tty->print(" %d", pinch->_idx);
if (_cfg->C->trace_opto_output()) {
trace_cnt++;
if (trace_cnt > 40) {
tty->print("\n");
trace_cnt = 0;
}
#endif
tty->print(" %d", pinch->_idx);
}
#endif
}
}
#ifndef PRODUCT
if (_cfg->C->trace_opto_output()) tty->print("\n");
if (_cfg->C->trace_opto_output()) tty->print("\n");
#endif
}

@ -2891,19 +2918,19 @@ void Scheduling::dump_available() const {
void Scheduling::print_statistics() {
// Print the size added by nops for bundling
tty->print("Nops added %d bytes to total of %d bytes",
_total_nop_size, _total_method_size);
_total_nop_size, _total_method_size);
if (_total_method_size > 0)
tty->print(", for %.2f%%",
((double)_total_nop_size) / ((double) _total_method_size) * 100.0);
((double)_total_nop_size) / ((double) _total_method_size) * 100.0);
tty->print("\n");

// Print the number of branch shadows filled
if (Pipeline::_branch_has_delay_slot) {
tty->print("Of %d branches, %d had unconditional delay slots filled",
_total_branches, _total_unconditional_delays);
_total_branches, _total_unconditional_delays);
if (_total_branches > 0)
tty->print(", for %.2f%%",
((double)_total_unconditional_delays) / ((double)_total_branches) * 100.0);
((double)_total_unconditional_delays) / ((double)_total_branches) * 100.0);
tty->print("\n");
}

@ -2917,6 +2944,6 @@ void Scheduling::print_statistics() {

if (total_bundles > 0)
tty->print("Average ILP (excluding nops) is %.2f\n",
((double)total_instructions) / ((double)total_bundles));
((double)total_instructions) / ((double)total_bundles));
}
#endif

@ -40,7 +40,6 @@ class PhaseCFG;
class PhaseChaitin;
class Pipeline_Use_Element;
class Pipeline_Use;

#ifndef PRODUCT
#define DEBUG_ARG(x) , x
#else
@ -49,10 +48,7 @@ class Pipeline_Use;

// Define the initial sizes for allocation of the resizable code buffer
enum {
initial_code_capacity = 16 * 1024,
initial_stub_capacity = 4 * 1024,
initial_const_capacity = 4 * 1024,
initial_locs_capacity = 3 * 1024
initial_const_capacity = 4 * 1024
};

//------------------------------Scheduling----------------------------------

@ -1648,14 +1648,14 @@ void PhaseIterGVN::add_users_to_worklist( Node *n ) {
// of the mirror load depends on the type of 'n'. See LoadNode::Value().
// LoadBarrier?(LoadP(LoadP(AddP(foo:Klass, #java_mirror))))
BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
bool has_load_barriers = bs->has_load_barriers();
bool has_load_barrier_nodes = bs->has_load_barrier_nodes();

if (use_op == Op_LoadP && use->bottom_type()->isa_rawptr()) {
for (DUIterator_Fast i2max, i2 = use->fast_outs(i2max); i2 < i2max; i2++) {
Node* u = use->fast_out(i2);
const Type* ut = u->bottom_type();
if (u->Opcode() == Op_LoadP && ut->isa_instptr()) {
if (has_load_barriers) {
if (has_load_barrier_nodes) {
// Search for load barriers behind the load
for (DUIterator_Fast i3max, i3 = u->fast_outs(i3max); i3 < i3max; i3++) {
Node* b = u->fast_out(i3);
@ -1808,14 +1808,14 @@ void PhaseCCP::analyze() {
// Loading the java mirror from a Klass requires two loads and the type
// of the mirror load depends on the type of 'n'. See LoadNode::Value().
BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
bool has_load_barriers = bs->has_load_barriers();
bool has_load_barrier_nodes = bs->has_load_barrier_nodes();

if (m_op == Op_LoadP && m->bottom_type()->isa_rawptr()) {
for (DUIterator_Fast i2max, i2 = m->fast_outs(i2max); i2 < i2max; i2++) {
Node* u = m->fast_out(i2);
const Type* ut = u->bottom_type();
if (u->Opcode() == Op_LoadP && ut->isa_instptr() && ut != type(u)) {
if (has_load_barriers) {
if (has_load_barrier_nodes) {
// Search for load barriers behind the load
for (DUIterator_Fast i3max, i3 = u->fast_outs(i3max); i3 < i3max; i3++) {
Node* b = u->fast_out(i3);
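The rename from has_load_barriers() to has_load_barrier_nodes() narrows the question to whether the GC still materializes barriers as separate ideal-graph nodes; with MachNode-level expansion they no longer exist as such. A minimal sketch of how the hook might be shaped, assuming a default of false (the actual declaration lives in barrierSetC2.hpp and is not quoted here):

// Assumed shape of the query; only GCs that still insert explicit barrier
// nodes into the ideal graph would return true and trigger the extra
// worklist walk guarded above.
struct FakeBarrierSetC2 {
  virtual bool has_load_barrier_nodes() const { return false; }
  virtual ~FakeBarrierSetC2() {}
};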