8290700: Optimize AArch64 nmethod entry barriers
Reviewed-by: kvn, dlong
This commit is contained in:
parent
852e71d9f0
commit
228e8e94fe
@ -1920,7 +1920,24 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
|
||||
|
||||
if (C->stub_function() == NULL) {
|
||||
BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
|
||||
bs->nmethod_entry_barrier(&_masm);
|
||||
if (BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
|
||||
// Dummy labels for just measuring the code size
|
||||
Label dummy_slow_path;
|
||||
Label dummy_continuation;
|
||||
Label dummy_guard;
|
||||
Label* slow_path = &dummy_slow_path;
|
||||
Label* continuation = &dummy_continuation;
|
||||
Label* guard = &dummy_guard;
|
||||
if (!Compile::current()->output()->in_scratch_emit_size()) {
|
||||
// Use real labels from actual stub when not emitting code for the purpose of measuring its size
|
||||
C2EntryBarrierStub* stub = Compile::current()->output()->entry_barrier_table()->add_entry_barrier();
|
||||
slow_path = &stub->slow_path();
|
||||
continuation = &stub->continuation();
|
||||
guard = &stub->guard();
|
||||
}
|
||||
// In the C2 code, we move the non-hot part of nmethod entry barriers out-of-line to a stub.
|
||||
bs->nmethod_entry_barrier(&_masm, slow_path, continuation, guard);
|
||||
}
|
||||
}
|
||||
|
||||
if (VerifyStackAtCalls) {
|
||||
|
@ -298,7 +298,7 @@ void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) {
|
||||
|
||||
// Insert nmethod entry barrier into frame.
|
||||
BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
|
||||
bs->nmethod_entry_barrier(this);
|
||||
bs->nmethod_entry_barrier(this, NULL /* slow_path */, NULL /* continuation */, NULL /* guard */);
|
||||
}
|
||||
|
||||
void C1_MacroAssembler::remove_frame(int framesize) {
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include "opto/c2_MacroAssembler.hpp"
|
||||
#include "opto/intrinsicnode.hpp"
|
||||
#include "opto/matcher.hpp"
|
||||
#include "opto/output.hpp"
|
||||
#include "opto/subnode.hpp"
|
||||
#include "runtime/stubRoutines.hpp"
|
||||
|
||||
@ -43,6 +44,21 @@
|
||||
|
||||
typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr);
|
||||
|
||||
// Emits the out-of-line part of a C2 nmethod entry barrier for `stub`:
// the slow path code, followed by the 32-bit guard word.
void C2_MacroAssembler::emit_entry_barrier_stub(C2EntryBarrierStub* stub) {
|
||||
// Slow path: call the method entry barrier stub routine through rscratch1,
// then branch back to the stub's continuation label.
bind(stub->slow_path());
|
||||
movptr(rscratch1, (uintptr_t) StubRoutines::aarch64::method_entry_barrier());
|
||||
blr(rscratch1);
|
||||
b(stub->continuation());
|
||||
|
||||
// Guard word: tagged with an entry_guard relocation so that its address can
// be found by iterating over the nmethod's relocations.
bind(stub->guard());
|
||||
relocate(entry_guard_Relocation::spec());
|
||||
emit_int32(0); // nmethod guard value
|
||||
}
|
||||
|
||||
// Size in bytes of one out-of-line entry barrier stub as produced by
// emit_entry_barrier_stub(): six 4-byte slots.
// NOTE(review): presumably movptr (3 instructions) + blr + b + the 32-bit
// guard word -- confirm against the movptr expansion.
int C2_MacroAssembler::entry_barrier_stub_size() {
  const int bytes_per_slot = 4;
  const int slot_count = 6;
  return bytes_per_slot * slot_count;
}
|
||||
|
||||
// Search for str1 in str2 and return index or -1
|
||||
void C2_MacroAssembler::string_indexof(Register str2, Register str1,
|
||||
Register cnt2, Register cnt1,
|
||||
|
@ -28,8 +28,8 @@
|
||||
// C2_MacroAssembler contains high-level macros for C2
|
||||
|
||||
public:
|
||||
void emit_entry_barrier_stub(C2EntryBarrierStub* stub) {}
|
||||
static int entry_barrier_stub_size() { return 0; }
|
||||
void emit_entry_barrier_stub(C2EntryBarrierStub* stub);
|
||||
static int entry_barrier_stub_size();
|
||||
|
||||
void string_compare(Register str1, Register str2,
|
||||
Register cnt1, Register cnt2, Register result,
|
||||
|
@ -246,18 +246,38 @@ void BarrierSetAssembler::clear_patching_epoch() {
|
||||
_patching_epoch = 0;
|
||||
}
|
||||
|
||||
void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) {
|
||||
void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label* slow_path, Label* continuation, Label* guard) {
|
||||
BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
|
||||
|
||||
if (bs_nm == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
Label skip_barrier, guard;
|
||||
Label local_guard;
|
||||
Label skip_barrier;
|
||||
NMethodPatchingType patching_type = nmethod_patching_type();
|
||||
|
||||
__ ldrw(rscratch1, guard);
|
||||
if (slow_path == NULL) {
|
||||
guard = &local_guard;
|
||||
}
|
||||
|
||||
if (nmethod_code_patching()) {
|
||||
// If the slow path is out of line in a stub, we flip the condition
|
||||
Assembler::Condition condition = slow_path == NULL ? Assembler::EQ : Assembler::NE;
|
||||
Label& barrier_target = slow_path == NULL ? skip_barrier : *slow_path;
|
||||
|
||||
__ ldrw(rscratch1, *guard);
|
||||
|
||||
if (patching_type == NMethodPatchingType::stw_instruction_and_data_patch) {
|
||||
// With STW patching, no data or instructions are updated concurrently,
|
||||
// which means there isn't really any need for any fencing for either
|
||||
// data or instruction modifications happening concurrently. The
|
||||
// instruction patching is handled with isb fences on the way back
|
||||
// from the safepoint to Java. So here we can do a plain conditional
|
||||
// branch with no fencing.
|
||||
Address thread_disarmed_addr(rthread, in_bytes(bs_nm->thread_disarmed_offset()));
|
||||
__ ldrw(rscratch2, thread_disarmed_addr);
|
||||
__ cmp(rscratch1, rscratch2);
|
||||
} else if (patching_type == NMethodPatchingType::conc_instruction_and_data_patch) {
|
||||
// If we patch code we need both a code patching and a loadload
|
||||
// fence. It's not super cheap, so we use a global epoch mechanism
|
||||
// to hide them in a slow path.
|
||||
@ -278,24 +298,28 @@ void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) {
|
||||
Address thread_disarmed_and_epoch_addr(rthread, in_bytes(bs_nm->thread_disarmed_offset()));
|
||||
__ ldr(rscratch2, thread_disarmed_and_epoch_addr);
|
||||
__ cmp(rscratch1, rscratch2);
|
||||
__ br(Assembler::EQ, skip_barrier);
|
||||
} else {
|
||||
assert(patching_type == NMethodPatchingType::conc_data_patch, "must be");
|
||||
// Subsequent loads of oops must occur after load of guard value.
|
||||
// BarrierSetNMethod::disarm sets guard with release semantics.
|
||||
__ membar(__ LoadLoad);
|
||||
Address thread_disarmed_addr(rthread, in_bytes(bs_nm->thread_disarmed_offset()));
|
||||
__ ldrw(rscratch2, thread_disarmed_addr);
|
||||
__ cmpw(rscratch1, rscratch2);
|
||||
__ br(Assembler::EQ, skip_barrier);
|
||||
}
|
||||
__ br(condition, barrier_target);
|
||||
|
||||
__ movptr(rscratch1, (uintptr_t) StubRoutines::aarch64::method_entry_barrier());
|
||||
__ blr(rscratch1);
|
||||
__ b(skip_barrier);
|
||||
if (slow_path == NULL) {
|
||||
__ movptr(rscratch1, (uintptr_t) StubRoutines::aarch64::method_entry_barrier());
|
||||
__ blr(rscratch1);
|
||||
__ b(skip_barrier);
|
||||
|
||||
__ bind(guard);
|
||||
__ bind(local_guard);
|
||||
|
||||
__ emit_int32(0); // nmethod guard value. Skipped over in common case.
|
||||
__ emit_int32(0); // nmethod guard value. Skipped over in common case.
|
||||
} else {
|
||||
__ bind(*continuation);
|
||||
}
|
||||
|
||||
__ bind(skip_barrier);
|
||||
}
|
||||
|
@ -31,6 +31,12 @@
|
||||
#include "memory/allocation.hpp"
|
||||
#include "oops/access.hpp"
|
||||
|
||||
// Describes how nmethods are patched, as reported by
// BarrierSetAssembler::nmethod_patching_type(). The nmethod entry barrier
// code chooses its fencing based on this value.
enum class NMethodPatchingType {
|
||||
// No data or instructions are updated concurrently; the entry barrier is a
// plain compare and conditional branch with no fencing.
stw_instruction_and_data_patch,
|
||||
||||
// Code is patched concurrently; the entry barrier needs both a code patching
// fence and a loadload fence, which are hidden in a slow path using a global
// epoch mechanism.
conc_instruction_and_data_patch,
|
||||
||||
// The entry barrier issues a LoadLoad membar after loading the guard value so
// that subsequent oop loads are ordered after it.
conc_data_patch
|
||||
};
|
||||
|
||||
class BarrierSetAssembler: public CHeapObj<mtGC> {
|
||||
private:
|
||||
void incr_allocated_bytes(MacroAssembler* masm,
|
||||
@ -68,9 +74,9 @@ public:
|
||||
);
|
||||
virtual void barrier_stubs_init() {}
|
||||
|
||||
virtual bool nmethod_code_patching() { return true; }
|
||||
virtual NMethodPatchingType nmethod_patching_type() { return NMethodPatchingType::stw_instruction_and_data_patch; }
|
||||
|
||||
virtual void nmethod_entry_barrier(MacroAssembler* masm);
|
||||
virtual void nmethod_entry_barrier(MacroAssembler* masm, Label* slow_path, Label* continuation, Label* guard);
|
||||
virtual void c2i_entry_barrier(MacroAssembler* masm);
|
||||
|
||||
static address patching_epoch_addr();
|
||||
|
@ -37,29 +37,62 @@
|
||||
#include "utilities/align.hpp"
|
||||
#include "utilities/debug.hpp"
|
||||
|
||||
// Number of 4-byte slots the entry barrier slow path occupies in-line in the
// given nmethod.
// NOTE(review): the 6 presumably covers movptr (3 instructions) + blr + b +
// the guard word emitted in-line for non-C2 code -- confirm.
static int slow_path_size(nmethod* nm) {
  if (nm->is_compiled_by_c2()) {
    // C2 places the slow path out of line, so it takes no in-line space.
    return 0;
  }
  return 6;
}
|
||||
|
||||
// This is the offset of the entry barrier from where the frame is completed.
|
||||
// If any code changes between the end of the verified entry where the entry
|
||||
// barrier resides, and the completion of the frame, then
|
||||
// NativeNMethodCmpBarrier::verify() will immediately complain when it does
|
||||
// not find the expected native instruction at this offset, which needs updating.
|
||||
// Note that this offset is invariant of PreserveFramePointer.
|
||||
static int entry_barrier_offset(nmethod* nm) {
|
||||
BarrierSetAssembler* bs_asm = BarrierSet::barrier_set()->barrier_set_assembler();
|
||||
switch (bs_asm->nmethod_patching_type()) {
|
||||
case NMethodPatchingType::stw_instruction_and_data_patch:
|
||||
return -4 * (4 + slow_path_size(nm));
|
||||
case NMethodPatchingType::conc_instruction_and_data_patch:
|
||||
return -4 * (10 + slow_path_size(nm));
|
||||
case NMethodPatchingType::conc_data_patch:
|
||||
return -4 * (5 + slow_path_size(nm));
|
||||
}
|
||||
ShouldNotReachHere();
|
||||
return 0;
|
||||
}
|
||||
|
||||
class NativeNMethodBarrier: public NativeInstruction {
|
||||
address instruction_address() const { return addr_at(0); }
|
||||
|
||||
int guard_offset() {
|
||||
BarrierSetAssembler* bs_asm = BarrierSet::barrier_set()->barrier_set_assembler();
|
||||
if (bs_asm->nmethod_code_patching()) {
|
||||
return 4 * 15;
|
||||
} else {
|
||||
return 4 * 10;
|
||||
}
|
||||
int local_guard_offset(nmethod* nm) {
|
||||
// It's the last instruction
|
||||
return (-entry_barrier_offset(nm)) - 4;
|
||||
}
|
||||
|
||||
int *guard_addr() {
|
||||
return reinterpret_cast<int*>(instruction_address() + guard_offset());
|
||||
int *guard_addr(nmethod* nm) {
|
||||
if (nm->is_compiled_by_c2()) {
|
||||
// With c2 compiled code, the guard is out-of-line in a stub
|
||||
// We find it using the RelocIterator.
|
||||
RelocIterator iter(nm);
|
||||
while (iter.next()) {
|
||||
if (iter.type() == relocInfo::entry_guard_type) {
|
||||
entry_guard_Relocation* const reloc = iter.entry_guard_reloc();
|
||||
return reinterpret_cast<int*>(reloc->addr());
|
||||
}
|
||||
}
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
return reinterpret_cast<int*>(instruction_address() + local_guard_offset(nm));
|
||||
}
|
||||
|
||||
public:
|
||||
int get_value() {
|
||||
return Atomic::load_acquire(guard_addr());
|
||||
int get_value(nmethod* nm) {
|
||||
return Atomic::load_acquire(guard_addr(nm));
|
||||
}
|
||||
|
||||
void set_value(int value) {
|
||||
Atomic::release_store(guard_addr(), value);
|
||||
void set_value(nmethod* nm, int value) {
|
||||
Atomic::release_store(guard_addr(nm), value);
|
||||
}
|
||||
|
||||
void verify() const;
|
||||
@ -120,24 +153,8 @@ void BarrierSetNMethod::deoptimize(nmethod* nm, address* return_address_ptr) {
|
||||
new_frame->pc = SharedRuntime::get_handle_wrong_method_stub();
|
||||
}
|
||||
|
||||
// This is the offset of the entry barrier from where the frame is completed.
|
||||
// If any code changes between the end of the verified entry where the entry
|
||||
// barrier resides, and the completion of the frame, then
|
||||
// NativeNMethodCmpBarrier::verify() will immediately complain when it does
|
||||
// not find the expected native instruction at this offset, which needs updating.
|
||||
// Note that this offset is invariant of PreserveFramePointer.
|
||||
|
||||
static int entry_barrier_offset() {
|
||||
BarrierSetAssembler* bs_asm = BarrierSet::barrier_set()->barrier_set_assembler();
|
||||
if (bs_asm->nmethod_code_patching()) {
|
||||
return -4 * 16;
|
||||
} else {
|
||||
return -4 * 11;
|
||||
}
|
||||
}
|
||||
|
||||
static NativeNMethodBarrier* native_nmethod_barrier(nmethod* nm) {
|
||||
address barrier_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset();
|
||||
address barrier_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset(nm);
|
||||
NativeNMethodBarrier* barrier = reinterpret_cast<NativeNMethodBarrier*>(barrier_address);
|
||||
debug_only(barrier->verify());
|
||||
return barrier;
|
||||
@ -160,7 +177,7 @@ void BarrierSetNMethod::disarm(nmethod* nm) {
|
||||
// Disarms the nmethod guard emitted by BarrierSetAssembler::nmethod_entry_barrier.
|
||||
// Symmetric "LDR; DMB ISHLD" is in the nmethod barrier.
|
||||
NativeNMethodBarrier* barrier = native_nmethod_barrier(nm);
|
||||
barrier->set_value(disarmed_value());
|
||||
barrier->set_value(nm, disarmed_value());
|
||||
}
|
||||
|
||||
void BarrierSetNMethod::arm(nmethod* nm, int arm_value) {
|
||||
@ -180,7 +197,7 @@ void BarrierSetNMethod::arm(nmethod* nm, int arm_value) {
|
||||
}
|
||||
|
||||
NativeNMethodBarrier* barrier = native_nmethod_barrier(nm);
|
||||
barrier->set_value(arm_value);
|
||||
barrier->set_value(nm, arm_value);
|
||||
}
|
||||
|
||||
bool BarrierSetNMethod::is_armed(nmethod* nm) {
|
||||
@ -189,5 +206,5 @@ bool BarrierSetNMethod::is_armed(nmethod* nm) {
|
||||
}
|
||||
|
||||
NativeNMethodBarrier* barrier = native_nmethod_barrier(nm);
|
||||
return barrier->get_value() != disarmed_value();
|
||||
return barrier->get_value(nm) != disarmed_value();
|
||||
}
|
||||
|
@ -62,7 +62,7 @@ public:
|
||||
|
||||
void iu_barrier(MacroAssembler* masm, Register dst, Register tmp);
|
||||
|
||||
virtual bool nmethod_code_patching() { return false; }
|
||||
virtual NMethodPatchingType nmethod_patching_type() { return NMethodPatchingType::conc_data_patch; }
|
||||
|
||||
#ifdef COMPILER1
|
||||
void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub);
|
||||
|
@ -76,7 +76,7 @@ public:
|
||||
Register tmp,
|
||||
Label& slowpath);
|
||||
|
||||
virtual bool nmethod_code_patching() { return false; }
|
||||
virtual NMethodPatchingType nmethod_patching_type() { return NMethodPatchingType::conc_data_patch; }
|
||||
|
||||
#ifdef COMPILER1
|
||||
void generate_c1_load_barrier_test(LIR_Assembler* ce,
|
||||
|
@ -4475,7 +4475,7 @@ void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) {
|
||||
// ordered with respect to oop accesses.
|
||||
// Using immediate literals would necessitate ISBs.
|
||||
BarrierSet* bs = BarrierSet::barrier_set();
|
||||
if ((bs->barrier_set_nmethod() != NULL && !bs->barrier_set_assembler()->nmethod_code_patching()) || !immediate) {
|
||||
if ((bs->barrier_set_nmethod() != NULL && bs->barrier_set_assembler()->nmethod_patching_type() == NMethodPatchingType::conc_data_patch) || !immediate) {
|
||||
address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address
|
||||
ldr_constant(dst, Address(dummy, rspec));
|
||||
} else
|
||||
|
@ -1424,7 +1424,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
|
||||
__ sub(sp, sp, stack_size - 2*wordSize);
|
||||
|
||||
BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
|
||||
bs->nmethod_entry_barrier(masm);
|
||||
bs->nmethod_entry_barrier(masm, NULL /* slow_path */, NULL /* continuation */, NULL /* guard */);
|
||||
|
||||
// Frame is now completed as far as size and linkage.
|
||||
int frame_complete = ((intptr_t)__ pc()) - start;
|
||||
|
@ -5145,7 +5145,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
return entry;
|
||||
}
|
||||
|
||||
address generate_method_entry_barrier() {
|
||||
address generate_method_entry_barrier() {
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier");
|
||||
|
||||
@ -5155,10 +5155,10 @@ class StubGenerator: public StubCodeGenerator {
|
||||
|
||||
BarrierSetAssembler* bs_asm = BarrierSet::barrier_set()->barrier_set_assembler();
|
||||
|
||||
if (bs_asm->nmethod_code_patching()) {
|
||||
if (bs_asm->nmethod_patching_type() == NMethodPatchingType::conc_instruction_and_data_patch) {
|
||||
BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
|
||||
// We can get here despite the nmethod being good, if we have not
|
||||
// yet applied our cross modification fence.
|
||||
// yet applied our cross modification fence (or data fence).
|
||||
Address thread_epoch_addr(rthread, in_bytes(bs_nm->thread_disarmed_offset()) + 4);
|
||||
__ lea(rscratch2, ExternalAddress(bs_asm->patching_epoch_addr()));
|
||||
__ ldrw(rscratch2, rscratch2);
|
||||
|
@ -269,6 +269,7 @@ class relocInfo {
|
||||
runtime_call_w_cp_type = 14, // Runtime call which may load its target from the constant pool
|
||||
data_prefix_tag = 15, // tag for a prefix (carries data arguments)
|
||||
post_call_nop_type = 16, // A tag for post call nop relocations
|
||||
entry_guard_type = 17, // A tag for an nmethod entry barrier guard value
|
||||
type_mask = 31 // A mask which selects only the above values
|
||||
};
|
||||
|
||||
@ -309,6 +310,7 @@ class relocInfo {
|
||||
visitor(section_word) \
|
||||
visitor(trampoline_stub) \
|
||||
visitor(post_call_nop) \
|
||||
visitor(entry_guard) \
|
||||
|
||||
|
||||
public:
|
||||
@ -883,6 +885,19 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
class entry_guard_Relocation : public Relocation {
|
||||
friend class RelocIterator;
|
||||
|
||||
public:
|
||||
entry_guard_Relocation() : Relocation(relocInfo::entry_guard_type) { }
|
||||
|
||||
static RelocationHolder spec() {
|
||||
RelocationHolder rh = newHolder();
|
||||
new(rh) entry_guard_Relocation();
|
||||
return rh;
|
||||
}
|
||||
};
|
||||
|
||||
// A CallRelocation always points at a call instruction.
|
||||
// It is PC-relative on most machines.
|
||||
class CallRelocation : public Relocation {
|
||||
|
@ -118,14 +118,18 @@ public:
|
||||
class C2EntryBarrierStub: public ResourceObj {
|
||||
Label _slow_path;
|
||||
Label _continuation;
|
||||
Label _guard; // Used on AArch64
|
||||
|
||||
public:
|
||||
C2EntryBarrierStub() :
|
||||
_slow_path(),
|
||||
_continuation() {}
|
||||
_continuation(),
|
||||
_guard() {}
|
||||
|
||||
Label& slow_path() { return _slow_path; }
|
||||
Label& continuation() { return _continuation; }
|
||||
Label& guard() { return _guard; }
|
||||
|
||||
};
|
||||
|
||||
class C2EntryBarrierStubTable {
|
||||
|
Loading…
x
Reference in New Issue
Block a user