8290688: Optimize x86_64 nmethod entry barriers
Reviewed-by: kvn, rrich
This commit is contained in:
parent
54854d9300
commit
b28f9dab80
@ -28,6 +28,8 @@
|
||||
// C2_MacroAssembler contains high-level macros for C2
|
||||
|
||||
public:
|
||||
// Empty implementation: emits no code for the given entry barrier stub.
void emit_entry_barrier_stub(C2EntryBarrierStub* stub) {}
|
||||
// Reports an entry barrier stub size of zero, matching the empty
// emit_entry_barrier_stub() of this class.
static int entry_barrier_stub_size() { return 0; }
|
||||
|
||||
void string_compare(Register str1, Register str2,
|
||||
Register cnt1, Register cnt2, Register result,
|
||||
|
@ -28,6 +28,9 @@
|
||||
// C2_MacroAssembler contains high-level macros for C2
|
||||
|
||||
public:
|
||||
// Empty implementation: emits no code for the given entry barrier stub.
void emit_entry_barrier_stub(C2EntryBarrierStub* stub) {}
|
||||
// Reports an entry barrier stub size of zero, matching the empty
// emit_entry_barrier_stub() of this class.
static int entry_barrier_stub_size() { return 0; }
|
||||
|
||||
// Compare char[] arrays aligned to 4 bytes.
|
||||
void char_arrays_equals(Register ary1, Register ary2,
|
||||
Register limit, Register result,
|
||||
|
@ -28,6 +28,9 @@
|
||||
// C2_MacroAssembler contains high-level macros for C2
|
||||
|
||||
public:
|
||||
// Empty implementation: emits no code for the given entry barrier stub.
void emit_entry_barrier_stub(C2EntryBarrierStub* stub) {}
|
||||
// Reports an entry barrier stub size of zero, matching the empty
// emit_entry_barrier_stub() of this class.
static int entry_barrier_stub_size() { return 0; }
|
||||
|
||||
// Intrinsics for CompactStrings
|
||||
// Compress char[] to byte[] by compressing 16 bytes at once.
|
||||
void string_compress_16(Register src, Register dst, Register cnt,
|
||||
|
@ -36,6 +36,8 @@
|
||||
VectorRegister vrs,
|
||||
bool is_latin, Label& DONE);
|
||||
public:
|
||||
// Empty implementation: emits no code for the given entry barrier stub.
void emit_entry_barrier_stub(C2EntryBarrierStub* stub) {}
|
||||
// Reports an entry barrier stub size of zero, matching the empty
// emit_entry_barrier_stub() of this class.
static int entry_barrier_stub_size() { return 0; }
|
||||
|
||||
void string_compare(Register str1, Register str2,
|
||||
Register cnt1, Register cnt2, Register result,
|
||||
|
@ -29,6 +29,9 @@
|
||||
// C2_MacroAssembler contains high-level macros for C2
|
||||
|
||||
public:
|
||||
// Empty implementation: emits no code for the given entry barrier stub.
void emit_entry_barrier_stub(C2EntryBarrierStub* stub) {}
|
||||
// Reports an entry barrier stub size of zero, matching the empty
// emit_entry_barrier_stub() of this class.
static int entry_barrier_stub_size() { return 0; }
|
||||
|
||||
//-------------------------------------------
|
||||
// Special String Intrinsics Implementation.
|
||||
//-------------------------------------------
|
||||
|
@ -325,7 +325,8 @@ void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_by
|
||||
decrement(rsp, frame_size_in_bytes); // does not emit code for frame_size == 0
|
||||
|
||||
BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
|
||||
bs->nmethod_entry_barrier(this);
|
||||
// C1 code is not hot enough to micro-optimize the nmethod entry barrier with an out-of-line stub
|
||||
bs->nmethod_entry_barrier(this, NULL /* slow_path */, NULL /* continuation */);
|
||||
}
|
||||
|
||||
|
||||
|
@ -30,6 +30,7 @@
|
||||
#include "oops/methodData.hpp"
|
||||
#include "opto/c2_MacroAssembler.hpp"
|
||||
#include "opto/intrinsicnode.hpp"
|
||||
#include "opto/output.hpp"
|
||||
#include "opto/opcodes.hpp"
|
||||
#include "opto/subnode.hpp"
|
||||
#include "runtime/objectMonitor.hpp"
|
||||
@ -128,10 +129,38 @@ void C2_MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool
|
||||
|
||||
if (!is_stub) {
|
||||
BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
|
||||
bs->nmethod_entry_barrier(this);
|
||||
#ifdef _LP64
|
||||
if (BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
|
||||
// We put the non-hot code of the nmethod entry barrier out-of-line in a stub.
|
||||
Label dummy_slow_path;
|
||||
Label dummy_continuation;
|
||||
Label* slow_path = &dummy_slow_path;
|
||||
Label* continuation = &dummy_continuation;
|
||||
if (!Compile::current()->output()->in_scratch_emit_size()) {
|
||||
// Use real labels from actual stub when not emitting code for the purpose of measuring its size
|
||||
C2EntryBarrierStub* stub = Compile::current()->output()->entry_barrier_table()->add_entry_barrier();
|
||||
slow_path = &stub->slow_path();
|
||||
continuation = &stub->continuation();
|
||||
}
|
||||
bs->nmethod_entry_barrier(this, slow_path, continuation);
|
||||
}
|
||||
#else
|
||||
// Don't bother with out-of-line nmethod entry barrier stub for x86_32.
|
||||
bs->nmethod_entry_barrier(this, NULL /* slow_path */, NULL /* continuation */);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
// Emits the out-of-line part of the nmethod entry barrier for the given stub:
// binds the stub's slow-path label at the current position, calls the method
// entry barrier runtime stub, and then jumps to the stub's continuation label.
void C2_MacroAssembler::emit_entry_barrier_stub(C2EntryBarrierStub* stub) {
  Label& slow_path_entry = stub->slow_path();
  Label& resume_point    = stub->continuation();

  bind(slow_path_entry);
  call(RuntimeAddress(StubRoutines::x86::method_entry_barrier()));
  // The short jump form is explicitly not requested here (maybe_short == false).
  jmp(resume_point, false /* maybe_short */);
}
|
||||
|
||||
// Size reported for the code produced by emit_entry_barrier_stub(); the value
// is compared against the assembler offset delta in C2EntryBarrierStubTable::emit().
// NOTE(review): presumably a 5-byte call plus a 5-byte long jmp - confirm
// against the instructions actually emitted.
int C2_MacroAssembler::entry_barrier_stub_size() {
  const int stub_size = 10;
  return stub_size;
}
|
||||
|
||||
inline Assembler::AvxVectorLen C2_MacroAssembler::vector_length_encoding(int vlen_in_bytes) {
|
||||
switch (vlen_in_bytes) {
|
||||
case 4: // fall-through
|
||||
|
@ -31,6 +31,9 @@ public:
|
||||
// C2 compiled method's prolog code.
|
||||
void verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b, bool is_stub);
|
||||
|
||||
void emit_entry_barrier_stub(C2EntryBarrierStub* stub);
|
||||
static int entry_barrier_stub_size();
|
||||
|
||||
Assembler::AvxVectorLen vector_length_encoding(int vlen_in_bytes);
|
||||
|
||||
// Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
|
||||
|
@ -309,22 +309,34 @@ void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, Register th
|
||||
}
|
||||
|
||||
#ifdef _LP64
|
||||
void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) {
|
||||
void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label* slow_path, Label* continuation) {
|
||||
BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
|
||||
if (bs_nm == NULL) {
|
||||
return;
|
||||
}
|
||||
Label continuation;
|
||||
Register thread = r15_thread;
|
||||
Address disarmed_addr(thread, in_bytes(bs_nm->thread_disarmed_offset()));
|
||||
__ align(8);
|
||||
// The immediate is the last 4 bytes, so if we align the start of the cmp
|
||||
// instruction to 4 bytes, we know that the second half of it is also 4
|
||||
// byte aligned, which means that the immediate will not cross a cache line
|
||||
__ align(4);
|
||||
uintptr_t before_cmp = (uintptr_t)__ pc();
|
||||
__ cmpl(disarmed_addr, 0);
|
||||
__ jcc(Assembler::equal, continuation);
|
||||
__ call(RuntimeAddress(StubRoutines::x86::method_entry_barrier()));
|
||||
__ bind(continuation);
|
||||
uintptr_t after_cmp = (uintptr_t)__ pc();
|
||||
guarantee(after_cmp - before_cmp == 8, "Wrong assumed instruction length");
|
||||
|
||||
if (slow_path != NULL) {
|
||||
__ jcc(Assembler::notEqual, *slow_path);
|
||||
__ bind(*continuation);
|
||||
} else {
|
||||
Label done;
|
||||
__ jccb(Assembler::equal, done);
|
||||
__ call(RuntimeAddress(StubRoutines::x86::method_entry_barrier()));
|
||||
__ bind(done);
|
||||
}
|
||||
}
|
||||
#else
|
||||
void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) {
|
||||
void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label*, Label*) {
|
||||
BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
|
||||
if (bs_nm == NULL) {
|
||||
return;
|
||||
|
@ -68,7 +68,7 @@ public:
|
||||
|
||||
virtual void barrier_stubs_init() {}
|
||||
|
||||
virtual void nmethod_entry_barrier(MacroAssembler* masm);
|
||||
virtual void nmethod_entry_barrier(MacroAssembler* masm, Label* slow_path, Label* continuation);
|
||||
virtual void c2i_entry_barrier(MacroAssembler* masm);
|
||||
};
|
||||
|
||||
|
@ -32,6 +32,7 @@
|
||||
#include "runtime/sharedRuntime.hpp"
|
||||
#include "utilities/align.hpp"
|
||||
#include "utilities/debug.hpp"
|
||||
#include "utilities/macros.hpp"
|
||||
|
||||
class NativeNMethodCmpBarrier: public NativeInstruction {
|
||||
public:
|
||||
@ -62,7 +63,7 @@ public:
|
||||
|
||||
#ifdef _LP64
|
||||
void NativeNMethodCmpBarrier::verify() const {
|
||||
if (((uintptr_t) instruction_address()) & 0x7) {
|
||||
if (((uintptr_t) instruction_address()) & 0x3) {
|
||||
fatal("Not properly aligned");
|
||||
}
|
||||
|
||||
@ -156,10 +157,20 @@ void BarrierSetNMethod::deoptimize(nmethod* nm, address* return_address_ptr) {
|
||||
// NativeNMethodCmpBarrier::verify() will immediately complain when it does
|
||||
// not find the expected native instruction at this offset, which needs updating.
|
||||
// Note that this offset is invariant of PreserveFramePointer.
|
||||
static const int entry_barrier_offset = LP64_ONLY(-19) NOT_LP64(-18);
|
||||
// Returns the (negative) offset that native_nmethod_barrier() adds to
// code_begin() + frame_complete_offset() to locate the entry barrier's
// compare instruction in the given nmethod.
//
// The offset depends on how the barrier was emitted, so it differs between
// C2-compiled nmethods and all other nmethods on 64-bit, and is a single
// fixed value on 32-bit.
// NOTE(review): presumably -14 covers an 8-byte cmp plus a 6-byte long jcc
// (out-of-line slow path), and -15 covers an 8-byte cmp, a 2-byte short jcc
// and a 5-byte call - confirm against BarrierSetAssembler::nmethod_entry_barrier.
//
// The return type is plain 'int': a top-level 'const' on a by-value return
// has no effect and only triggers ignored-qualifier warnings.
static int entry_barrier_offset(nmethod* nm) {
#ifdef _LP64
  if (nm->is_compiled_by_c2()) {
    return -14;
  } else {
    return -15;
  }
#else
  return -18;
#endif
}
|
||||
|
||||
static NativeNMethodCmpBarrier* native_nmethod_barrier(nmethod* nm) {
|
||||
address barrier_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset;
|
||||
address barrier_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset(nm);
|
||||
NativeNMethodCmpBarrier* barrier = reinterpret_cast<NativeNMethodCmpBarrier*>(barrier_address);
|
||||
debug_only(barrier->verify());
|
||||
return barrier;
|
||||
|
@ -1518,7 +1518,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
|
||||
|
||||
|
||||
BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
|
||||
bs->nmethod_entry_barrier(masm);
|
||||
bs->nmethod_entry_barrier(masm, NULL /* slow_path */, NULL /* continuation */);
|
||||
|
||||
// Frame is now completed as far as size and linkage.
|
||||
int frame_complete = ((intptr_t)__ pc()) - start;
|
||||
|
@ -1744,7 +1744,8 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
|
||||
__ subptr(rsp, stack_size - 2*wordSize);
|
||||
|
||||
BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
|
||||
bs->nmethod_entry_barrier(masm);
|
||||
// native wrapper is not hot enough to micro-optimize the nmethod entry barrier with an out-of-line stub
|
||||
bs->nmethod_entry_barrier(masm, NULL /* slow_path */, NULL /* continuation */);
|
||||
|
||||
// Frame is now completed as far as size and linkage.
|
||||
int frame_complete = ((intptr_t)__ pc()) - start;
|
||||
|
@ -29,6 +29,8 @@
|
||||
#include "asm/macroAssembler.inline.hpp"
|
||||
#include "utilities/macros.hpp"
|
||||
|
||||
class C2EntryBarrierStub;
|
||||
|
||||
class C2_MacroAssembler: public MacroAssembler {
|
||||
public:
|
||||
// creation
|
||||
|
@ -39,6 +39,7 @@
|
||||
#include "opto/ad.hpp"
|
||||
#include "opto/block.hpp"
|
||||
#include "opto/c2compiler.hpp"
|
||||
#include "opto/c2_MacroAssembler.hpp"
|
||||
#include "opto/callnode.hpp"
|
||||
#include "opto/cfgnode.hpp"
|
||||
#include "opto/locknode.hpp"
|
||||
@ -284,12 +285,51 @@ int C2SafepointPollStubTable::estimate_stub_size() const {
|
||||
return result;
|
||||
}
|
||||
|
||||
// Nmethod entry barrier stubs
|
||||
// Creates the entry barrier stub for the current compilation, remembers it in
// this table and returns it. The stub is allocated in the compilation's
// comp_arena. Asserts that no stub has been registered before.
C2EntryBarrierStub* C2EntryBarrierStubTable::add_entry_barrier() {
  assert(_stub == NULL, "There can only be one entry barrier stub");

  C2EntryBarrierStub* const new_stub = new (Compile::current()->comp_arena()) C2EntryBarrierStub();
  _stub = new_stub;
  return new_stub;
}
|
||||
|
||||
// Emits the registered entry barrier stub, if any, into the given code buffer.
//
// Returns without emitting when no stub was registered. If making room in the
// instruction section expanded the buffer and the buffer then has no blob, a
// "CodeCache is full" failure is recorded on the current ciEnv and nothing is
// emitted. In debug builds the emitted size is checked against
// C2_MacroAssembler::entry_barrier_stub_size().
void C2EntryBarrierStubTable::emit(CodeBuffer& cb) {
  if (_stub == NULL) {
    return;  // no stub was registered - nothing to do
  }

  C2_MacroAssembler masm(&cb);

  // Make sure there is enough space in the code buffer before emitting.
  const bool expanded = cb.insts()->maybe_expand_to_ensure_remaining(PhaseOutput::MAX_inst_size);
  if (expanded && cb.blob() == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return;
  }

  const intptr_t start_offset = masm.offset();
  masm.emit_entry_barrier_stub(_stub);
  const intptr_t end_offset = masm.offset();

  // The size estimate used when sizing the buffer must match what was emitted.
  int emitted_size = (int)(end_offset - start_offset);
  int estimated_size = C2_MacroAssembler::entry_barrier_stub_size();
  assert(emitted_size == estimated_size, "Estimated size is wrong, expected %d, was %d", estimated_size, emitted_size);
}
|
||||
|
||||
int C2EntryBarrierStubTable::estimate_stub_size() const {
|
||||
if (BarrierSet::barrier_set()->barrier_set_nmethod() == NULL) {
|
||||
// No nmethod entry barrier?
|
||||
return 0;
|
||||
}
|
||||
|
||||
return C2_MacroAssembler::entry_barrier_stub_size();
|
||||
}
|
||||
|
||||
PhaseOutput::PhaseOutput()
|
||||
: Phase(Phase::Output),
|
||||
_code_buffer("Compile::Fill_buffer"),
|
||||
_first_block_size(0),
|
||||
_handler_table(),
|
||||
_inc_table(),
|
||||
_safepoint_poll_table(),
|
||||
_entry_barrier_table(),
|
||||
_oop_map_set(NULL),
|
||||
_scratch_buffer_blob(NULL),
|
||||
_scratch_locs_memory(NULL),
|
||||
@ -1302,6 +1342,7 @@ CodeBuffer* PhaseOutput::init_buffer() {
|
||||
BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
|
||||
stub_req += bs->estimate_stub_size();
|
||||
stub_req += safepoint_poll_table()->estimate_stub_size();
|
||||
stub_req += entry_barrier_table()->estimate_stub_size();
|
||||
|
||||
// nmethod and CodeBuffer count stubs & constants as part of method's code.
|
||||
// class HandlerImpl is platform-specific and defined in the *.ad files.
|
||||
@ -1812,6 +1853,10 @@ void PhaseOutput::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
|
||||
safepoint_poll_table()->emit(*cb);
|
||||
if (C->failing()) return;
|
||||
|
||||
// Fill in stubs for calling the runtime from nmethod entries.
|
||||
entry_barrier_table()->emit(*cb);
|
||||
if (C->failing()) return;
|
||||
|
||||
#ifndef PRODUCT
|
||||
// Information on the size of the method, without the extraneous code
|
||||
Scheduling::increment_method_size(cb->insts_size());
|
||||
|
@ -40,6 +40,7 @@ class Arena;
|
||||
class Bundle;
|
||||
class Block;
|
||||
class Block_Array;
|
||||
class C2_MacroAssembler;
|
||||
class ciMethod;
|
||||
class Compile;
|
||||
class MachNode;
|
||||
@ -113,6 +114,30 @@ public:
|
||||
void emit(CodeBuffer& cb);
|
||||
};
|
||||
|
||||
// We move non-hot code of the nmethod entry barrier to an out-of-line stub
|
||||
class C2EntryBarrierStub: public ResourceObj {
|
||||
Label _slow_path;
|
||||
Label _continuation;
|
||||
|
||||
public:
|
||||
C2EntryBarrierStub() :
|
||||
_slow_path(),
|
||||
_continuation() {}
|
||||
|
||||
Label& slow_path() { return _slow_path; }
|
||||
Label& continuation() { return _continuation; }
|
||||
};
|
||||
|
||||
class C2EntryBarrierStubTable {
|
||||
C2EntryBarrierStub* _stub;
|
||||
|
||||
public:
|
||||
C2EntryBarrierStubTable() : _stub(NULL) {}
|
||||
C2EntryBarrierStub* add_entry_barrier();
|
||||
int estimate_stub_size() const;
|
||||
void emit(CodeBuffer& cb);
|
||||
};
|
||||
|
||||
class PhaseOutput : public Phase {
|
||||
private:
|
||||
// Instruction bits passed off to the VM
|
||||
@ -122,6 +147,7 @@ private:
|
||||
ExceptionHandlerTable _handler_table; // Table of native-code exception handlers
|
||||
ImplicitExceptionTable _inc_table; // Table of implicit null checks in native code
|
||||
C2SafepointPollStubTable _safepoint_poll_table;// Table for safepoint polls
|
||||
C2EntryBarrierStubTable _entry_barrier_table; // Table for entry barrier stubs
|
||||
OopMapSet* _oop_map_set; // Table of oop maps (one for each safepoint location)
|
||||
BufferBlob* _scratch_buffer_blob; // For temporary code buffers.
|
||||
relocInfo* _scratch_locs_memory; // For temporary code buffers.
|
||||
@ -172,6 +198,9 @@ public:
|
||||
// Safepoint poll table
|
||||
C2SafepointPollStubTable* safepoint_poll_table() { return &_safepoint_poll_table; }
|
||||
|
||||
// Entry barrier table
|
||||
C2EntryBarrierStubTable* entry_barrier_table() { return &_entry_barrier_table; }
|
||||
|
||||
// Code emission iterator
|
||||
Block* block() { return _block; }
|
||||
int index() { return _index; }
|
||||
|
Loading…
Reference in New Issue
Block a user