diff --git a/src/hotspot/cpu/arm/arm.ad b/src/hotspot/cpu/arm/arm.ad
index ecd1c850ae4..3d829e33ee8 100644
--- a/src/hotspot/cpu/arm/arm.ad
+++ b/src/hotspot/cpu/arm/arm.ad
@@ -59,6 +59,9 @@ source_hpp %{
 // To keep related declarations/definitions/uses close together,
 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
 
+#include "asm/macroAssembler.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
+
 // Does destination need to be loaded in a register then passed to a
 // branch instruction?
 extern bool maybe_far_call(const CallNode *n);
@@ -286,6 +289,17 @@ void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
   if (framesize != 0) {
     st->print ("SUB R_SP, R_SP, " SIZE_FORMAT,framesize);
   }
+
+  if (C->stub_function() == NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
+    st->print("ldr t0, [guard]\n\t");
+    st->print("ldr t1, [Rthread, #thread_disarmed_offset]\n\t");
+    st->print("cmp t0, t1\n\t");
+    st->print("beq skip\n\t");
+    st->print("blr #nmethod_entry_barrier_stub\n\t");
+    st->print("b skip\n\t");
+    st->print("guard: int\n\t");
+    st->print("skip:\n\t");
+  }
 }
 
 #endif
@@ -318,6 +332,11 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
     __ sub_slow(SP, SP, framesize);
   }
 
+  if (C->stub_function() == NULL) {
+    BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
+    bs->nmethod_entry_barrier(&_masm);
+  }
+
   // offset from scratch buffer is not valid
   if (strcmp(cbuf.name(), "Compile::Fill_buffer") == 0) {
     C->output()->set_frame_complete( __ offset() );
diff --git a/src/hotspot/cpu/arm/c1_MacroAssembler_arm.cpp b/src/hotspot/cpu/arm/c1_MacroAssembler_arm.cpp
index d6085b13e42..1e17d18b246 100644
--- a/src/hotspot/cpu/arm/c1_MacroAssembler_arm.cpp
+++ b/src/hotspot/cpu/arm/c1_MacroAssembler_arm.cpp
@@ -25,6 +25,8 @@
 #include "precompiled.hpp"
 #include "c1/c1_MacroAssembler.hpp"
 #include "c1/c1_Runtime1.hpp"
+#include "gc/shared/barrierSet.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
 #include "gc/shared/collectedHeap.hpp"
 #include "gc/shared/tlab_globals.hpp"
 #include "interpreter/interpreter.hpp"
@@ -62,6 +64,10 @@ void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_by
   // if this method contains a methodHandle call site
   raw_push(FP, LR);
   sub_slow(SP, SP, frame_size_in_bytes);
+
+  // Insert nmethod entry barrier into frame.
+  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
+  bs->nmethod_entry_barrier(this);
 }
 
 void C1_MacroAssembler::remove_frame(int frame_size_in_bytes) {
diff --git a/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp b/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp
index e521cb9a75e..baaab292130 100644
--- a/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp
+++ b/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp
@@ -23,10 +23,13 @@
  */
 
 #include "precompiled.hpp"
+#include "gc/shared/barrierSet.hpp"
 #include "gc/shared/barrierSetAssembler.hpp"
+#include "gc/shared/barrierSetNMethod.hpp"
 #include "gc/shared/collectedHeap.hpp"
 #include "memory/universe.hpp"
 #include "runtime/javaThread.hpp"
+#include "runtime/stubRoutines.hpp"
 
 #define __ masm->
 
@@ -195,3 +198,47 @@ void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, RegisterOrC
   // Unborrow the Rthread
   __ sub(Rthread, Ralloc, in_bytes(JavaThread::allocated_bytes_offset()));
 }
+
+void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) {
+
+  BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
+
+  Register tmp0 = Rtemp;
+  Register tmp1 = R5; // must be callee-save register
+
+  if (bs_nm == NULL) {
+    return;
+  }
+
+  // The are no GCs that require memory barrier on arm32 now
+#ifdef ASSERT
+  NMethodPatchingType patching_type = nmethod_patching_type();
+  assert(patching_type == NMethodPatchingType::stw_instruction_and_data_patch, "Unsupported patching type");
+#endif
+
+  Label skip, guard;
+  Address thread_disarmed_addr(Rthread, in_bytes(bs_nm->thread_disarmed_offset()));
+
+  __ block_comment("nmethod_barrier begin");
+  __ ldr_label(tmp0, guard);
+
+  // No memory barrier here
+  __ ldr(tmp1, thread_disarmed_addr);
+  __ cmp(tmp0, tmp1);
+  __ b(skip, eq);
+
+  __ mov_address(tmp0, StubRoutines::Arm::method_entry_barrier());
+  __ call(tmp0);
+  __ b(skip);
+
+  __ bind(guard);
+
+  // nmethod guard value. Skipped over in common case.
+  //
+  // Put a debug value to make any offsets skew
+  // clearly visible in coredump
+  __ emit_int32(0xDEADBEAF);
+
+  __ bind(skip);
+  __ block_comment("nmethod_barrier end");
+}
diff --git a/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.hpp b/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.hpp
index 6f100677a8c..37e0c6525f3 100644
--- a/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.hpp
+++ b/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.hpp
@@ -29,6 +29,10 @@
 #include "memory/allocation.hpp"
 #include "oops/access.hpp"
 
+enum class NMethodPatchingType {
+  stw_instruction_and_data_patch,
+};
+
 class BarrierSetAssembler: public CHeapObj<mtGC> {
 private:
   void incr_allocated_bytes(MacroAssembler* masm,
@@ -56,6 +60,8 @@ public:
   );
 
   virtual void barrier_stubs_init() {}
+  virtual NMethodPatchingType nmethod_patching_type() { return NMethodPatchingType::stw_instruction_and_data_patch; }
+  virtual void nmethod_entry_barrier(MacroAssembler* masm);
 };
 
 #endif // CPU_ARM_GC_SHARED_BARRIERSETASSEMBLER_ARM_HPP
diff --git a/src/hotspot/cpu/arm/gc/shared/barrierSetNMethod_arm.cpp b/src/hotspot/cpu/arm/gc/shared/barrierSetNMethod_arm.cpp
index a0361ddc2f4..f36d44bcfdc 100644
--- a/src/hotspot/cpu/arm/gc/shared/barrierSetNMethod_arm.cpp
+++ b/src/hotspot/cpu/arm/gc/shared/barrierSetNMethod_arm.cpp
@@ -23,18 +23,124 @@
  */
 
 #include "precompiled.hpp"
+#include "code/nativeInst.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
 #include "gc/shared/barrierSetNMethod.hpp"
+#include "logging/log.hpp"
+#include "memory/resourceArea.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/javaThread.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/registerMap.hpp"
+#include "utilities/align.hpp"
 #include "utilities/debug.hpp"
 
+// The constant below reflects the size of the barrier
+// in barrierSetAssembler_arm.cpp
+static const int entry_barrier_bytes = 9 * NativeInstruction::size();
+
+class NativeNMethodBarrier: public NativeInstruction {
+  address instruction_address() const { return addr_at(0); }
+
+  int *guard_addr() const {
+    // Last instruction in a barrier
+    return reinterpret_cast<int*>(instruction_address() + entry_barrier_bytes - wordSize);
+  }
+
+public:
+  int get_value() {
+    return Atomic::load_acquire(guard_addr());
+  }
+
+  void set_value(int value) {
+    Atomic::release_store(guard_addr(), value);
+  }
+
+  void verify() const;
+};
+
+// Check the first instruction of the nmethod entry barrier
+// to make sure that the offsets are not skewed.
+void NativeNMethodBarrier::verify() const {
+  NativeInstruction *ni = (NativeInstruction *) instruction_address();
+  if (!ni->is_ldr()) {
+    uint32_t *addr = (uint32_t *) ni;
+    tty->print_cr("Addr: " INTPTR_FORMAT " Code: 0x%x", (intptr_t) addr, (uint32_t) *addr);
+    fatal("not an ldr instruction.");
+  }
+}
+
+static NativeNMethodBarrier* native_nmethod_barrier(nmethod* nm) {
+  address barrier_address = nm->code_begin() + nm->frame_complete_offset() - entry_barrier_bytes;
+  NativeNMethodBarrier* barrier = reinterpret_cast<NativeNMethodBarrier*>(barrier_address);
+  debug_only(barrier->verify());
+  return barrier;
+}
+
+/* We're called from an nmethod when we need to deoptimize it. We do
+   this by throwing away the nmethod's frame and jumping to the
+   ic_miss stub. This looks like there has been an IC miss at the
+   entry of the nmethod, so we resolve the call, which will fall back
+   to the interpreter if the nmethod has been unloaded. */
 void BarrierSetNMethod::deoptimize(nmethod* nm, address* return_address_ptr) {
-  ShouldNotReachHere();
+
+  typedef struct {
+    intptr_t *sp; intptr_t *fp; address lr; address pc;
+  } frame_pointers_t;
+
+  frame_pointers_t *new_frame = (frame_pointers_t *)(return_address_ptr - 5);
+
+  JavaThread *thread = JavaThread::current();
+  RegisterMap reg_map(thread,
+                      RegisterMap::UpdateMap::skip,
+                      RegisterMap::ProcessFrames::include,
+                      RegisterMap::WalkContinuation::skip);
+  frame frame = thread->last_frame();
+
+  assert(frame.is_compiled_frame() || frame.is_native_frame(), "must be");
+  assert(frame.cb() == nm, "must be");
+  frame = frame.sender(&reg_map);
+
+  LogTarget(Trace, nmethod, barrier) out;
+  if (out.is_enabled()) {
+    ResourceMark mark;
+    log_trace(nmethod, barrier)("deoptimize(nmethod: %s(%p), return_addr: %p, osr: %d, thread: %p(%s), making rsp: %p) -> %p",
+                                nm->method()->name_and_sig_as_C_string(),
+                                nm, *(address *) return_address_ptr, nm->is_osr_method(), thread,
+                                thread->name(), frame.sp(), nm->verified_entry_point());
+  }
+
+  new_frame->sp = frame.sp();
+  new_frame->fp = frame.fp();
+  new_frame->lr = frame.pc();
+  new_frame->pc = SharedRuntime::get_handle_wrong_method_stub();
 }
 
 void BarrierSetNMethod::disarm(nmethod* nm) {
-  ShouldNotReachHere();
+  if (!supports_entry_barrier(nm)) {
+    return;
+  }
+
+  // Disarms the nmethod guard emitted by BarrierSetAssembler::nmethod_entry_barrier.
+  // Symmetric "LDR; DMB ISHLD" is in the nmethod barrier.
+  NativeNMethodBarrier* barrier = native_nmethod_barrier(nm);
+  barrier->set_value(disarmed_value());
+}
+
+void BarrierSetNMethod::arm(nmethod* nm, int arm_value) {
+  if (!supports_entry_barrier(nm)) {
+    return;
+  }
+
+  NativeNMethodBarrier* barrier = native_nmethod_barrier(nm);
+  barrier->set_value(arm_value);
 }
 
 bool BarrierSetNMethod::is_armed(nmethod* nm) {
-  ShouldNotReachHere();
-  return false;
+  if (!supports_entry_barrier(nm)) {
+    return false;
+  }
+
+  NativeNMethodBarrier* barrier = native_nmethod_barrier(nm);
+  return barrier->get_value() != disarmed_value();
 }
diff --git a/src/hotspot/cpu/arm/macroAssembler_arm.hpp b/src/hotspot/cpu/arm/macroAssembler_arm.hpp
index 73e2f2c0d17..50b56bfe79a 100644
--- a/src/hotspot/cpu/arm/macroAssembler_arm.hpp
+++ b/src/hotspot/cpu/arm/macroAssembler_arm.hpp
@@ -587,9 +587,23 @@
     AbstractAssembler::emit_address((address)L.data());
   }
 
+  void ldr_label(Register rd, Label& L) {
+    ldr(rd, Address(PC, target(L) - pc() - 8));
+  }
+
   void resolve_oop_handle(Register result);
   void load_mirror(Register mirror, Register method, Register tmp);
 
+  void enter() {
+    raw_push(FP, LR);
+    mov(FP, SP);
+  }
+
+  void leave() {
+    mov(SP, FP);
+    raw_pop(FP, LR);
+  }
+
 #define ARM_INSTR_1(common_mnemonic, arm32_mnemonic, arg_type) \
   void common_mnemonic(arg_type arg) { \
     arm32_mnemonic(arg); \
diff --git a/src/hotspot/cpu/arm/nativeInst_arm_32.hpp b/src/hotspot/cpu/arm/nativeInst_arm_32.hpp
index 0c9e157c553..62419a1ddf5 100644
--- a/src/hotspot/cpu/arm/nativeInst_arm_32.hpp
+++ b/src/hotspot/cpu/arm/nativeInst_arm_32.hpp
@@ -77,9 +77,12 @@ class RawNativeInstruction {
   address instruction_address() const { return addr_at(0); }
   address next_raw_instruction_address() const { return addr_at(instruction_size); }
 
+  static int size() { return instruction_size; }
+
   static RawNativeInstruction* at(address address) {
     return (RawNativeInstruction*)address;
   }
+
   RawNativeInstruction* next_raw() const {
     return at(next_raw_instruction_address());
   }
diff --git a/src/hotspot/cpu/arm/sharedRuntime_arm.cpp b/src/hotspot/cpu/arm/sharedRuntime_arm.cpp
index ab0e79d95c1..56dcc8a1903 100644
--- a/src/hotspot/cpu/arm/sharedRuntime_arm.cpp
+++ b/src/hotspot/cpu/arm/sharedRuntime_arm.cpp
@@ -28,6 +28,7 @@
 #include "code/icBuffer.hpp"
 #include "code/vtableStubs.hpp"
 #include "compiler/oopMap.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
 #include "interpreter/interpreter.hpp"
 #include "logging/log.hpp"
 #include "memory/resourceArea.hpp"
@@ -873,6 +874,10 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
   __ mov(FP, SP);
   __ sub_slow(SP, SP, stack_size - 2*wordSize);
 
+  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
+  assert(bs != NULL, "Sanity");
+  bs->nmethod_entry_barrier(masm);
+
   int frame_complete = __ pc() - start;
 
   OopMapSet* oop_maps = new OopMapSet();
diff --git a/src/hotspot/cpu/arm/stubGenerator_arm.cpp b/src/hotspot/cpu/arm/stubGenerator_arm.cpp
index 85227dbb8e8..d5204915619 100644
--- a/src/hotspot/cpu/arm/stubGenerator_arm.cpp
+++ b/src/hotspot/cpu/arm/stubGenerator_arm.cpp
@@ -27,6 +27,7 @@
 #include "compiler/oopMap.hpp"
 #include "gc/shared/barrierSet.hpp"
 #include "gc/shared/barrierSetAssembler.hpp"
+#include "gc/shared/barrierSetNMethod.hpp"
 #include "interpreter/interpreter.hpp"
 #include "memory/universe.hpp"
 #include "nativeInst_arm.hpp"
@@ -2905,6 +2906,53 @@
 
   }
 
+  address generate_method_entry_barrier() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier");
+
+    Label deoptimize_label;
+
+    address start = __ pc();
+
+    // No need to save PC on Arm
+    __ set_last_Java_frame(SP, FP, false, Rtemp);
+
+    __ enter();
+
+    __ add(Rtemp, SP, wordSize);  // Rtemp points to the saved lr
+    __ sub(SP, SP, 4 * wordSize); // four words for the returned {sp, fp, lr, pc}
+
+    const RegisterSet saved_regs = RegisterSet(R0, R10);
+    __ push(saved_regs);
+    __ fpush(FloatRegisterSet(D0, 16));
+
+    __ mov(c_rarg0, Rtemp);
+    __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetNMethod::nmethod_stub_entry_barrier), c_rarg0);
+
+    __ reset_last_Java_frame(Rtemp);
+
+    __ mov(Rtemp, R0);
+
+    __ fpop(FloatRegisterSet(D0, 16));
+    __ pop(saved_regs);
+
+    __ cbnz(Rtemp, deoptimize_label);
+
+    __ leave();
+    __ bx(LR);
+
+    __ BIND(deoptimize_label);
+
+    __ ldr(Rtemp, Address(SP, 0));
+    __ ldr(FP, Address(SP, wordSize));
+    __ ldr(LR, Address(SP, wordSize * 2));
+    __ ldr(R5, Address(SP, wordSize * 3));
+    __ mov(SP, Rtemp);
+    __ bx(R5);
+
+    return start;
+  }
+
 #define COMPILE_CRYPTO
 #include "stubRoutinesCrypto_arm.cpp"
 
@@ -3097,6 +3145,11 @@
     // arraycopy stubs used by compilers
     generate_arraycopy_stubs();
 
+    BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
+    if (bs_nm != NULL) {
+      StubRoutines::Arm::_method_entry_barrier = generate_method_entry_barrier();
+    }
+
 #ifdef COMPILE_CRYPTO
     // generate AES intrinsics code
     if (UseAESIntrinsics) {
diff --git a/src/hotspot/cpu/arm/stubRoutines_arm.cpp b/src/hotspot/cpu/arm/stubRoutines_arm.cpp
index 6ae6d1fe8d1..98280862e98 100644
--- a/src/hotspot/cpu/arm/stubRoutines_arm.cpp
+++ b/src/hotspot/cpu/arm/stubRoutines_arm.cpp
@@ -33,3 +33,5 @@ address StubRoutines::Arm::_partial_subtype_check = NULL;
 
 address StubRoutines::_atomic_load_long_entry = NULL;
 address StubRoutines::_atomic_store_long_entry = NULL;
+
+address StubRoutines::Arm::_method_entry_barrier = NULL;
diff --git a/src/hotspot/cpu/arm/stubRoutines_arm.hpp b/src/hotspot/cpu/arm/stubRoutines_arm.hpp
index c21c08f3e54..2a46e254ee2 100644
--- a/src/hotspot/cpu/arm/stubRoutines_arm.hpp
+++ b/src/hotspot/cpu/arm/stubRoutines_arm.hpp
@@ -42,11 +42,13 @@ class Arm {
 
   static address _idiv_irem_entry;
   static address _partial_subtype_check;
+  static address _method_entry_barrier;
 
  public:
 
   static address idiv_irem_entry() { return _idiv_irem_entry; }
   static address partial_subtype_check() { return _partial_subtype_check; }
+  static address method_entry_barrier() { return _method_entry_barrier; }
 };
 
 static bool returns_to_call_stub(address return_pc) {
diff --git a/src/hotspot/share/gc/shared/barrierSet.cpp b/src/hotspot/share/gc/shared/barrierSet.cpp
index 38609360222..9aacee64f08 100644
--- a/src/hotspot/share/gc/shared/barrierSet.cpp
+++ b/src/hotspot/share/gc/shared/barrierSet.cpp
@@ -59,7 +59,7 @@ static BarrierSetNMethod* select_barrier_set_nmethod(BarrierSetNMethod* barrier_
   } else {
     // The GC needs nmethod entry barriers to deal with continuations
    // and code cache unloading
-    return NOT_ARM32(new BarrierSetNMethod()) ARM32_ONLY(nullptr);
+    return new BarrierSetNMethod();
   }
 }
 