8189596: AArch64: implementation for Thread-local handshakes

Reviewed-by: adinn
This commit is contained in:
Andrew Haley 2017-11-24 17:19:47 +00:00
parent 5713b7c2bd
commit 295112c91a
11 changed files with 147 additions and 98 deletions

View File

@ -494,42 +494,6 @@ void LIR_Assembler::add_debug_info_for_branch(address adr, CodeEmitInfo* info) {
}
}
// Rather than take a segfault when the polling page is protected,
// explicitly check for a safepoint in progress and if there is one,
// fake a call to the handler as if a segfault had been caught.
void LIR_Assembler::poll_for_safepoint(relocInfo::relocType rtype, CodeEmitInfo* info) {
// Load the byte stored at SafepointSynchronize::address_of_state(); when it
// is zero the branch below skips straight to `nope`.
__ mov(rscratch1, SafepointSynchronize::address_of_state());
__ ldrb(rscratch1, Address(rscratch1));
Label nope, poll;
__ cbz(rscratch1, nope);
__ block_comment("safepoint");
__ enter();
__ push(0x3, sp); // r0 & r1
__ push(0x3ffffffc, sp); // integer registers except lr & sp & r0 & r1
// Store the address of the `poll` label into the thread's saved exception pc
// slot before calling out.
__ adr(r0, poll);
__ str(r0, Address(rthread, JavaThread::saved_exception_pc_offset()));
__ mov(rscratch1, CAST_FROM_FN_PTR(address, SharedRuntime::get_poll_stub));
__ blrt(rscratch1, 1, 0, 1);
__ maybe_isb();
__ pop(0x3ffffffc, sp); // integer registers except lr & sp & r0 & r1
// The call's result in r0 is moved to rscratch1 before r0 is restored, then
// used as the branch target.
__ mov(rscratch1, r0);
__ pop(0x3, sp); // r0 & r1
__ leave();
__ br(rscratch1);
address polling_page(os::get_polling_page());
assert(os::is_poll_address(polling_page), "should be");
unsigned long off;
__ adrp(rscratch1, Address(polling_page, rtype), off);
// `poll` is bound here, ahead of the oop-map/relocation record and the ldrw
// from the polling page that follow.
__ bind(poll);
if (info)
add_debug_info_for_branch(info); // This isn't just debug info:
// it's the oop map
else
__ code_section()->relocate(pc(), rtype);
__ ldrw(zr, Address(rscratch1, off));
__ bind(nope);
}
void LIR_Assembler::return_op(LIR_Opr result) {
assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == r0, "word returns are in r0,");
@ -549,9 +513,7 @@ int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) {
address polling_page(os::get_polling_page());
guarantee(info != NULL, "Shouldn't be NULL");
assert(os::is_poll_address(polling_page), "should be");
unsigned long off;
__ adrp(rscratch1, Address(polling_page, relocInfo::poll_type), off);
assert(off == 0, "must be");
__ get_polling_page(rscratch1, polling_page, relocInfo::poll_type);
add_debug_info_for_branch(info); // This isn't just debug info:
// it's the oop map
__ read_polling_page(rscratch1, relocInfo::poll_type);

View File

@ -51,4 +51,6 @@ const bool CCallingConventionRequiresIntsAsLongs = false;
#define SUPPORT_RESERVED_STACK_AREA
#define THREAD_LOCAL_POLL
#endif // CPU_AARCH64_VM_GLOBALDEFINITIONS_AARCH64_HPP

View File

@ -79,7 +79,7 @@ define_pd_global(bool, CompactStrings, true);
// Clear short arrays bigger than one word in an arch-specific way
define_pd_global(intx, InitArrayShortSize, BytesPerLong);
define_pd_global(bool, ThreadLocalHandshakes, false);
define_pd_global(bool, ThreadLocalHandshakes, true);
#if defined(COMPILER1) || defined(COMPILER2)
define_pd_global(intx, InlineSmallCode, 1000);

View File

@ -30,12 +30,13 @@
#include "logging/log.hpp"
#include "oops/arrayOop.hpp"
#include "oops/markOop.hpp"
#include "oops/methodData.hpp"
#include "oops/method.hpp"
#include "oops/methodData.hpp"
#include "prims/jvmtiExport.hpp"
#include "prims/jvmtiThreadState.hpp"
#include "runtime/basicLock.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/safepointMechanism.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/thread.inline.hpp"
@ -438,13 +439,26 @@ void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) {
void InterpreterMacroAssembler::dispatch_base(TosState state,
address* table,
bool verifyoop) {
bool verifyoop,
bool generate_poll) {
if (VerifyActivationFrameSize) {
Unimplemented();
}
if (verifyoop) {
verify_oop(r0, state);
}
Label safepoint;
address* const safepoint_table = Interpreter::safept_table(state);
bool needs_thread_local_poll = generate_poll &&
SafepointMechanism::uses_thread_local_poll() && table != safepoint_table;
if (needs_thread_local_poll) {
NOT_PRODUCT(block_comment("Thread-local Safepoint poll"));
ldr(rscratch2, Address(rthread, Thread::polling_page_offset()));
tbnz(rscratch2, exact_log2(SafepointMechanism::poll_bit()), safepoint);
}
if (table == Interpreter::dispatch_table(state)) {
addw(rscratch2, rscratch1, Interpreter::distance_from_dispatch_table(state));
ldr(rscratch2, Address(rdispatch, rscratch2, Address::uxtw(3)));
@ -453,10 +467,17 @@ void InterpreterMacroAssembler::dispatch_base(TosState state,
ldr(rscratch2, Address(rscratch2, rscratch1, Address::uxtw(3)));
}
br(rscratch2);
if (needs_thread_local_poll) {
bind(safepoint);
lea(rscratch2, ExternalAddress((address)safepoint_table));
ldr(rscratch2, Address(rscratch2, rscratch1, Address::uxtw(3)));
br(rscratch2);
}
}
void InterpreterMacroAssembler::dispatch_only(TosState state) {
dispatch_base(state, Interpreter::dispatch_table(state));
// Dispatch through the normal dispatch table for `state`, with oop
// verification requested explicitly (third argument) and generate_poll
// forwarded unchanged to dispatch_base.
void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll) {
dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll);
}
void InterpreterMacroAssembler::dispatch_only_normal(TosState state) {
@ -468,10 +489,10 @@ void InterpreterMacroAssembler::dispatch_only_noverify(TosState state) {
}
void InterpreterMacroAssembler::dispatch_next(TosState state, int step) {
// Load the bytecode at [rbcp + step] into rscratch1 and dispatch on it through
// the normal dispatch table for `state`.
//   state         - top-of-stack state selecting the dispatch table
//   step          - byte offset applied to rbcp for the load
//   generate_poll - forwarded to dispatch_base so it can emit the thread-local
//                   safepoint poll ahead of the dispatch
void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) {
// load next bytecode
ldrb(rscratch1, Address(pre(rbcp, step)));
dispatch_base(state, Interpreter::dispatch_table(state));
// dispatch_base takes (state, table, verifyoop, generate_poll). verifyoop must
// be spelled out here: passing generate_poll as the third argument would bind
// it to verifyoop and leave generate_poll at its default of false, so no poll
// would ever be emitted and default callers would lose oop verification.
dispatch_base(state, Interpreter::dispatch_table(state), /*verifyoop*/true, generate_poll);
}
void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) {

View File

@ -55,7 +55,8 @@ class InterpreterMacroAssembler: public MacroAssembler {
bool check_exceptions);
// base routine for all dispatches
void dispatch_base(TosState state, address* table, bool verifyoop = true);
void dispatch_base(TosState state, address* table,
bool verifyoop = true, bool generate_poll = false);
public:
InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code) {}
@ -165,12 +166,12 @@ class InterpreterMacroAssembler: public MacroAssembler {
void dispatch_prolog(TosState state, int step = 0);
void dispatch_epilog(TosState state, int step = 0);
// dispatch via rscratch1
void dispatch_only(TosState state);
void dispatch_only(TosState state, bool generate_poll = false);
// dispatch normal table via rscratch1 (assume rscratch1 is loaded already)
void dispatch_only_normal(TosState state);
void dispatch_only_noverify(TosState state);
// load rscratch1 from [rbcp + step] and dispatch via rscratch1
void dispatch_next(TosState state, int step = 0);
void dispatch_next(TosState state, int step = 0, bool generate_poll = false);
// load rscratch1 from [esi] and dispatch via rscratch1 and table
void dispatch_via (TosState state, address* table);

View File

@ -287,6 +287,40 @@ void MacroAssembler::serialize_memory(Register thread, Register tmp) {
dsb(Assembler::SY);
}
// Emit a safepoint poll that branches to slow_path when a safepoint is
// requested; otherwise execution falls through. Uses rscratch1 as a temporary.
void MacroAssembler::safepoint_poll(Label& slow_path) {
if (SafepointMechanism::uses_thread_local_poll()) {
// Thread-local polling: load the word at Thread::polling_page_offset() from
// the current thread (rthread) and branch if its poll bit is set.
ldr(rscratch1, Address(rthread, Thread::polling_page_offset()));
tbnz(rscratch1, exact_log2(SafepointMechanism::poll_bit()), slow_path);
} else {
// Global polling: load the 32-bit word at
// SafepointSynchronize::address_of_state() and branch if it is non-zero.
unsigned long offset;
adrp(rscratch1, ExternalAddress(SafepointSynchronize::address_of_state()), offset);
ldrw(rscratch1, Address(rscratch1, offset));
// The non-zero test below is only valid while _not_synchronized is 0.
assert(SafepointSynchronize::_not_synchronized == 0, "rewrite this code");
cbnz(rscratch1, slow_path);
}
}
// Just like safepoint_poll, but use an acquiring load for thread-
// local polling.
//
// We need an acquire here to ensure that any subsequent load of the
// global SafepointSynchronize::_state flag is ordered after this load
// of the local Thread::_polling_page. We don't want this poll to
// return false (i.e. not safepointing) and a later poll of the global
// SafepointSynchronize::_state spuriously to return true.
//
// This is to avoid a race when we're in a native->Java transition
// racing the code which wakes up from a safepoint.
//
void MacroAssembler::safepoint_poll_acquire(Label& slow_path) {
if (SafepointMechanism::uses_thread_local_poll()) {
// Form the address of the thread's polling word in rscratch1, then load
// through it with ldar (load-acquire) and branch if the poll bit is set.
lea(rscratch1, Address(rthread, Thread::polling_page_offset()));
ldar(rscratch1, rscratch1);
tbnz(rscratch1, exact_log2(SafepointMechanism::poll_bit()), slow_path);
} else {
// Without thread-local polling there is no per-thread word to acquire;
// emit the plain poll instead.
safepoint_poll(slow_path);
}
}
void MacroAssembler::reset_last_Java_frame(bool clear_fp) {
// we must set sp to zero to clear frame
@ -4336,15 +4370,26 @@ void MacroAssembler::bang_stack_size(Register size, Register tmp) {
}
address MacroAssembler::read_polling_page(Register r, address page, relocInfo::relocType rtype) {
// Move the address of the polling page into dest.
void MacroAssembler::get_polling_page(Register dest, address page, relocInfo::relocType rtype) {
if (SafepointMechanism::uses_thread_local_poll()) {
ldr(dest, Address(rthread, Thread::polling_page_offset()));
} else {
unsigned long off;
adrp(r, Address(page, rtype), off);
InstructionMark im(this);
code_section()->relocate(inst_mark(), rtype);
ldrw(zr, Address(r, off));
return inst_mark();
adrp(dest, Address(page, rtype), off);
assert(off == 0, "polling page must be page aligned");
}
}
// Move the address of the polling page into r, then read the polling
// page.
address MacroAssembler::read_polling_page(Register r, address page, relocInfo::relocType rtype) {
// Fill r via get_polling_page(), then delegate to the two-argument overload;
// its result is returned unchanged.
get_polling_page(r, page, rtype);
return read_polling_page(r, rtype);
}
// Read the polling page. The address of the polling page must
// already be in r.
address MacroAssembler::read_polling_page(Register r, relocInfo::relocType rtype) {
InstructionMark im(this);
code_section()->relocate(inst_mark(), rtype);

View File

@ -97,6 +97,9 @@ class MacroAssembler: public Assembler {
virtual void check_and_handle_popframe(Register java_thread);
virtual void check_and_handle_earlyret(Register java_thread);
void safepoint_poll(Label& slow_path);
void safepoint_poll_acquire(Label& slow_path);
// Biased locking support
// lock_reg and obj_reg must be loaded up with the appropriate values.
// swap_reg is killed.
@ -1199,6 +1202,7 @@ public:
address read_polling_page(Register r, address page, relocInfo::relocType rtype);
address read_polling_page(Register r, relocInfo::relocType rtype);
void get_polling_page(Register dest, address page, relocInfo::relocType rtype);
// CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic.
void update_byte_crc32(Register crc, Register val, Register table);

View File

@ -245,6 +245,11 @@ bool NativeInstruction::is_safepoint_poll() {
// mov(reg, polling_page);
// ldr(zr, [reg, #offset]);
//
// or
//
// ldr(reg, [rthread, #offset]);
// ldr(zr, [reg, #offset]);
//
// however, we cannot rely on the polling page address load always
// directly preceding the read from the page. C1 does that but C2
// has to do the load and read as two independent instruction

View File

@ -1952,7 +1952,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
__ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset()));
// Force this write out before the read below
__ dmb(Assembler::SY);
__ dmb(Assembler::ISH);
} else {
__ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
__ stlrw(rscratch1, rscratch2);
@ -1970,13 +1970,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
// check for safepoint operation in progress and/or pending suspend requests
Label safepoint_in_progress, safepoint_in_progress_done;
{
assert(SafepointSynchronize::_not_synchronized == 0, "fix this code");
unsigned long offset;
__ adrp(rscratch1,
ExternalAddress((address)SafepointSynchronize::address_of_state()),
offset);
__ ldrw(rscratch1, Address(rscratch1, offset));
__ cbnzw(rscratch1, safepoint_in_progress);
__ safepoint_poll_acquire(safepoint_in_progress);
__ ldrw(rscratch1, Address(rthread, JavaThread::suspend_flags_offset()));
__ cbnzw(rscratch1, safepoint_in_progress);
__ bind(safepoint_in_progress_done);
@ -2932,8 +2926,11 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t
if (!cause_return) {
// overwrite the return address pushed by save_live_registers
__ ldr(c_rarg0, Address(rthread, JavaThread::saved_exception_pc_offset()));
__ str(c_rarg0, Address(rfp, wordSize));
// Additionally, r20 is a callee-saved register so we can look at
// it later to determine if someone changed the return address for
// us!
__ ldr(r20, Address(rthread, JavaThread::saved_exception_pc_offset()));
__ str(r20, Address(rfp, wordSize));
}
// Do the call
@ -2968,11 +2965,40 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t
// No exception case
__ bind(noException);
Label no_adjust, bail;
if (SafepointMechanism::uses_thread_local_poll() && !cause_return) {
// If our stashed return pc was modified by the runtime we avoid touching it
__ ldr(rscratch1, Address(rfp, wordSize));
__ cmp(r20, rscratch1);
__ br(Assembler::NE, no_adjust);
#ifdef ASSERT
// Verify the correct encoding of the poll we're about to skip.
// See NativeInstruction::is_ldrw_to_zr()
__ ldrw(rscratch1, Address(r20));
__ ubfx(rscratch2, rscratch1, 22, 10);
__ cmpw(rscratch2, 0b1011100101);
__ br(Assembler::NE, bail);
__ ubfx(rscratch2, rscratch1, 0, 5);
__ cmpw(rscratch2, 0b11111);
__ br(Assembler::NE, bail);
#endif
// Adjust return pc forward to step over the safepoint poll instruction
__ add(r20, r20, NativeInstruction::instruction_size);
__ str(r20, Address(rfp, wordSize));
}
__ bind(no_adjust);
// Normal exit, restore registers and exit.
RegisterSaver::restore_live_registers(masm, save_vectors);
__ ret(lr);
#ifdef ASSERT
__ bind(bail);
__ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected");
#endif
// Make sure all code is generated
masm->flush();

View File

@ -967,12 +967,7 @@ address TemplateInterpreterGenerator::generate_CRC32_update_entry() {
Label slow_path;
// If we need a safepoint check, generate full interpreter entry.
ExternalAddress state(SafepointSynchronize::address_of_state());
unsigned long offset;
__ adrp(rscratch1, ExternalAddress(SafepointSynchronize::address_of_state()), offset);
__ ldrw(rscratch1, Address(rscratch1, offset));
assert(SafepointSynchronize::_not_synchronized == 0, "rewrite this code");
__ cbnz(rscratch1, slow_path);
__ safepoint_poll(slow_path);
// We don't generate local frame and don't align stack because
// we call stub code and there is no safepoint on this path.
@ -986,6 +981,7 @@ address TemplateInterpreterGenerator::generate_CRC32_update_entry() {
__ ldrw(val, Address(esp, 0)); // byte value
__ ldrw(crc, Address(esp, wordSize)); // Initial CRC
unsigned long offset;
__ adrp(tbl, ExternalAddress(StubRoutines::crc_table_addr()), offset);
__ add(tbl, tbl, offset);
@ -1020,12 +1016,7 @@ address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractI
Label slow_path;
// If we need a safepoint check, generate full interpreter entry.
ExternalAddress state(SafepointSynchronize::address_of_state());
unsigned long offset;
__ adrp(rscratch1, ExternalAddress(SafepointSynchronize::address_of_state()), offset);
__ ldrw(rscratch1, Address(rscratch1, offset));
assert(SafepointSynchronize::_not_synchronized == 0, "rewrite this code");
__ cbnz(rscratch1, slow_path);
__ safepoint_poll(slow_path);
// We don't generate local frame and don't align stack because
// we call stub code and there is no safepoint on this path.
@ -1375,7 +1366,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
if (os::is_MP()) {
if (UseMembar) {
// Force this write out before the read below
__ dsb(Assembler::SY);
__ dmb(Assembler::ISH);
} else {
// Write serialization page so VM thread can do a pseudo remote membar.
// We use the current thread pointer to calculate a thread specific
@ -1387,16 +1378,8 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
// check for safepoint operation in progress and/or pending suspend requests
{
Label Continue;
{
unsigned long offset;
__ adrp(rscratch2, SafepointSynchronize::address_of_state(), offset);
__ ldrw(rscratch2, Address(rscratch2, offset));
}
assert(SafepointSynchronize::_not_synchronized == 0,
"SafepointSynchronize::_not_synchronized");
Label L;
__ cbnz(rscratch2, L);
Label L, Continue;
__ safepoint_poll_acquire(L);
__ ldrw(rscratch2, Address(rthread, JavaThread::suspend_flags_offset()));
__ cbz(rscratch2, Continue);
__ bind(L);

View File

@ -1717,7 +1717,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide)
__ push_i(r1);
// Adjust the bcp by the 16-bit displacement in r2
__ add(rbcp, rbcp, r2);
__ dispatch_only(vtos);
__ dispatch_only(vtos, /*generate_poll*/true);
return;
}
@ -1833,7 +1833,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide)
// continue with the bytecode @ target
// rscratch1: target bytecode
// rbcp: target bcp
__ dispatch_only(vtos);
__ dispatch_only(vtos, /*generate_poll*/true);
if (UseLoopCounter) {
if (ProfileInterpreter) {
@ -1973,7 +1973,7 @@ void TemplateTable::ret() {
__ ldr(rbcp, Address(rmethod, Method::const_offset()));
__ lea(rbcp, Address(rbcp, r1));
__ add(rbcp, rbcp, in_bytes(ConstMethod::codes_offset()));
__ dispatch_next(vtos);
__ dispatch_next(vtos, 0, /*generate_poll*/true);
}
void TemplateTable::wide_ret() {
@ -1984,7 +1984,7 @@ void TemplateTable::wide_ret() {
__ ldr(rbcp, Address(rmethod, Method::const_offset()));
__ lea(rbcp, Address(rbcp, r1));
__ add(rbcp, rbcp, in_bytes(ConstMethod::codes_offset()));
__ dispatch_next(vtos);
__ dispatch_next(vtos, 0, /*generate_poll*/true);
}
@ -2014,7 +2014,7 @@ void TemplateTable::tableswitch() {
__ rev32(r3, r3);
__ load_unsigned_byte(rscratch1, Address(rbcp, r3, Address::sxtw(0)));
__ add(rbcp, rbcp, r3, ext::sxtw);
__ dispatch_only(vtos);
__ dispatch_only(vtos, /*generate_poll*/true);
// handle default
__ bind(default_case);
__ profile_switch_default(r0);
@ -2064,7 +2064,7 @@ void TemplateTable::fast_linearswitch() {
__ rev32(r3, r3);
__ add(rbcp, rbcp, r3, ext::sxtw);
__ ldrb(rscratch1, Address(rbcp, 0));
__ dispatch_only(vtos);
__ dispatch_only(vtos, /*generate_poll*/true);
}
void TemplateTable::fast_binaryswitch() {
@ -2162,7 +2162,7 @@ void TemplateTable::fast_binaryswitch() {
__ rev32(j, j);
__ load_unsigned_byte(rscratch1, Address(rbcp, j, Address::sxtw(0)));
__ lea(rbcp, Address(rbcp, j, Address::sxtw(0)));
__ dispatch_only(vtos);
__ dispatch_only(vtos, /*generate_poll*/true);
// default case -> j = default offset
__ bind(default_case);
@ -2171,7 +2171,7 @@ void TemplateTable::fast_binaryswitch() {
__ rev32(j, j);
__ load_unsigned_byte(rscratch1, Address(rbcp, j, Address::sxtw(0)));
__ lea(rbcp, Address(rbcp, j, Address::sxtw(0)));
__ dispatch_only(vtos);
__ dispatch_only(vtos, /*generate_poll*/true);
}