8193257: PPC64, s390 implementation for Thread-local handshakes

Reviewed-by: goetz, lucy
This commit is contained in:
Martin Doerr 2017-12-14 13:05:20 +01:00
parent 3f59dac0b3
commit d7e6bad25c
22 changed files with 232 additions and 165 deletions

View File

@ -36,6 +36,7 @@
#include "gc/shared/cardTableModRefBS.hpp"
#include "nativeInst_ppc.hpp"
#include "oops/objArrayKlass.hpp"
#include "runtime/safepointMechanism.inline.hpp"
#include "runtime/sharedRuntime.hpp"
#define __ _masm->
@ -1314,11 +1315,10 @@ void LIR_Assembler::return_op(LIR_Opr result) {
__ pop_frame();
}
if (LoadPollAddressFromThread) {
// TODO: PPC port __ ld(polling_page, in_bytes(JavaThread::poll_address_offset()), R16_thread);
Unimplemented();
if (SafepointMechanism::uses_thread_local_poll()) {
__ ld(polling_page, in_bytes(Thread::polling_page_offset()), R16_thread);
} else {
__ load_const_optimized(polling_page, (long)(address) os::get_polling_page(), R0); // TODO: PPC port: get_standard_polling_page()
__ load_const_optimized(polling_page, (long)(address) os::get_polling_page(), R0);
}
// Restore return pc relative to callers' sp.
@ -1341,26 +1341,18 @@ void LIR_Assembler::return_op(LIR_Opr result) {
int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) {
if (LoadPollAddressFromThread) {
const Register poll_addr = tmp->as_register();
// TODO: PPC port __ ld(poll_addr, in_bytes(JavaThread::poll_address_offset()), R16_thread);
Unimplemented();
__ relocate(relocInfo::poll_type); // XXX
guarantee(info != NULL, "Shouldn't be NULL");
int offset = __ offset();
add_debug_info_for_branch(info);
__ load_from_polling_page(poll_addr);
return offset;
const Register poll_addr = tmp->as_register();
if (SafepointMechanism::uses_thread_local_poll()) {
__ ld(poll_addr, in_bytes(Thread::polling_page_offset()), R16_thread);
} else {
__ load_const_optimized(poll_addr, (intptr_t)os::get_polling_page(), R0);
}
__ load_const_optimized(tmp->as_register(), (intptr_t)os::get_polling_page(), R0); // TODO: PPC port: get_standard_polling_page()
if (info != NULL) {
add_debug_info_for_branch(info);
}
int offset = __ offset();
__ relocate(relocInfo::poll_type);
__ load_from_polling_page(tmp->as_register());
__ load_from_polling_page(poll_addr);
return offset;
}

View File

@ -54,4 +54,6 @@ const bool CCallingConventionRequiresIntsAsLongs = true;
#define SUPPORT_RESERVED_STACK_AREA
#define THREAD_LOCAL_POLL
#endif // CPU_PPC_VM_GLOBALDEFINITIONS_PPC_HPP

View File

@ -83,7 +83,7 @@ define_pd_global(bool, CompactStrings, true);
// 2x unrolled loop is shorter with more than 9 HeapWords.
define_pd_global(intx, InitArrayShortSize, 9*BytesPerLong);
define_pd_global(bool, ThreadLocalHandshakes, false);
define_pd_global(bool, ThreadLocalHandshakes, true);
// Platform dependent flag handling: flags only defined on this platform.
#define ARCH_FLAGS(develop, \
@ -95,12 +95,6 @@ define_pd_global(bool, ThreadLocalHandshakes, false);
constraint, \
writeable) \
\
/* Load poll address from thread. This is used to implement per-thread */ \
/* safepoints on platforms != IA64. */ \
product(bool, LoadPollAddressFromThread, false, \
"Load polling page address from thread object (required for " \
"per-thread safepoints on platforms != IA64)") \
\
product(uintx, PowerArchitecturePPC64, 0, \
"CPU Version: x for PowerX. Currently recognizes Power5 to " \
"Power8. Default is 0. Newer CPUs will be recognized as Power8.") \

View File

@ -57,10 +57,10 @@ class InterpreterMacroAssembler: public MacroAssembler {
static const Address d_tmp;
// dispatch routines
void dispatch_next(TosState state, int step = 0);
void dispatch_next(TosState state, int step = 0, bool generate_poll = false);
void dispatch_via (TosState state, address* table);
void load_dispatch_table(Register dst, address* table);
void dispatch_Lbyte_code(TosState state, Register bytecode, address* table, bool verify = false);
void dispatch_Lbyte_code(TosState state, Register bytecode, address* table, bool generate_poll = false);
// Called by shared interpreter generator.
void dispatch_prolog(TosState state, int step = 0);

View File

@ -29,6 +29,7 @@
#include "interp_masm_ppc.hpp"
#include "interpreter/interpreterRuntime.hpp"
#include "prims/jvmtiThreadState.hpp"
#include "runtime/safepointMechanism.hpp"
#include "runtime/sharedRuntime.hpp"
#ifdef PRODUCT
@ -53,7 +54,7 @@ void InterpreterMacroAssembler::jump_to_entry(address entry, Register Rscratch)
}
}
void InterpreterMacroAssembler::dispatch_next(TosState state, int bcp_incr) {
void InterpreterMacroAssembler::dispatch_next(TosState state, int bcp_incr, bool generate_poll) {
Register bytecode = R12_scratch2;
if (bcp_incr != 0) {
lbzu(bytecode, bcp_incr, R14_bcp);
@ -61,7 +62,7 @@ void InterpreterMacroAssembler::dispatch_next(TosState state, int bcp_incr) {
lbz(bytecode, 0, R14_bcp);
}
dispatch_Lbyte_code(state, bytecode, Interpreter::dispatch_table(state));
dispatch_Lbyte_code(state, bytecode, Interpreter::dispatch_table(state), generate_poll);
}
void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) {
@ -203,16 +204,26 @@ void InterpreterMacroAssembler::load_dispatch_table(Register dst, address* table
}
void InterpreterMacroAssembler::dispatch_Lbyte_code(TosState state, Register bytecode,
address* table, bool verify) {
if (verify) {
unimplemented("dispatch_Lbyte_code: verify"); // See Sparc Implementation to implement this
}
address* table, bool generate_poll) {
assert_different_registers(bytecode, R11_scratch1);
// Calc dispatch table address.
load_dispatch_table(R11_scratch1, table);
if (SafepointMechanism::uses_thread_local_poll() && generate_poll) {
address *sfpt_tbl = Interpreter::safept_table(state);
if (table != sfpt_tbl) {
Label dispatch;
ld(R0, in_bytes(Thread::polling_page_offset()), R16_thread);
// Armed page has poll_bit set, if poll bit is cleared just continue.
andi_(R0, R0, SafepointMechanism::poll_bit());
beq(CCR0, dispatch);
load_dispatch_table(R11_scratch1, sfpt_tbl);
align(32, 16);
bind(dispatch);
}
}
sldi(R12_scratch2, bytecode, LogBytesPerWord);
ldx(R11_scratch1, R11_scratch1, R12_scratch2);

View File

@ -37,6 +37,8 @@
#include "runtime/interfaceSupport.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/safepoint.hpp"
#include "runtime/safepointMechanism.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"
@ -3019,6 +3021,18 @@ void MacroAssembler::serialize_memory(Register thread, Register tmp1, Register t
stwx(R0, tmp1, tmp2);
}
// Emit a safepoint check that branches to slow_path when a safepoint is pending.
//
//   slow_path - label to branch to if the check indicates a pending safepoint.
//   temp_reg  - scratch register; overwritten on both code paths.
//
// Which check is emitted is decided at code-generation time by
// SafepointMechanism::uses_thread_local_poll(). Both variants leave their
// result in CCR0, which the final bne consumes.
void MacroAssembler::safepoint_poll(Label& slow_path, Register temp_reg) {
if (SafepointMechanism::uses_thread_local_poll()) {
// Thread-local variant: load the polling word stored in the current thread (R16_thread).
ld(temp_reg, in_bytes(Thread::polling_page_offset()), R16_thread);
// Armed page has poll_bit set.
// Isolate the poll bit; the record-form andi_ sets CCR0, so "not equal" below means the bit is set.
andi_(temp_reg, temp_reg, SafepointMechanism::poll_bit());
} else {
// Global variant: load the 32-bit safepoint state and compare it with _not_synchronized.
lwz(temp_reg, (RegisterOrConstant)(intptr_t)SafepointSynchronize::address_of_state());
cmpwi(CCR0, temp_reg, SafepointSynchronize::_not_synchronized);
}
// Shared exit: take the slow path if the poll bit was set / the state differed from _not_synchronized.
bne(CCR0, slow_path);
}
// GC barrier helper macros

View File

@ -647,6 +647,9 @@ class MacroAssembler: public Assembler {
// Support for serializing memory accesses between threads
void serialize_memory(Register thread, Register tmp1, Register tmp2);
// Check if safepoint requested and if so branch
void safepoint_poll(Label& slow_path, Register temp_reg);
// GC barrier support.
void card_write_barrier_post(Register Rstore_addr, Register Rnew_val, Register Rtmp);
void card_table_write(jbyte* byte_map_base, Register Rtmp, Register Robj);

View File

@ -1577,11 +1577,10 @@ void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
}
if (method_needs_polling) {
if (LoadPollAddressFromThread) {
// TODO: PPC port __ ld(polling_page, in_bytes(JavaThread::poll_address_offset()), R16_thread);
Unimplemented();
if (SafepointMechanism::uses_thread_local_poll()) {
__ ld(polling_page, in_bytes(JavaThread::polling_page_offset()), R16_thread);
} else {
__ load_const_optimized(polling_page, (long)(address) os::get_polling_page()); // TODO: PPC port: get_standard_polling_page()
__ load_const_optimized(polling_page, (long)(address) os::get_polling_page());
}
}
@ -14147,7 +14146,6 @@ instruct repl2L_immIminus1(vecX dst, immI_minus1 src) %{
instruct safePoint_poll(iRegPdst poll) %{
match(SafePoint poll);
predicate(LoadPollAddressFromThread);
// It caused problems to add the effect that r0 is killed, but this
// effect no longer needs to be mentioned, since r0 is not contained
@ -14159,24 +14157,6 @@ instruct safePoint_poll(iRegPdst poll) %{
ins_pipe(pipe_class_default);
%}
// Safepoint without per-thread support. Load address of page to poll
// as constant.
// Rscratch2RegP is R12.
// LoadConPollAddr node is added in pd_post_matching_hook(). It must be
// a separate node so that the oop map is at the right location.
instruct safePoint_poll_conPollAddr(rscratch2RegP poll) %{
match(SafePoint poll);
predicate(!LoadPollAddressFromThread);
// It caused problems to add the effect that r0 is killed, but this
// effect no longer needs to be mentioned, since r0 is not contained
// in a reg_class.
format %{ "LD R0, #0, R12 \t// Safepoint poll for GC" %}
ins_encode( enc_poll(0x0, poll) );
ins_pipe(pipe_class_default);
%}
// ============================================================================
// Call Instructions

View File

@ -214,6 +214,7 @@ OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssemble
// StackFrameStream construction (needed for deoptimization; see
// compiledVFrame::create_stack_value).
// If return_pc_adjustment != 0 adjust the return pc by return_pc_adjustment.
// Updated return pc is returned in R31 (if not return_pc_is_pre_saved).
int i;
int offset;
@ -233,16 +234,17 @@ OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssemble
BLOCK_COMMENT("push_frame_reg_args_and_save_live_registers {");
// Save r31 in the last slot of the not yet pushed frame so that we
// can use it as scratch reg.
__ std(R31, -reg_size, R1_SP);
// Save some registers in the last slots of the not yet pushed frame so that we
// can use them as scratch regs.
__ std(R31, - reg_size, R1_SP);
__ std(R30, -2*reg_size, R1_SP);
assert(-reg_size == register_save_offset - frame_size_in_bytes + ((regstosave_num-1)*reg_size),
"consistency check");
// save the flags
// Do the save_LR_CR by hand and adjust the return pc if requested.
__ mfcr(R31);
__ std(R31, _abi(cr), R1_SP);
__ mfcr(R30);
__ std(R30, _abi(cr), R1_SP);
switch (return_pc_location) {
case return_pc_is_lr: __ mflr(R31); break;
case return_pc_is_pre_saved: assert(return_pc_adjustment == 0, "unsupported"); break;
@ -257,7 +259,7 @@ OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssemble
}
// push a new frame
__ push_frame(frame_size_in_bytes, R31);
__ push_frame(frame_size_in_bytes, R30);
// save all registers (ints and floats)
offset = register_save_offset;
@ -267,7 +269,7 @@ OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssemble
switch (reg_type) {
case RegisterSaver::int_reg: {
if (reg_num != 31) { // We spilled R31 right at the beginning.
if (reg_num < 30) { // We spilled R30-31 right at the beginning.
__ std(as_Register(reg_num), offset, R1_SP);
}
break;
@ -278,8 +280,8 @@ OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssemble
}
case RegisterSaver::special_reg: {
if (reg_num == SR_CTR_SpecialRegisterEnumValue) {
__ mfctr(R31);
__ std(R31, offset, R1_SP);
__ mfctr(R30);
__ std(R30, offset, R1_SP);
} else {
Unimplemented();
}
@ -2364,23 +2366,14 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
Register sync_state = r_temp_5;
Register suspend_flags = r_temp_6;
__ load_const(sync_state_addr, SafepointSynchronize::address_of_state(), /*temp*/ sync_state);
// TODO: PPC port assert(4 == SafepointSynchronize::sz_state(), "unexpected field size");
__ lwz(sync_state, 0, sync_state_addr);
// No synchronization in progress nor yet synchronized
// (cmp-br-isync on one path, release (same as acquire on PPC64) on the other path).
__ safepoint_poll(sync, sync_state);
// Not suspended.
// TODO: PPC port assert(4 == Thread::sz_suspend_flags(), "unexpected field size");
__ lwz(suspend_flags, thread_(suspend_flags));
__ acquire();
Label do_safepoint;
// No synchronization in progress nor yet synchronized.
__ cmpwi(CCR0, sync_state, SafepointSynchronize::_not_synchronized);
// Not suspended.
__ cmpwi(CCR1, suspend_flags, 0);
__ bne(CCR0, sync);
__ beq(CCR1, no_block);
// Block. Save any potential method result value before the operation and
@ -2388,6 +2381,7 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
// lets us share the oopMap we used when we went native rather than create
// a distinct one for this pc.
__ bind(sync);
__ isync();
address entry_point = is_critical_native
? CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)
@ -2410,7 +2404,7 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
// Transition from _thread_in_native_trans to _thread_in_Java.
__ li(R0, _thread_in_Java);
__ release();
__ lwsync(); // Acquire safepoint and suspend state, release thread state.
// TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
__ stw(R0, thread_(thread_state));
__ bind(after_transition);
@ -3093,7 +3087,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t
return_pc_location = RegisterSaver::return_pc_is_thread_saved_exception_pc;
}
// Save registers, fpu state, and flags.
// Save registers, fpu state, and flags. Set R31 = return pc.
map = RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
&frame_size_in_bytes,
/*generate_oop_map=*/ true,
@ -3142,6 +3136,19 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t
// No exception case.
__ BIND(noException);
if (SafepointMechanism::uses_thread_local_poll() && !cause_return) {
Label no_adjust;
// If our stashed return pc was modified by the runtime we avoid touching it
__ ld(R0, frame_size_in_bytes + _abi(lr), R1_SP);
__ cmpd(CCR0, R0, R31);
__ bne(CCR0, no_adjust);
// Adjust return pc forward to step over the safepoint poll instruction
__ addi(R31, R31, 4);
__ std(R31, frame_size_in_bytes + _abi(lr), R1_SP);
__ bind(no_adjust);
}
// Normal exit, restore registers and exit.
RegisterSaver::restore_live_registers_and_pop_frame(masm,

View File

@ -1535,23 +1535,17 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
// Acquire isn't strictly necessary here because of the fence, but
// sync_state is declared to be volatile, so we do it anyway
// (cmp-br-isync on one path, release (same as acquire on PPC64) on the other path).
int sync_state_offs = __ load_const_optimized(sync_state_addr, SafepointSynchronize::address_of_state(), /*temp*/R0, true);
// TODO PPC port assert(4 == SafepointSynchronize::sz_state(), "unexpected field size");
__ lwz(sync_state, sync_state_offs, sync_state_addr);
Label do_safepoint, sync_check_done;
// No synchronization in progress nor yet synchronized.
__ safepoint_poll(do_safepoint, sync_state);
// Not suspended.
// TODO PPC port assert(4 == Thread::sz_suspend_flags(), "unexpected field size");
__ lwz(suspend_flags, thread_(suspend_flags));
Label sync_check_done;
Label do_safepoint;
// No synchronization in progress nor yet synchronized.
__ cmpwi(CCR0, sync_state, SafepointSynchronize::_not_synchronized);
// Not suspended.
__ cmpwi(CCR1, suspend_flags, 0);
__ bne(CCR0, do_safepoint);
__ beq(CCR1, sync_check_done);
__ bind(do_safepoint);
__ isync();
// Block. We do the call directly and leave the current
@ -1592,7 +1586,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
// we don't want the current thread to continue until all our prior memory
// accesses (including the new thread state) are visible to other threads.
__ li(R0/*thread_state*/, _thread_in_Java);
__ release();
__ lwsync(); // Acquire safepoint and suspend state, release thread state.
__ stw(R0/*thread_state*/, thread_(thread_state));
if (CheckJNICalls) {
@ -1858,10 +1852,7 @@ address TemplateInterpreterGenerator::generate_CRC32_update_entry() {
// Safepoint check
const Register sync_state = R11_scratch1;
int sync_state_offs = __ load_const_optimized(sync_state, SafepointSynchronize::address_of_state(), /*temp*/R0, true);
__ lwz(sync_state, sync_state_offs, sync_state);
__ cmpwi(CCR0, sync_state, SafepointSynchronize::_not_synchronized);
__ bne(CCR0, slow_path);
__ safepoint_poll(slow_path, sync_state);
// We don't generate local frame and don't align stack because
// we do not even call stub code (we generate the code inline)
@ -1918,10 +1909,7 @@ address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractI
// Safepoint check
const Register sync_state = R11_scratch1;
int sync_state_offs = __ load_const_optimized(sync_state, SafepointSynchronize::address_of_state(), /*temp*/R0, true);
__ lwz(sync_state, sync_state_offs, sync_state);
__ cmpwi(CCR0, sync_state, SafepointSynchronize::_not_synchronized);
__ bne(CCR0, slow_path);
__ safepoint_poll(slow_path, sync_state);
// We don't generate local frame and don't align stack because
// we do not even call stub code (we generate the code inline)

View File

@ -1630,7 +1630,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {
// Push returnAddress for "ret" on stack.
__ push_ptr(R17_tos);
// And away we go!
__ dispatch_next(vtos);
__ dispatch_next(vtos, 0 ,true);
return;
}
@ -1643,7 +1643,6 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {
const bool increment_invocation_counter_for_backward_branches = UseCompiler && UseLoopCounter;
if (increment_invocation_counter_for_backward_branches) {
Label Lforward;
__ dispatch_prolog(vtos);
// Check branch direction.
__ cmpdi(CCR0, Rdisp, 0);
@ -1744,11 +1743,8 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {
}
__ bind(Lforward);
__ dispatch_epilog(vtos);
} else {
__ dispatch_next(vtos);
}
__ dispatch_next(vtos, 0, true);
}
// Helper function for if_cmp* methods below.
@ -1829,7 +1825,7 @@ void TemplateTable::ret() {
__ ld(R11_scratch1, in_bytes(Method::const_offset()), R19_method);
__ add(R11_scratch1, R17_tos, R11_scratch1);
__ addi(R14_bcp, R11_scratch1, in_bytes(ConstMethod::codes_offset()));
__ dispatch_next(vtos);
__ dispatch_next(vtos, 0, true);
}
void TemplateTable::wide_ret() {
@ -1846,7 +1842,7 @@ void TemplateTable::wide_ret() {
__ ld(Rscratch1, in_bytes(Method::const_offset()), R19_method);
__ addi(Rscratch2, R17_tos, in_bytes(ConstMethod::codes_offset()));
__ add(R14_bcp, Rscratch1, Rscratch2);
__ dispatch_next(vtos);
__ dispatch_next(vtos, 0, true);
}
void TemplateTable::tableswitch() {
@ -1896,7 +1892,7 @@ void TemplateTable::tableswitch() {
__ bind(Ldispatch);
__ add(R14_bcp, Roffset, R14_bcp);
__ dispatch_next(vtos);
__ dispatch_next(vtos, 0, true);
}
void TemplateTable::lookupswitch() {
@ -1960,7 +1956,7 @@ void TemplateTable::fast_linearswitch() {
__ bind(Lcontinue_execution);
__ add(R14_bcp, Roffset, R14_bcp);
__ dispatch_next(vtos);
__ dispatch_next(vtos, 0, true);
}
// Table switch using binary search (value/offset pairs are ordered).
@ -2093,7 +2089,7 @@ void TemplateTable::fast_binaryswitch() {
__ extsw(Rj, Rj);
__ add(R14_bcp, Rj, R14_bcp);
__ dispatch_next(vtos);
__ dispatch_next(vtos, 0 , true);
}
void TemplateTable::_return(TosState state) {
@ -2124,6 +2120,17 @@ void TemplateTable::_return(TosState state) {
__ bind(Lskip_register_finalizer);
}
if (SafepointMechanism::uses_thread_local_poll() && _desc->bytecode() != Bytecodes::_return_register_finalizer) {
Label no_safepoint;
__ ld(R11_scratch1, in_bytes(Thread::polling_page_offset()), R16_thread);
__ andi_(R11_scratch1, R11_scratch1, SafepointMechanism::poll_bit());
__ beq(CCR0, no_safepoint);
__ push(state);
__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint));
__ pop(state);
__ bind(no_safepoint);
}
// Move the result value into the correct register and remove memory stack frame.
__ remove_activation(state, /* throw_monitor_exception */ true);
// Restoration of lr done by remove_activation.

View File

@ -36,6 +36,7 @@
#include "gc/shared/cardTableModRefBS.hpp"
#include "nativeInst_s390.hpp"
#include "oops/objArrayKlass.hpp"
#include "runtime/safepointMechanism.inline.hpp"
#include "runtime/sharedRuntime.hpp"
#include "vmreg_s390.inline.hpp"
@ -1135,8 +1136,12 @@ void LIR_Assembler::return_op(LIR_Opr result) {
(result->is_single_fpu() && result->as_float_reg() == Z_F0) ||
(result->is_double_fpu() && result->as_double_reg() == Z_F0), "convention");
AddressLiteral pp(os::get_polling_page());
__ load_const_optimized(Z_R1_scratch, pp);
if (SafepointMechanism::uses_thread_local_poll()) {
__ z_lg(Z_R1_scratch, Address(Z_thread, Thread::polling_page_offset()));
} else {
AddressLiteral pp(os::get_polling_page());
__ load_const_optimized(Z_R1_scratch, pp);
}
// Pop the frame before the safepoint code.
__ pop_frame_restore_retPC(initial_frame_size_in_bytes());
@ -1154,13 +1159,18 @@ void LIR_Assembler::return_op(LIR_Opr result) {
}
int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) {
AddressLiteral pp(os::get_polling_page());
__ load_const_optimized(tmp->as_register_lo(), pp);
const Register poll_addr = tmp->as_register_lo();
if (SafepointMechanism::uses_thread_local_poll()) {
__ z_lg(poll_addr, Address(Z_thread, Thread::polling_page_offset()));
} else {
AddressLiteral pp(os::get_polling_page());
__ load_const_optimized(poll_addr, pp);
}
guarantee(info != NULL, "Shouldn't be NULL");
add_debug_info_for_branch(info);
int offset = __ offset();
__ relocate(relocInfo::poll_type);
__ load_from_polling_page(tmp->as_register_lo());
__ load_from_polling_page(poll_addr);
return offset;
}

View File

@ -54,4 +54,6 @@ const bool CCallingConventionRequiresIntsAsLongs = true;
#define SUPPORT_RESERVED_STACK_AREA
#define THREAD_LOCAL_POLL
#endif // CPU_S390_VM_GLOBALDEFINITIONS_S390_HPP

View File

@ -85,7 +85,7 @@ define_pd_global(bool, CompactStrings, true);
// 8146801 (Short Array Allocation): No performance work done here yet.
define_pd_global(intx, InitArrayShortSize, 1*BytesPerLong);
define_pd_global(bool, ThreadLocalHandshakes, false);
define_pd_global(bool, ThreadLocalHandshakes, true);
#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint, writeable) \
\

View File

@ -36,6 +36,7 @@
#include "prims/jvmtiThreadState.hpp"
#include "runtime/basicLock.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/safepointMechanism.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/thread.inline.hpp"
@ -74,16 +75,16 @@ void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) {
dispatch_next(state, step);
}
void InterpreterMacroAssembler::dispatch_next(TosState state, int bcp_incr) {
void InterpreterMacroAssembler::dispatch_next(TosState state, int bcp_incr, bool generate_poll) {
z_llgc(Z_bytecode, bcp_incr, Z_R0, Z_bcp); // Load next bytecode.
add2reg(Z_bcp, bcp_incr); // Advance bcp. Add2reg produces optimal code.
dispatch_base(state, Interpreter::dispatch_table(state));
dispatch_base(state, Interpreter::dispatch_table(state), generate_poll);
}
// Common code to dispatch and dispatch_only.
// Dispatch value in Lbyte_code and increment Lbcp.
void InterpreterMacroAssembler::dispatch_base(TosState state, address* table) {
void InterpreterMacroAssembler::dispatch_base(TosState state, address* table, bool generate_poll) {
verify_FPU(1, state);
#ifdef ASSERT
@ -109,7 +110,20 @@ void InterpreterMacroAssembler::dispatch_base(TosState state, address* table) {
verify_oop(Z_tos, state);
// Dispatch table to use.
load_absolute_address(Z_tmp_1, (address) table); // Z_tmp_1 = table;
load_absolute_address(Z_tmp_1, (address)table); // Z_tmp_1 = table;
if (SafepointMechanism::uses_thread_local_poll() && generate_poll) {
address *sfpt_tbl = Interpreter::safept_table(state);
if (table != sfpt_tbl) {
Label dispatch;
const Address poll_byte_addr(Z_thread, in_bytes(Thread::polling_page_offset()) + 7 /* Big Endian */);
// Armed page has poll_bit set, if poll bit is cleared just continue.
z_tm(poll_byte_addr, SafepointMechanism::poll_bit());
z_braz(dispatch);
load_absolute_address(Z_tmp_1, (address)sfpt_tbl); // Z_tmp_1 = table;
bind(dispatch);
}
}
// 0 <= Z_bytecode < 256 => Use a 32 bit shift, because it is shorter than sllg.
// Z_bytecode must have been loaded zero-extended for this approach to be correct.
@ -119,8 +133,8 @@ void InterpreterMacroAssembler::dispatch_base(TosState state, address* table) {
z_br(Z_tmp_1);
}
void InterpreterMacroAssembler::dispatch_only(TosState state) {
dispatch_base(state, Interpreter::dispatch_table(state));
void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll) {
dispatch_base(state, Interpreter::dispatch_table(state), generate_poll);
}
void InterpreterMacroAssembler::dispatch_only_normal(TosState state) {

View File

@ -49,7 +49,7 @@ class InterpreterMacroAssembler: public MacroAssembler {
bool check_exceptions);
// Base routine for all dispatches.
void dispatch_base(TosState state, address* table);
void dispatch_base(TosState state, address* table, bool generate_poll = false);
public:
InterpreterMacroAssembler(CodeBuffer* c)
@ -78,11 +78,11 @@ class InterpreterMacroAssembler: public MacroAssembler {
// dispatch routines
void dispatch_prolog(TosState state, int step = 0);
void dispatch_epilog(TosState state, int step = 0);
void dispatch_only(TosState state);
void dispatch_only(TosState state, bool generate_poll = false);
// Dispatch normal table via Z_bytecode (assume Z_bytecode is loaded already).
void dispatch_only_normal(TosState state);
void dispatch_normal(TosState state);
void dispatch_next(TosState state, int step = 0);
void dispatch_next(TosState state, int step = 0, bool generate_poll = false);
void dispatch_next_noverify_oop(TosState state, int step = 0);
void dispatch_via(TosState state, address* table);

View File

@ -43,6 +43,8 @@
#include "runtime/interfaceSupport.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/safepoint.hpp"
#include "runtime/safepointMechanism.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/events.hpp"
@ -2019,6 +2021,15 @@ address MacroAssembler::get_PC(Register result, int64_t offset) {
return here + offset;
}
// Emit code that computes the length in bytes (2, 4 or 6) of the instruction
// located at the address held in pc.
//
//   size - result register; receives the instruction length.
//   pc   - register holding the instruction address; only used as the base of
//          the load (it must point to valid code).
//          NOTE(review): presumably size and pc must be different registers — confirm.
void MacroAssembler::instr_size(Register size, Register pc) {
// Extract 2 most significant bits of current instruction.
// Load the first instruction byte zero-extended, then shift it right by 6 so only its top two bits remain.
z_llgc(size, Address(pc));
z_srl(size, 6);
// Compute (x+3)&6 which translates 0->2, 1->4, 2->4, 3->6.
// Done branch-free: add 3, then mask the low halfword with 6.
z_ahi(size, 3);
z_nill(size, 6);
}
// Resize_frame with SP(new) = SP(old) - [offset].
void MacroAssembler::resize_frame_sub(Register offset, Register fp, bool load_fp)
{
@ -2705,6 +2716,19 @@ void MacroAssembler::serialize_memory(Register thread, Register tmp1, Register t
z_st(Z_R0, 0, tmp2, tmp1);
}
// Emit a safepoint check that branches to slow_path when a safepoint is pending.
//
//   slow_path - label to branch to if the check indicates a pending safepoint.
//   temp_reg  - scratch register; only written by the global-state variant
//               (the thread-local variant tests memory directly).
//
// Which check is emitted is decided at code-generation time by
// SafepointMechanism::uses_thread_local_poll().
void MacroAssembler::safepoint_poll(Label& slow_path, Register temp_reg) {
if (SafepointMechanism::uses_thread_local_poll()) {
// Address the last byte (offset +7) of the 8-byte polling word in the thread;
// on this big-endian platform that is the least significant byte.
const Address poll_byte_addr(Z_thread, in_bytes(Thread::polling_page_offset()) + 7 /* Big Endian */);
// Armed page has poll_bit set.
// Test the poll bit under mask in memory and branch if it is set
// (z_brnaz: presumably "branch if not all tested bits are zero" — confirm).
z_tm(poll_byte_addr, SafepointMechanism::poll_bit());
z_brnaz(slow_path);
} else {
// Global variant: materialize the address of the safepoint state in temp_reg.
load_const_optimized(temp_reg, SafepointSynchronize::address_of_state());
// Compare one byte at offset 4-1 (per the inline note, the last byte of the
// state field) with _not_synchronized and branch if it differs.
z_cli(/*SafepointSynchronize::sz_state()*/4-1, temp_reg, SafepointSynchronize::_not_synchronized);
z_brne(slow_path);
}
}
// Don't rely on register locking, always use Z_R1 as scratch register instead.
void MacroAssembler::bang_stack_with_offset(int offset) {
// Stack grows down, caller passes positive offset.
@ -6457,27 +6481,6 @@ void MacroAssembler::translate_tt(Register r1, Register r2, uint m3) {
Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
}
// Emit a check of the global safepoint state that branches to slow_path if the
// state is not SafepointSynchronize::_not_synchronized.
//
//   slow_path    - label to branch to when the state differs from _not_synchronized.
//   scratch      - register receiving the address of the state; Z_R1 is used if noreg is passed.
//   may_relocate - if true, the address load is either emitted with an
//                  external-word relocation (when the state is reachable pc-relative)
//                  or as a constant load; if false, an absolute-address
//                  load is emitted without any relocation record.
void MacroAssembler::generate_safepoint_check(Label& slow_path, Register scratch, bool may_relocate) {
// Default scratch register.
if (scratch == noreg) scratch = Z_R1;
address Astate = SafepointSynchronize::address_of_state();
BLOCK_COMMENT("safepoint check:");
if (may_relocate) {
// Distance from the current code position to the state decides which load form is used.
ptrdiff_t total_distance = Astate - this->pc();
if (RelAddr::is_in_range_of_RelAddr32(total_distance)) {
// In range: record the relocation immediately before the load it describes.
RelocationHolder rspec = external_word_Relocation::spec(Astate);
(this)->relocate(rspec, relocInfo::pcrel_addr_format);
load_absolute_address(scratch, Astate);
} else {
// Out of range for a 32-bit relative address: load the address as a constant.
load_const_optimized(scratch, Astate);
}
} else {
load_absolute_address(scratch, Astate);
}
// Compare one byte at offset 4-1 (per the inline note, the last byte of the
// state field) with _not_synchronized and branch to the slow path if it differs.
z_cli(/*SafepointSynchronize::sz_state()*/4-1, scratch, SafepointSynchronize::_not_synchronized);
z_brne(slow_path);
}
void MacroAssembler::generate_type_profiling(const Register Rdata,
const Register Rreceiver_klass,

View File

@ -260,8 +260,6 @@ class MacroAssembler: public Assembler {
//
// Constants, loading constants, TOC support
//
// Safepoint check factored out.
void generate_safepoint_check(Label& slow_path, Register scratch = noreg, bool may_relocate = true);
// Load generic address: d <- base(a) + index(a) + disp(a).
inline void load_address(Register d, const Address &a);
@ -443,6 +441,9 @@ class MacroAssembler: public Assembler {
// Get current PC + offset. Offset given in bytes, must be even!
address get_PC(Register result, int64_t offset);
// Get size of instruction at pc (which must point to valid code).
void instr_size(Register size, Register pc);
// Accessing, and in particular modifying, a stack location is only safe if
// the stack pointer (Z_SP) is set such that the accessed stack location is
// in the reserved range.
@ -641,6 +642,9 @@ class MacroAssembler: public Assembler {
// Support for serializing memory accesses between threads.
void serialize_memory(Register thread, Register tmp1, Register tmp2);
// Check if safepoint requested and if so branch
void safepoint_poll(Label& slow_path, Register temp_reg);
// Stack overflow checking
void bang_stack_with_offset(int offset);

View File

@ -919,8 +919,12 @@ void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
// Touch the polling page.
if (need_polling) {
AddressLiteral pp(os::get_polling_page());
__ load_const_optimized(Z_R1_scratch, pp);
if (SafepointMechanism::uses_thread_local_poll()) {
__ z_lg(Z_R1_scratch, Address(Z_thread, Thread::polling_page_offset()));
} else {
AddressLiteral pp(os::get_polling_page());
__ load_const_optimized(Z_R1_scratch, pp);
}
// We need to mark the code position where the load from the safepoint
// polling page was emitted as relocInfo::poll_return_type here.
__ relocate(relocInfo::poll_return_type);

View File

@ -2165,7 +2165,7 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
__ serialize_memory(Z_thread, Z_R1, Z_R2);
}
}
__ generate_safepoint_check(sync, Z_R1, true);
__ safepoint_poll(sync, Z_R1);
__ load_and_test_int(Z_R0, Address(Z_thread, JavaThread::suspend_flags_offset()));
__ z_bre(no_block);
@ -3190,12 +3190,18 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t
bool cause_return = (poll_type == POLL_AT_RETURN);
// Make room for return address (or push it again)
if (!cause_return)
if (!cause_return) {
__ z_lg(Z_R14, Address(Z_thread, JavaThread::saved_exception_pc_offset()));
}
// Save registers, fpu state, and flags
map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
if (SafepointMechanism::uses_thread_local_poll() && !cause_return) {
// Keep a copy of the return pc to detect if it gets modified.
__ z_lgr(Z_R6, Z_R14);
}
// The following is basically a call_VM. However, we need the precise
// address of the call in order to generate an oopmap. Hence, we do all the
// work ourselves.
@ -3231,6 +3237,21 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t
// No exception case
__ bind(noException);
if (SafepointMechanism::uses_thread_local_poll() && !cause_return) {
Label no_adjust;
// If our stashed return pc was modified by the runtime we avoid touching it
const int offset_of_return_pc = _z_abi16(return_pc) + RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers);
__ z_cg(Z_R6, offset_of_return_pc, Z_SP);
__ z_brne(no_adjust);
// Adjust return pc forward to step over the safepoint poll instruction
__ instr_size(Z_R1_scratch, Z_R6);
__ z_agr(Z_R6, Z_R1_scratch);
__ z_stg(Z_R6, offset_of_return_pc, Z_SP);
__ bind(no_adjust);
}
// Normal exit, restore registers and exit.
RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);

View File

@ -1633,7 +1633,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
// Check for safepoint operation in progress and/or pending suspend requests.
{
Label Continue, do_safepoint;
__ generate_safepoint_check(do_safepoint, Z_R1, true);
__ safepoint_poll(do_safepoint, Z_R1);
// Check for suspend.
__ load_and_test_int(Z_R0/*suspend_flags*/, thread_(suspend_flags));
__ z_bre(Continue); // 0 -> no flag set -> not suspended
@ -1937,7 +1937,7 @@ address TemplateInterpreterGenerator::generate_CRC32_update_entry() {
Label slow_path;
// If we need a safepoint check, generate full interpreter entry.
__ generate_safepoint_check(slow_path, Z_R1, false);
__ safepoint_poll(slow_path, Z_R1);
BLOCK_COMMENT("CRC32_update {");
@ -1990,7 +1990,7 @@ address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractI
Label slow_path;
// If we need a safepoint check, generate full interpreter entry.
__ generate_safepoint_check(slow_path, Z_R1, false);
__ safepoint_poll(slow_path, Z_R1);
// We don't generate local frame and don't align stack because
// we call stub code and there is no safepoint on this path.

View File

@ -1853,7 +1853,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {
// Push return address for "ret" on stack.
__ push_ptr(Z_tos);
// And away we go!
__ dispatch_next(vtos);
__ dispatch_next(vtos, 0 , true);
return;
}
@ -1961,7 +1961,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {
// Z_tos: Return bci for jsr's, unused otherwise.
// Z_bytecode: target bytecode
// Z_bcp: target bcp
__ dispatch_only(vtos);
__ dispatch_only(vtos, true);
// Out-of-line code runtime calls.
if (UseLoopCounter) {
@ -2072,7 +2072,7 @@ void TemplateTable::ret() {
__ get_method(Z_tos);
__ mem2reg_opt(Z_R1_scratch, Address(Z_tos, Method::const_offset()));
__ load_address(Z_bcp, Address(Z_R1_scratch, Z_tmp_1, ConstMethod::codes_offset()));
__ dispatch_next(vtos);
__ dispatch_next(vtos, 0 , true);
}
void TemplateTable::wide_ret() {
@ -2085,7 +2085,7 @@ void TemplateTable::wide_ret() {
__ get_method(Z_tos);
__ mem2reg_opt(Z_R1_scratch, Address(Z_tos, Method::const_offset()));
__ load_address(Z_bcp, Address(Z_R1_scratch, Z_tmp_1, ConstMethod::codes_offset()));
__ dispatch_next(vtos);
__ dispatch_next(vtos, 0, true);
}
void TemplateTable::tableswitch () {
@ -2129,7 +2129,7 @@ void TemplateTable::tableswitch () {
// Load next bytecode.
__ z_llgc(Z_bytecode, Address(Z_bcp, index));
__ z_agr(Z_bcp, index); // Advance bcp.
__ dispatch_only(vtos);
__ dispatch_only(vtos, true);
// Handle default.
__ bind(default_case);
@ -2193,7 +2193,7 @@ void TemplateTable::fast_linearswitch () {
// Load next bytecode.
__ z_llgc(Z_bytecode, Address(Z_bcp, offset, 0));
__ z_agr(Z_bcp, offset); // Advance bcp.
__ dispatch_only(vtos);
__ dispatch_only(vtos, true);
}
@ -2302,7 +2302,7 @@ void TemplateTable::fast_binaryswitch() {
// Load next bytecode.
__ z_llgc(Z_bytecode, Address(Z_bcp, j));
__ z_agr(Z_bcp, j); // Advance bcp.
__ dispatch_only(vtos);
__ dispatch_only(vtos, true);
// default case -> j = default offset
__ bind(default_case);
@ -2312,7 +2312,7 @@ void TemplateTable::fast_binaryswitch() {
// Load next bytecode.
__ z_llgc(Z_bytecode, Address(Z_bcp, j));
__ z_agr(Z_bcp, j); // Advance bcp.
__ dispatch_only(vtos);
__ dispatch_only(vtos, true);
}
void TemplateTable::_return(TosState state) {
@ -2333,6 +2333,17 @@ void TemplateTable::_return(TosState state) {
__ bind(skip_register_finalizer);
}
if (SafepointMechanism::uses_thread_local_poll() && _desc->bytecode() != Bytecodes::_return_register_finalizer) {
Label no_safepoint;
const Address poll_byte_addr(Z_thread, in_bytes(Thread::polling_page_offset()) + 7 /* Big Endian */);
__ z_tm(poll_byte_addr, SafepointMechanism::poll_bit());
__ z_braz(no_safepoint);
__ push(state);
__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint));
__ pop(state);
__ bind(no_safepoint);
}
if (state == itos) {
// Narrow result if state is itos but result type is smaller.
// Need to narrow in the return bytecode rather than in generate_return_entry