8307907: [ppc] Remove RTM locking implementation
Reviewed-by: mbaesken, rrich, stuefe
This commit is contained in:
parent
4c0e164238
commit
de8aca27ba
src/hotspot
cpu/ppc
assembler_ppc.hpp assembler_ppc.inline.hpp globalDefinitions_ppc.hpp globals_ppc.hpp macroAssembler_ppc.cpp macroAssembler_ppc.hpp ppc.ad sharedRuntime_ppc.cpp vm_version_ppc.cpp vm_version_ppc.hpp
share/runtime
test/hotspot/jtreg
@ -337,15 +337,6 @@ class Assembler : public AbstractAssembler {
|
||||
MFCTR_OPCODE = (MFSPR_OPCODE | 9 << SPR_0_4_SHIFT),
|
||||
|
||||
// Attention: Higher and lower half are inserted in reversed order.
|
||||
MTTFHAR_OPCODE = (MTSPR_OPCODE | 4 << SPR_5_9_SHIFT | 0 << SPR_0_4_SHIFT),
|
||||
MFTFHAR_OPCODE = (MFSPR_OPCODE | 4 << SPR_5_9_SHIFT | 0 << SPR_0_4_SHIFT),
|
||||
MTTFIAR_OPCODE = (MTSPR_OPCODE | 4 << SPR_5_9_SHIFT | 1 << SPR_0_4_SHIFT),
|
||||
MFTFIAR_OPCODE = (MFSPR_OPCODE | 4 << SPR_5_9_SHIFT | 1 << SPR_0_4_SHIFT),
|
||||
MTTEXASR_OPCODE = (MTSPR_OPCODE | 4 << SPR_5_9_SHIFT | 2 << SPR_0_4_SHIFT),
|
||||
MFTEXASR_OPCODE = (MFSPR_OPCODE | 4 << SPR_5_9_SHIFT | 2 << SPR_0_4_SHIFT),
|
||||
MTTEXASRU_OPCODE = (MTSPR_OPCODE | 4 << SPR_5_9_SHIFT | 3 << SPR_0_4_SHIFT),
|
||||
MFTEXASRU_OPCODE = (MFSPR_OPCODE | 4 << SPR_5_9_SHIFT | 3 << SPR_0_4_SHIFT),
|
||||
|
||||
MTVRSAVE_OPCODE = (MTSPR_OPCODE | 8 << SPR_5_9_SHIFT | 0 << SPR_0_4_SHIFT),
|
||||
MFVRSAVE_OPCODE = (MFSPR_OPCODE | 8 << SPR_5_9_SHIFT | 0 << SPR_0_4_SHIFT),
|
||||
|
||||
@ -766,17 +757,6 @@ class Assembler : public AbstractAssembler {
|
||||
// Vector Permute and Xor (introduced with Power 8)
|
||||
VPERMXOR_OPCODE = (4u << OPCODE_SHIFT | 45u),
|
||||
|
||||
// Transactional Memory instructions (introduced with Power 8)
|
||||
TBEGIN_OPCODE = (31u << OPCODE_SHIFT | 654u << 1),
|
||||
TEND_OPCODE = (31u << OPCODE_SHIFT | 686u << 1),
|
||||
TABORT_OPCODE = (31u << OPCODE_SHIFT | 910u << 1),
|
||||
TABORTWC_OPCODE = (31u << OPCODE_SHIFT | 782u << 1),
|
||||
TABORTWCI_OPCODE = (31u << OPCODE_SHIFT | 846u << 1),
|
||||
TABORTDC_OPCODE = (31u << OPCODE_SHIFT | 814u << 1),
|
||||
TABORTDCI_OPCODE = (31u << OPCODE_SHIFT | 878u << 1),
|
||||
TSR_OPCODE = (31u << OPCODE_SHIFT | 750u << 1),
|
||||
TCHECK_OPCODE = (31u << OPCODE_SHIFT | 718u << 1),
|
||||
|
||||
// Icache and dcache related instructions
|
||||
DCBA_OPCODE = (31u << OPCODE_SHIFT | 758u << 1),
|
||||
DCBZ_OPCODE = (31u << OPCODE_SHIFT | 1014u << 1),
|
||||
@ -1814,33 +1794,6 @@ class Assembler : public AbstractAssembler {
|
||||
// Data Stream Control Register
|
||||
inline void mtdscr(Register s1);
|
||||
inline void mfdscr(Register d );
|
||||
// Transactional Memory Registers
|
||||
inline void mftfhar(Register d);
|
||||
inline void mftfiar(Register d);
|
||||
inline void mftexasr(Register d);
|
||||
inline void mftexasru(Register d);
|
||||
|
||||
// Bit positions within TEXASR. The values are used as rotate amounts that
// bring the selected bit to position 0 before testing it, so they count from
// the most significant bit.
enum transaction_failure_reason {
  // --- Upper half (TEXASRU) ---
  tm_failure_code       =  0,  // Failure code, copied from the tabort or treclaim operand.
  tm_failure_persistent =  7,  // Failure is likely to recur on every execution.
  tm_disallowed         =  8,  // Instruction is not permitted.
  tm_nesting_of         =  9,  // Maximum transaction level exceeded.
  tm_footprint_of       = 10,  // Tracking limit for transactional storage accesses exceeded.
  tm_self_induced_cf    = 11,  // Self-induced conflict in Suspended state.
  tm_non_trans_cf       = 12,  // Conflict with a non-transactional access by another processor.
  tm_trans_cf           = 13,  // Conflict with another transaction.
  tm_translation_cf     = 14,  // Conflict with a TLB invalidation.
  tm_inst_fetch_cf      = 16,  // Instruction fetch from a block previously written transactionally.
  tm_tabort             = 31,  // Termination caused by executing an abort instruction.

  // --- Lower half ---
  tm_suspended          = 32,  // Failure was recorded in Suspended state.
  tm_failure_summary    = 36,  // A failure has been detected and recorded.
  tm_tfiar_exact        = 37,  // The value in TFIAR is exact.
  tm_rot                = 38,  // Rollback-only transaction.
  tm_transaction_level  = 52,  // Transaction level (nesting depth + 1).
};
|
||||
|
||||
// PPC 1, section 2.4.1 Branch Instructions
|
||||
inline void b( address a, relocInfo::relocType rt = relocInfo::none);
|
||||
@ -2452,25 +2405,6 @@ class Assembler : public AbstractAssembler {
|
||||
// Vector Permute and Xor (introduced with Power 8)
|
||||
inline void vpermxor( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c);
|
||||
|
||||
// Transactional Memory instructions (introduced with Power 8)
|
||||
inline void tbegin_(); // R=0
|
||||
inline void tbeginrot_(); // R=1 Rollback-Only Transaction
|
||||
inline void tend_(); // A=0
|
||||
inline void tendall_(); // A=1
|
||||
inline void tabort_();
|
||||
inline void tabort_(Register a);
|
||||
inline void tabortwc_(int t, Register a, Register b);
|
||||
inline void tabortwci_(int t, Register a, int si);
|
||||
inline void tabortdc_(int t, Register a, Register b);
|
||||
inline void tabortdci_(int t, Register a, int si);
|
||||
inline void tsuspend_(); // tsr with L=0
|
||||
inline void tresume_(); // tsr with L=1
|
||||
inline void tcheck(int f);
|
||||
|
||||
static bool is_tbegin(int x) {
|
||||
return TBEGIN_OPCODE == (x & (0x3f << OPCODE_SHIFT | 0x3ff << 1));
|
||||
}
|
||||
|
||||
// The following encoders use r0 as second operand. These instructions
|
||||
// read r0 as '0'.
|
||||
inline void lwzx( Register d, Register s2);
|
||||
|
@ -433,13 +433,7 @@ inline void Assembler::mftb(Register d ) { emit_int32(MFTB_OPCODE | rt
|
||||
// Data Stream Control Register
|
||||
inline void Assembler::mtdscr(Register s1) { emit_int32(MTDSCR_OPCODE | rs(s1)); }
|
||||
inline void Assembler::mfdscr(Register d ) { emit_int32(MFDSCR_OPCODE | rt(d)); }
|
||||
// Transactional Memory Registers
|
||||
inline void Assembler::mftfhar(Register d ) { emit_int32(MFTFHAR_OPCODE | rt(d)); }
|
||||
inline void Assembler::mftfiar(Register d ) { emit_int32(MFTFIAR_OPCODE | rt(d)); }
|
||||
inline void Assembler::mftexasr(Register d ) { emit_int32(MFTEXASR_OPCODE | rt(d)); }
|
||||
inline void Assembler::mftexasru(Register d ) { emit_int32(MFTEXASRU_OPCODE | rt(d)); }
|
||||
|
||||
// SAP JVM 2006-02-13 PPC branch instruction.
|
||||
// PPC 1, section 2.4.1 Branch Instructions
|
||||
inline void Assembler::b( address a, relocInfo::relocType rt) { emit_data(BXX_OPCODE| li(disp( intptr_t(a), intptr_t(pc()))) |aa(0)|lk(0), rt); }
|
||||
inline void Assembler::b( Label& L) { b( target(L)); }
|
||||
@ -1048,21 +1042,6 @@ inline void Assembler::vpmsumw( VectorRegister d, VectorRegister a, VectorRegis
|
||||
// Vector Permute and Xor (introduced with Power 8)
|
||||
inline void Assembler::vpermxor( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c) { emit_int32( VPERMXOR_OPCODE | vrt(d) | vra(a) | vrb(b) | vrc(c)); }
|
||||
|
||||
// Transactional Memory instructions (introduced with Power 8)
|
||||
inline void Assembler::tbegin_() { emit_int32( TBEGIN_OPCODE | rc(1)); }
|
||||
inline void Assembler::tbeginrot_() { emit_int32( TBEGIN_OPCODE | /*R=1*/ 1u << (31-10) | rc(1)); }
|
||||
inline void Assembler::tend_() { emit_int32( TEND_OPCODE | rc(1)); }
|
||||
inline void Assembler::tendall_() { emit_int32( TEND_OPCODE | /*A=1*/ 1u << (31-6) | rc(1)); }
|
||||
inline void Assembler::tabort_() { emit_int32( TABORT_OPCODE | rc(1)); }
|
||||
inline void Assembler::tabort_(Register a) { assert(a != R0, "r0 not allowed"); emit_int32( TABORT_OPCODE | ra(a) | rc(1)); }
|
||||
inline void Assembler::tabortwc_(int t, Register a, Register b) { emit_int32( TABORTWC_OPCODE | to(t) | ra(a) | rb(b) | rc(1)); }
|
||||
inline void Assembler::tabortwci_(int t, Register a, int si) { emit_int32( TABORTWCI_OPCODE | to(t) | ra(a) | sh1620(si) | rc(1)); }
|
||||
inline void Assembler::tabortdc_(int t, Register a, Register b) { emit_int32( TABORTDC_OPCODE | to(t) | ra(a) | rb(b) | rc(1)); }
|
||||
inline void Assembler::tabortdci_(int t, Register a, int si) { emit_int32( TABORTDCI_OPCODE | to(t) | ra(a) | sh1620(si) | rc(1)); }
|
||||
inline void Assembler::tsuspend_() { emit_int32( TSR_OPCODE | rc(1)); }
|
||||
inline void Assembler::tresume_() { emit_int32( TSR_OPCODE | /*L=1*/ 1u << (31-10) | rc(1)); }
|
||||
inline void Assembler::tcheck(int f) { emit_int32( TCHECK_OPCODE | bf(f)); }
|
||||
|
||||
// Deliver A Random Number (introduced with POWER9)
|
||||
inline void Assembler::darn(Register d, int l /* =1 */) { emit_int32( DARN_OPCODE | rt(d) | l14(l)); }
|
||||
|
||||
|
@ -45,13 +45,6 @@ const bool CCallingConventionRequiresIntsAsLongs = true;
|
||||
// The expected size in bytes of a cache line, used to pad data structures.
|
||||
#define DEFAULT_CACHE_LINE_SIZE 128
|
||||
|
||||
#if defined(COMPILER2) && (defined(AIX) || defined(LINUX))
|
||||
// Include Transactional Memory lock eliding optimization
|
||||
#define INCLUDE_RTM_OPT 1
|
||||
#else
|
||||
#define INCLUDE_RTM_OPT 0
|
||||
#endif
|
||||
|
||||
#define SUPPORT_RESERVED_STACK_AREA
|
||||
|
||||
// If UseSIGTRAP is active, we only use the poll bit and no polling page.
|
||||
|
@ -151,50 +151,7 @@ define_pd_global(intx, InitArrayShortSize, 9*BytesPerLong);
|
||||
"Trace all traps the signal handler handles.") \
|
||||
\
|
||||
develop(bool, ZapMemory, false, \
|
||||
"Write 0x0101... to empty memory. Use this to ease debugging.") \
|
||||
\
|
||||
/* Use Restricted Transactional Memory for lock elision */ \
|
||||
product(bool, UseRTMLocking, false, \
|
||||
"Enable RTM lock eliding for inflated locks in compiled code") \
|
||||
\
|
||||
product(bool, UseRTMForStackLocks, false, EXPERIMENTAL, \
|
||||
"Enable RTM lock eliding for stack locks in compiled code") \
|
||||
\
|
||||
product(bool, UseRTMDeopt, false, \
|
||||
"Perform deopt and recompilation based on RTM abort ratio") \
|
||||
\
|
||||
product(int, RTMRetryCount, 5, \
|
||||
"Number of RTM retries on lock abort or busy") \
|
||||
range(0, max_jint) \
|
||||
\
|
||||
product(int, RTMSpinLoopCount, 100, EXPERIMENTAL, \
|
||||
"Spin count for lock to become free before RTM retry") \
|
||||
range(0, 32767) /* immediate operand limit on ppc */ \
|
||||
\
|
||||
product(int, RTMAbortThreshold, 1000, EXPERIMENTAL, \
|
||||
"Calculate abort ratio after this number of aborts") \
|
||||
range(0, max_jint) \
|
||||
\
|
||||
product(int, RTMLockingThreshold, 10000, EXPERIMENTAL, \
|
||||
"Lock count at which to do RTM lock eliding without " \
|
||||
"abort ratio calculation") \
|
||||
range(0, max_jint) \
|
||||
\
|
||||
product(int, RTMAbortRatio, 50, EXPERIMENTAL, \
|
||||
"Lock abort ratio at which to stop use RTM lock eliding") \
|
||||
range(0, 100) /* natural range */ \
|
||||
\
|
||||
product(int, RTMTotalCountIncrRate, 64, EXPERIMENTAL, \
|
||||
"Increment total RTM attempted lock count once every n times") \
|
||||
range(1, 32767) /* immediate operand limit on ppc */ \
|
||||
constraint(RTMTotalCountIncrRateConstraintFunc,AfterErgo) \
|
||||
\
|
||||
product(intx, RTMLockingCalculationDelay, 0, EXPERIMENTAL, \
|
||||
"Number of milliseconds to wait before start calculating aborts " \
|
||||
"for RTM locking") \
|
||||
\
|
||||
product(bool, UseRTMXendForLockBusy, true, EXPERIMENTAL, \
|
||||
"Use RTM Xend instead of Xabort when lock busy")
|
||||
"Write 0x0101... to empty memory. Use this to ease debugging.")
|
||||
|
||||
// end of ARCH_FLAGS
|
||||
|
||||
|
@ -2173,461 +2173,9 @@ address MacroAssembler::emit_trampoline_stub(int destination_toc_offset,
|
||||
return stub;
|
||||
}
|
||||
|
||||
// TM on PPC64.
|
||||
// Emits a load-reserve / store-conditional loop (ldarx / stdcx_) that adds
// simm16 to the value at addr. result holds the value that was stored.
// CCR0 is overwritten by the store-conditional.
void MacroAssembler::atomic_inc_ptr(Register addr, Register result, int simm16) {
  Label L_again;

  bind(L_again);
  ldarx(result, addr, /*hint*/ false);
  addi(result, result, simm16);
  stdcx_(result, addr);

  // stXcx_ sets CCR0; branch back and try again when it reports "not equal".
  if (!UseStaticBranchPredictionInCompareAndSwapPPC64) {
    bne(                  CCR0, L_again);
  } else {
    bne_predict_not_taken(CCR0, L_again);
  }
}
|
||||
|
||||
// Emits a load-reserve / store-conditional loop (lwarx / stwcx_) that ORs
// uimm16 into the value at addr. result holds the value that was stored.
// CCR0 is overwritten by the store-conditional.
void MacroAssembler::atomic_ori_int(Register addr, Register result, int uimm16) {
  Label L_again;

  bind(L_again);
  lwarx(result, addr, /*hint*/ false);
  ori(result, result, uimm16);
  stwcx_(result, addr);

  // stXcx_ sets CCR0; branch back and try again when it reports "not equal".
  if (!UseStaticBranchPredictionInCompareAndSwapPPC64) {
    bne(                  CCR0, L_again);
  } else {
    bne_predict_not_taken(CCR0, L_again);
  }
}
|
||||
|
||||
#if INCLUDE_RTM_OPT
|
||||
|
||||
// Update rtm_counters based on abort status
|
||||
// input: abort_status
|
||||
// rtm_counters_Reg (RTMLockingCounters*)
|
||||
void MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters_Reg) {
|
||||
// Mapping to keep PreciseRTMLockingStatistics similar to x86.
|
||||
// x86 ppc (! means inverted, ? means not the same)
|
||||
// 0 31 Set if abort caused by XABORT instruction.
|
||||
// 1 ! 7 If set, the transaction may succeed on a retry. This bit is always clear if bit 0 is set.
|
||||
// 2 13 Set if another logical processor conflicted with a memory address that was part of the transaction that aborted.
|
||||
// 3 10 Set if an internal buffer overflowed.
|
||||
// 4 ?12 Set if a debug breakpoint was hit.
|
||||
// 5 ?32 Set if an abort occurred during execution of a nested transaction.
|
||||
const int failure_bit[] = {tm_tabort, // Signal handler will set this too.
|
||||
tm_failure_persistent,
|
||||
tm_non_trans_cf,
|
||||
tm_trans_cf,
|
||||
tm_footprint_of,
|
||||
tm_failure_code,
|
||||
tm_transaction_level};
|
||||
|
||||
const int num_failure_bits = sizeof(failure_bit) / sizeof(int);
|
||||
const int num_counters = RTMLockingCounters::ABORT_STATUS_LIMIT;
|
||||
|
||||
const int bit2counter_map[][num_counters] =
|
||||
// 0 = no map; 1 = mapped, no inverted logic; -1 = mapped, inverted logic
|
||||
// Inverted logic means that if a bit is set don't count it, or vice-versa.
|
||||
// Care must be taken when mapping bits to counters as bits for a given
|
||||
// counter must be mutually exclusive. Otherwise, the counter will be
|
||||
// incremented more than once.
|
||||
// counters:
|
||||
// 0 1 2 3 4 5
|
||||
// abort , persist, conflict, overflow, debug , nested bits:
|
||||
{{ 1 , 0 , 0 , 0 , 0 , 0 }, // abort
|
||||
{ 0 , -1 , 0 , 0 , 0 , 0 }, // failure_persistent
|
||||
{ 0 , 0 , 1 , 0 , 0 , 0 }, // non_trans_cf
|
||||
{ 0 , 0 , 1 , 0 , 0 , 0 }, // trans_cf
|
||||
{ 0 , 0 , 0 , 1 , 0 , 0 }, // footprint_of
|
||||
{ 0 , 0 , 0 , 0 , -1 , 0 }, // failure_code = 0xD4
|
||||
{ 0 , 0 , 0 , 0 , 0 , 1 }}; // transaction_level > 1
|
||||
// ...
|
||||
|
||||
// Move abort_status value to R0 and use abort_status register as a
|
||||
// temporary register because R0 as third operand in ld/std is treated
|
||||
// as base address zero (value). Likewise, R0 as second operand in addi
|
||||
// is problematic because it amounts to li.
|
||||
const Register temp_Reg = abort_status;
|
||||
const Register abort_status_R0 = R0;
|
||||
mr(abort_status_R0, abort_status);
|
||||
|
||||
// Increment total abort counter.
|
||||
int counters_offs = RTMLockingCounters::abort_count_offset();
|
||||
ld(temp_Reg, counters_offs, rtm_counters_Reg);
|
||||
addi(temp_Reg, temp_Reg, 1);
|
||||
std(temp_Reg, counters_offs, rtm_counters_Reg);
|
||||
|
||||
// Increment specific abort counters.
|
||||
if (PrintPreciseRTMLockingStatistics) {
|
||||
|
||||
// #0 counter offset.
|
||||
int abortX_offs = RTMLockingCounters::abortX_count_offset();
|
||||
|
||||
for (int nbit = 0; nbit < num_failure_bits; nbit++) {
|
||||
for (int ncounter = 0; ncounter < num_counters; ncounter++) {
|
||||
if (bit2counter_map[nbit][ncounter] != 0) {
|
||||
Label check_abort;
|
||||
int abort_counter_offs = abortX_offs + (ncounter << 3);
|
||||
|
||||
if (failure_bit[nbit] == tm_transaction_level) {
|
||||
// Don't check outer transaction, TL = 1 (bit 63). Hence only
|
||||
// 11 bits in the TL field are checked to find out if failure
|
||||
// occurred in a nested transaction. This check also matches
|
||||
// the case when nesting_of = 1 (nesting overflow).
|
||||
rldicr_(temp_Reg, abort_status_R0, failure_bit[nbit], 10);
|
||||
} else if (failure_bit[nbit] == tm_failure_code) {
|
||||
// Check failure code for trap or illegal caught in TM.
|
||||
// Bits 0:7 are tested as bit 7 (persistent) is copied from
|
||||
// tabort or treclaim source operand.
|
||||
// On Linux: trap or illegal is TM_CAUSE_SIGNAL (0xD4).
|
||||
rldicl(temp_Reg, abort_status_R0, 8, 56);
|
||||
cmpdi(CCR0, temp_Reg, 0xD4);
|
||||
} else {
|
||||
rldicr_(temp_Reg, abort_status_R0, failure_bit[nbit], 0);
|
||||
}
|
||||
|
||||
if (bit2counter_map[nbit][ncounter] == 1) {
|
||||
beq(CCR0, check_abort);
|
||||
} else {
|
||||
bne(CCR0, check_abort);
|
||||
}
|
||||
|
||||
// We don't increment atomically.
|
||||
ld(temp_Reg, abort_counter_offs, rtm_counters_Reg);
|
||||
addi(temp_Reg, temp_Reg, 1);
|
||||
std(temp_Reg, abort_counter_offs, rtm_counters_Reg);
|
||||
|
||||
bind(check_abort);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Restore abort_status.
|
||||
mr(abort_status, abort_status_R0);
|
||||
}
|
||||
|
||||
// Branch if (random & (count-1) != 0), count is 2^n
|
||||
// tmp and CR0 are killed
|
||||
void MacroAssembler::branch_on_random_using_tb(Register tmp, int count, Label& brLabel) {
|
||||
mftb(tmp);
|
||||
andi_(tmp, tmp, count-1);
|
||||
bne(CCR0, brLabel);
|
||||
}
|
||||
|
||||
// Perform abort ratio calculation, set no_rtm bit if high ratio.
|
||||
// input: rtm_counters_Reg (RTMLockingCounters* address) - KILLED
|
||||
void MacroAssembler::rtm_abort_ratio_calculation(Register rtm_counters_Reg,
|
||||
RTMLockingCounters* rtm_counters,
|
||||
Metadata* method_data) {
|
||||
Label L_done, L_check_always_rtm1, L_check_always_rtm2;
|
||||
|
||||
if (RTMLockingCalculationDelay > 0) {
|
||||
// Delay calculation.
|
||||
ld(rtm_counters_Reg, (RegisterOrConstant)(intptr_t)RTMLockingCounters::rtm_calculation_flag_addr());
|
||||
cmpdi(CCR0, rtm_counters_Reg, 0);
|
||||
beq(CCR0, L_done);
|
||||
load_const_optimized(rtm_counters_Reg, (address)rtm_counters, R0); // reload
|
||||
}
|
||||
// Abort ratio calculation only if abort_count > RTMAbortThreshold.
|
||||
// Aborted transactions = abort_count * 100
|
||||
// All transactions = total_count * RTMTotalCountIncrRate
|
||||
// Set no_rtm bit if (Aborted transactions >= All transactions * RTMAbortRatio)
|
||||
ld(R0, RTMLockingCounters::abort_count_offset(), rtm_counters_Reg);
|
||||
if (is_simm(RTMAbortThreshold, 16)) { // cmpdi can handle 16bit immediate only.
|
||||
cmpdi(CCR0, R0, RTMAbortThreshold);
|
||||
blt(CCR0, L_check_always_rtm2); // reload of rtm_counters_Reg not necessary
|
||||
} else {
|
||||
load_const_optimized(rtm_counters_Reg, RTMAbortThreshold);
|
||||
cmpd(CCR0, R0, rtm_counters_Reg);
|
||||
blt(CCR0, L_check_always_rtm1); // reload of rtm_counters_Reg required
|
||||
}
|
||||
mulli(R0, R0, 100);
|
||||
|
||||
const Register tmpReg = rtm_counters_Reg;
|
||||
ld(tmpReg, RTMLockingCounters::total_count_offset(), rtm_counters_Reg);
|
||||
mulli(tmpReg, tmpReg, RTMTotalCountIncrRate); // allowable range: int16
|
||||
mulli(tmpReg, tmpReg, RTMAbortRatio); // allowable range: int16
|
||||
cmpd(CCR0, R0, tmpReg);
|
||||
blt(CCR0, L_check_always_rtm1); // jump to reload
|
||||
if (method_data != nullptr) {
|
||||
// Set rtm_state to "no rtm" in MDO.
|
||||
// Not using a metadata relocation. Method and Class Loader are kept alive anyway.
|
||||
// (See nmethod::metadata_do and CodeBuffer::finalize_oop_references.)
|
||||
load_const(R0, (address)method_data + in_bytes(MethodData::rtm_state_offset()), tmpReg);
|
||||
atomic_ori_int(R0, tmpReg, NoRTM);
|
||||
}
|
||||
b(L_done);
|
||||
|
||||
bind(L_check_always_rtm1);
|
||||
load_const_optimized(rtm_counters_Reg, (address)rtm_counters, R0); // reload
|
||||
bind(L_check_always_rtm2);
|
||||
ld(tmpReg, RTMLockingCounters::total_count_offset(), rtm_counters_Reg);
|
||||
int64_t thresholdValue = RTMLockingThreshold / RTMTotalCountIncrRate;
|
||||
if (is_simm(thresholdValue, 16)) { // cmpdi can handle 16bit immediate only.
|
||||
cmpdi(CCR0, tmpReg, thresholdValue);
|
||||
} else {
|
||||
load_const_optimized(R0, thresholdValue);
|
||||
cmpd(CCR0, tmpReg, R0);
|
||||
}
|
||||
blt(CCR0, L_done);
|
||||
if (method_data != nullptr) {
|
||||
// Set rtm_state to "always rtm" in MDO.
|
||||
// Not using a metadata relocation. See above.
|
||||
load_const(R0, (address)method_data + in_bytes(MethodData::rtm_state_offset()), tmpReg);
|
||||
atomic_ori_int(R0, tmpReg, UseRTM);
|
||||
}
|
||||
bind(L_done);
|
||||
}
|
||||
|
||||
// Update counters and perform abort ratio calculation.
|
||||
// input: abort_status_Reg
|
||||
void MacroAssembler::rtm_profiling(Register abort_status_Reg, Register temp_Reg,
|
||||
RTMLockingCounters* rtm_counters,
|
||||
Metadata* method_data,
|
||||
bool profile_rtm) {
|
||||
|
||||
assert(rtm_counters != nullptr, "should not be null when profiling RTM");
|
||||
// Update rtm counters based on state at abort.
|
||||
// Reads abort_status_Reg, updates flags.
|
||||
assert_different_registers(abort_status_Reg, temp_Reg);
|
||||
load_const_optimized(temp_Reg, (address)rtm_counters, R0);
|
||||
rtm_counters_update(abort_status_Reg, temp_Reg);
|
||||
if (profile_rtm) {
|
||||
assert(rtm_counters != nullptr, "should not be null when profiling RTM");
|
||||
rtm_abort_ratio_calculation(temp_Reg, rtm_counters, method_data);
|
||||
}
|
||||
}
|
||||
|
||||
// Retry on abort if abort's status indicates non-persistent failure.
|
||||
// inputs: retry_count_Reg
|
||||
// : abort_status_Reg
|
||||
// output: retry_count_Reg decremented by 1
|
||||
void MacroAssembler::rtm_retry_lock_on_abort(Register retry_count_Reg, Register abort_status_Reg,
|
||||
Label& retryLabel, Label* checkRetry) {
|
||||
Label doneRetry;
|
||||
|
||||
// Don't retry if failure is persistent.
|
||||
// The persistent bit is set when a (A) Disallowed operation is performed in
|
||||
// transactional state, like for instance trying to write the TFHAR after a
|
||||
// transaction is started; or when there is (B) a Nesting Overflow (too many
|
||||
// nested transactions); or when (C) the Footprint overflows (too many
|
||||
// addresses touched in TM state so there is no more space in the footprint
|
||||
// area to track them); or in case of (D) a Self-Induced Conflict, i.e. a
|
||||
// store is performed to a given address in TM state, then once in suspended
|
||||
// state the same address is accessed. Failure (A) is very unlikely to occur
|
||||
// in the JVM. Failure (D) will never occur because Suspended state is never
|
||||
// used in the JVM. Thus mostly (B) a Nesting Overflow or (C) a Footprint
|
||||
// Overflow will set the persistent bit.
|
||||
rldicr_(R0, abort_status_Reg, tm_failure_persistent, 0);
|
||||
bne(CCR0, doneRetry);
|
||||
|
||||
// Don't retry if transaction was deliberately aborted, i.e. caused by a
|
||||
// tabort instruction.
|
||||
rldicr_(R0, abort_status_Reg, tm_tabort, 0);
|
||||
bne(CCR0, doneRetry);
|
||||
|
||||
// Retry if transaction aborted due to a conflict with another thread.
|
||||
if (checkRetry) { bind(*checkRetry); }
|
||||
addic_(retry_count_Reg, retry_count_Reg, -1);
|
||||
blt(CCR0, doneRetry);
|
||||
b(retryLabel);
|
||||
bind(doneRetry);
|
||||
}
|
||||
|
||||
// Spin and retry if lock is busy.
|
||||
// inputs: owner_addr_Reg (monitor address)
|
||||
// : retry_count_Reg
|
||||
// output: retry_count_Reg decremented by 1
|
||||
// CTR is killed
|
||||
void MacroAssembler::rtm_retry_lock_on_busy(Register retry_count_Reg, Register owner_addr_Reg, Label& retryLabel) {
|
||||
Label SpinLoop, doneRetry, doRetry;
|
||||
addic_(retry_count_Reg, retry_count_Reg, -1);
|
||||
blt(CCR0, doneRetry);
|
||||
|
||||
if (RTMSpinLoopCount > 1) {
|
||||
li(R0, RTMSpinLoopCount);
|
||||
mtctr(R0);
|
||||
}
|
||||
|
||||
// low thread priority
|
||||
smt_prio_low();
|
||||
bind(SpinLoop);
|
||||
|
||||
if (RTMSpinLoopCount > 1) {
|
||||
bdz(doRetry);
|
||||
ld(R0, 0, owner_addr_Reg);
|
||||
cmpdi(CCR0, R0, 0);
|
||||
bne(CCR0, SpinLoop);
|
||||
}
|
||||
|
||||
bind(doRetry);
|
||||
|
||||
// restore thread priority to default in userspace
|
||||
#ifdef LINUX
|
||||
smt_prio_medium_low();
|
||||
#else
|
||||
smt_prio_medium();
|
||||
#endif
|
||||
|
||||
b(retryLabel);
|
||||
|
||||
bind(doneRetry);
|
||||
}
|
||||
|
||||
// Use RTM for normal stack locks.
|
||||
// Input: objReg (object to lock)
|
||||
void MacroAssembler::rtm_stack_locking(ConditionRegister flag,
|
||||
Register obj, Register mark_word, Register tmp,
|
||||
Register retry_on_abort_count_Reg,
|
||||
RTMLockingCounters* stack_rtm_counters,
|
||||
Metadata* method_data, bool profile_rtm,
|
||||
Label& DONE_LABEL, Label& IsInflated) {
|
||||
assert(UseRTMForStackLocks, "why call this otherwise?");
|
||||
Label L_rtm_retry, L_decrement_retry, L_on_abort;
|
||||
|
||||
if (RTMRetryCount > 0) {
|
||||
load_const_optimized(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
|
||||
bind(L_rtm_retry);
|
||||
}
|
||||
andi_(R0, mark_word, markWord::monitor_value); // inflated vs stack-locked|neutral
|
||||
bne(CCR0, IsInflated);
|
||||
|
||||
if (PrintPreciseRTMLockingStatistics || profile_rtm) {
|
||||
Label L_noincrement;
|
||||
if (RTMTotalCountIncrRate > 1) {
|
||||
branch_on_random_using_tb(tmp, RTMTotalCountIncrRate, L_noincrement);
|
||||
}
|
||||
assert(stack_rtm_counters != nullptr, "should not be null when profiling RTM");
|
||||
load_const_optimized(tmp, (address)stack_rtm_counters->total_count_addr(), R0);
|
||||
//atomic_inc_ptr(tmp, /*temp, will be reloaded*/mark_word); We don't increment atomically
|
||||
ldx(mark_word, tmp);
|
||||
addi(mark_word, mark_word, 1);
|
||||
stdx(mark_word, tmp);
|
||||
bind(L_noincrement);
|
||||
}
|
||||
tbegin_();
|
||||
beq(CCR0, L_on_abort);
|
||||
ld(mark_word, oopDesc::mark_offset_in_bytes(), obj); // Reload in transaction, conflicts need to be tracked.
|
||||
andi(R0, mark_word, markWord::lock_mask_in_place); // look at 2 lock bits
|
||||
cmpwi(flag, R0, markWord::unlocked_value); // bits = 01 unlocked
|
||||
beq(flag, DONE_LABEL); // all done if unlocked
|
||||
|
||||
if (UseRTMXendForLockBusy) {
|
||||
tend_();
|
||||
b(L_decrement_retry);
|
||||
} else {
|
||||
tabort_();
|
||||
}
|
||||
bind(L_on_abort);
|
||||
const Register abort_status_Reg = tmp;
|
||||
mftexasr(abort_status_Reg);
|
||||
if (PrintPreciseRTMLockingStatistics || profile_rtm) {
|
||||
rtm_profiling(abort_status_Reg, /*temp*/mark_word, stack_rtm_counters, method_data, profile_rtm);
|
||||
}
|
||||
ld(mark_word, oopDesc::mark_offset_in_bytes(), obj); // reload
|
||||
if (RTMRetryCount > 0) {
|
||||
// Retry on lock abort if abort status is not permanent.
|
||||
rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry, &L_decrement_retry);
|
||||
} else {
|
||||
bind(L_decrement_retry);
|
||||
}
|
||||
}
|
||||
|
||||
// Use RTM for inflating locks
|
||||
// inputs: obj (object to lock)
|
||||
// mark_word (current header - KILLED)
|
||||
// boxReg (on-stack box address (displaced header location) - KILLED)
|
||||
void MacroAssembler::rtm_inflated_locking(ConditionRegister flag,
|
||||
Register obj, Register mark_word, Register boxReg,
|
||||
Register retry_on_busy_count_Reg, Register retry_on_abort_count_Reg,
|
||||
RTMLockingCounters* rtm_counters,
|
||||
Metadata* method_data, bool profile_rtm,
|
||||
Label& DONE_LABEL) {
|
||||
assert(UseRTMLocking, "why call this otherwise?");
|
||||
Label L_rtm_retry, L_decrement_retry, L_on_abort;
|
||||
// Clean monitor_value bit to get valid pointer.
|
||||
int owner_offset = in_bytes(ObjectMonitor::owner_offset()) - markWord::monitor_value;
|
||||
|
||||
// Store non-null, using boxReg instead of (intptr_t)markWord::unused_mark().
|
||||
std(boxReg, BasicLock::displaced_header_offset_in_bytes(), boxReg);
|
||||
const Register tmpReg = boxReg;
|
||||
const Register owner_addr_Reg = mark_word;
|
||||
addi(owner_addr_Reg, mark_word, owner_offset);
|
||||
|
||||
if (RTMRetryCount > 0) {
|
||||
load_const_optimized(retry_on_busy_count_Reg, RTMRetryCount); // Retry on lock busy.
|
||||
load_const_optimized(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort.
|
||||
bind(L_rtm_retry);
|
||||
}
|
||||
if (PrintPreciseRTMLockingStatistics || profile_rtm) {
|
||||
Label L_noincrement;
|
||||
if (RTMTotalCountIncrRate > 1) {
|
||||
branch_on_random_using_tb(R0, RTMTotalCountIncrRate, L_noincrement);
|
||||
}
|
||||
assert(rtm_counters != nullptr, "should not be null when profiling RTM");
|
||||
load_const(R0, (address)rtm_counters->total_count_addr(), tmpReg);
|
||||
//atomic_inc_ptr(R0, tmpReg); We don't increment atomically
|
||||
ldx(tmpReg, R0);
|
||||
addi(tmpReg, tmpReg, 1);
|
||||
stdx(tmpReg, R0);
|
||||
bind(L_noincrement);
|
||||
}
|
||||
tbegin_();
|
||||
beq(CCR0, L_on_abort);
|
||||
// We don't reload mark word. Will only be reset at safepoint.
|
||||
ld(R0, 0, owner_addr_Reg); // Load in transaction, conflicts need to be tracked.
|
||||
cmpdi(flag, R0, 0);
|
||||
beq(flag, DONE_LABEL);
|
||||
|
||||
if (UseRTMXendForLockBusy) {
|
||||
tend_();
|
||||
b(L_decrement_retry);
|
||||
} else {
|
||||
tabort_();
|
||||
}
|
||||
bind(L_on_abort);
|
||||
const Register abort_status_Reg = tmpReg;
|
||||
mftexasr(abort_status_Reg);
|
||||
if (PrintPreciseRTMLockingStatistics || profile_rtm) {
|
||||
rtm_profiling(abort_status_Reg, /*temp*/ owner_addr_Reg, rtm_counters, method_data, profile_rtm);
|
||||
// Restore owner_addr_Reg
|
||||
ld(mark_word, oopDesc::mark_offset_in_bytes(), obj);
|
||||
#ifdef ASSERT
|
||||
andi_(R0, mark_word, markWord::monitor_value);
|
||||
asm_assert_ne("must be inflated"); // Deflating only allowed at safepoint.
|
||||
#endif
|
||||
addi(owner_addr_Reg, mark_word, owner_offset);
|
||||
}
|
||||
if (RTMRetryCount > 0) {
|
||||
// Retry on lock abort if abort status is not permanent.
|
||||
rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
|
||||
}
|
||||
|
||||
// Appears unlocked - try to swing _owner from null to non-null.
|
||||
cmpxchgd(flag, /*current val*/ R0, (intptr_t)0, /*new val*/ R16_thread, owner_addr_Reg,
|
||||
MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
|
||||
MacroAssembler::cmpxchgx_hint_acquire_lock(), noreg, &L_decrement_retry, true);
|
||||
|
||||
if (RTMRetryCount > 0) {
|
||||
// success done else retry
|
||||
b(DONE_LABEL);
|
||||
bind(L_decrement_retry);
|
||||
// Spin and retry if lock is busy.
|
||||
rtm_retry_lock_on_busy(retry_on_busy_count_Reg, owner_addr_Reg, L_rtm_retry);
|
||||
} else {
|
||||
bind(L_decrement_retry);
|
||||
}
|
||||
}
|
||||
|
||||
#endif // INCLUDE_RTM_OPT
|
||||
|
||||
// "The box" is the space on the stack where we copy the object mark.
|
||||
void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box,
|
||||
Register temp, Register displaced_header, Register current_header,
|
||||
RTMLockingCounters* rtm_counters,
|
||||
RTMLockingCounters* stack_rtm_counters,
|
||||
Metadata* method_data,
|
||||
bool use_rtm, bool profile_rtm) {
|
||||
Register temp, Register displaced_header, Register current_header) {
|
||||
assert_different_registers(oop, box, temp, displaced_header, current_header);
|
||||
assert(LockingMode != LM_LIGHTWEIGHT || flag == CCR0, "bad condition register");
|
||||
Label object_has_monitor;
|
||||
@ -2644,14 +2192,6 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
|
||||
bne(flag, failure);
|
||||
}
|
||||
|
||||
#if INCLUDE_RTM_OPT
|
||||
if (UseRTMForStackLocks && use_rtm) {
|
||||
rtm_stack_locking(flag, oop, displaced_header, temp, /*temp*/ current_header,
|
||||
stack_rtm_counters, method_data, profile_rtm,
|
||||
success, object_has_monitor);
|
||||
}
|
||||
#endif // INCLUDE_RTM_OPT
|
||||
|
||||
// Handle existing monitor.
|
||||
// The object has an existing monitor iff (mark & monitor_value) != 0.
|
||||
andi_(temp, displaced_header, markWord::monitor_value);
|
||||
@ -2716,15 +2256,6 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
|
||||
// The object's monitor m is unlocked iff m->owner is null,
|
||||
// otherwise m->owner may contain a thread or a stack address.
|
||||
|
||||
#if INCLUDE_RTM_OPT
|
||||
// Use the same RTM locking code in 32- and 64-bit VM.
|
||||
if (use_rtm) {
|
||||
rtm_inflated_locking(flag, oop, displaced_header, box, temp, /*temp*/ current_header,
|
||||
rtm_counters, method_data, profile_rtm, success);
|
||||
bne(flag, failure);
|
||||
} else {
|
||||
#endif // INCLUDE_RTM_OPT
|
||||
|
||||
// Try to CAS m->owner from null to current thread.
|
||||
addi(temp, displaced_header, in_bytes(ObjectMonitor::owner_offset()) - markWord::monitor_value);
|
||||
cmpxchgd(/*flag=*/flag,
|
||||
@ -2751,10 +2282,6 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
|
||||
addi(recursions, recursions, 1);
|
||||
std(recursions, in_bytes(ObjectMonitor::recursions_offset() - ObjectMonitor::owner_offset()), temp);
|
||||
|
||||
#if INCLUDE_RTM_OPT
|
||||
} // use_rtm()
|
||||
#endif
|
||||
|
||||
// flag == EQ indicates success, increment held monitor count
|
||||
// flag == NE indicates failure
|
||||
bind(success);
|
||||
@ -2763,25 +2290,11 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
|
||||
}
|
||||
|
||||
void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box,
|
||||
Register temp, Register displaced_header, Register current_header,
|
||||
bool use_rtm) {
|
||||
Register temp, Register displaced_header, Register current_header) {
|
||||
assert_different_registers(oop, box, temp, displaced_header, current_header);
|
||||
assert(LockingMode != LM_LIGHTWEIGHT || flag == CCR0, "bad condition register");
|
||||
Label success, failure, object_has_monitor, notRecursive;
|
||||
|
||||
#if INCLUDE_RTM_OPT
|
||||
if (UseRTMForStackLocks && use_rtm) {
|
||||
Label L_regular_unlock;
|
||||
ld(current_header, oopDesc::mark_offset_in_bytes(), oop); // fetch markword
|
||||
andi(R0, current_header, markWord::lock_mask_in_place); // look at 2 lock bits
|
||||
cmpwi(flag, R0, markWord::unlocked_value); // bits = 01 unlocked
|
||||
bne(flag, L_regular_unlock); // else RegularLock
|
||||
tend_(); // otherwise end...
|
||||
b(success); // ... and we're done
|
||||
bind(L_regular_unlock);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (LockingMode == LM_LEGACY) {
|
||||
// Find the lock address and load the displaced header from the stack.
|
||||
ld(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box);
|
||||
@ -2793,7 +2306,6 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
|
||||
|
||||
// Handle existing monitor.
|
||||
// The object has an existing monitor iff (mark & monitor_value) != 0.
|
||||
RTM_OPT_ONLY( if (!(UseRTMForStackLocks && use_rtm)) ) // skip load if already done
|
||||
ld(current_header, oopDesc::mark_offset_in_bytes(), oop);
|
||||
andi_(R0, current_header, markWord::monitor_value);
|
||||
bne(CCR0, object_has_monitor);
|
||||
@ -2829,19 +2341,6 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
|
||||
addi(current_header, current_header, -(int)markWord::monitor_value); // monitor
|
||||
ld(temp, in_bytes(ObjectMonitor::owner_offset()), current_header);
|
||||
|
||||
// It's inflated.
|
||||
#if INCLUDE_RTM_OPT
|
||||
if (use_rtm) {
|
||||
Label L_regular_inflated_unlock;
|
||||
// Clean monitor_value bit to get valid pointer
|
||||
cmpdi(flag, temp, 0);
|
||||
bne(flag, L_regular_inflated_unlock);
|
||||
tend_();
|
||||
b(success);
|
||||
bind(L_regular_inflated_unlock);
|
||||
}
|
||||
#endif
|
||||
|
||||
// In case of LM_LIGHTWEIGHT, we may reach here with (temp & ObjectMonitor::ANONYMOUS_OWNER) != 0.
|
||||
// This is handled like owner thread mismatches: We take the slow path.
|
||||
cmpd(flag, temp, R16_thread);
|
||||
|
@ -28,7 +28,6 @@
|
||||
|
||||
#include "asm/assembler.hpp"
|
||||
#include "oops/accessDecorators.hpp"
|
||||
#include "runtime/rtmLocking.hpp"
|
||||
#include "utilities/macros.hpp"
|
||||
|
||||
// MacroAssembler extends Assembler by a few frequently used macros.
|
||||
@ -623,41 +622,11 @@ class MacroAssembler: public Assembler {
|
||||
enum { trampoline_stub_size = 6 * 4 };
|
||||
address emit_trampoline_stub(int destination_toc_offset, int insts_call_instruction_offset, Register Rtoc = noreg);
|
||||
|
||||
void atomic_inc_ptr(Register addr, Register result, int simm16 = 1);
|
||||
void atomic_ori_int(Register addr, Register result, int uimm16);
|
||||
|
||||
#if INCLUDE_RTM_OPT
|
||||
void rtm_counters_update(Register abort_status, Register rtm_counters);
|
||||
void branch_on_random_using_tb(Register tmp, int count, Label& brLabel);
|
||||
void rtm_abort_ratio_calculation(Register rtm_counters_reg, RTMLockingCounters* rtm_counters,
|
||||
Metadata* method_data);
|
||||
void rtm_profiling(Register abort_status_Reg, Register temp_Reg,
|
||||
RTMLockingCounters* rtm_counters, Metadata* method_data, bool profile_rtm);
|
||||
void rtm_retry_lock_on_abort(Register retry_count, Register abort_status,
|
||||
Label& retryLabel, Label* checkRetry = nullptr);
|
||||
void rtm_retry_lock_on_busy(Register retry_count, Register owner_addr, Label& retryLabel);
|
||||
void rtm_stack_locking(ConditionRegister flag, Register obj, Register mark_word, Register tmp,
|
||||
Register retry_on_abort_count,
|
||||
RTMLockingCounters* stack_rtm_counters,
|
||||
Metadata* method_data, bool profile_rtm,
|
||||
Label& DONE_LABEL, Label& IsInflated);
|
||||
void rtm_inflated_locking(ConditionRegister flag, Register obj, Register mark_word, Register box,
|
||||
Register retry_on_busy_count, Register retry_on_abort_count,
|
||||
RTMLockingCounters* rtm_counters,
|
||||
Metadata* method_data, bool profile_rtm,
|
||||
Label& DONE_LABEL);
|
||||
#endif
|
||||
|
||||
void compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box,
|
||||
Register tmp1, Register tmp2, Register tmp3,
|
||||
RTMLockingCounters* rtm_counters = nullptr,
|
||||
RTMLockingCounters* stack_rtm_counters = nullptr,
|
||||
Metadata* method_data = nullptr,
|
||||
bool use_rtm = false, bool profile_rtm = false);
|
||||
Register tmp1, Register tmp2, Register tmp3);
|
||||
|
||||
void compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box,
|
||||
Register tmp1, Register tmp2, Register tmp3,
|
||||
bool use_rtm = false);
|
||||
Register tmp1, Register tmp2, Register tmp3);
|
||||
|
||||
// Check if safepoint requested and if so branch
|
||||
void safepoint_poll(Label& slow_path, Register temp, bool at_return, bool in_nmethod);
|
||||
|
@ -12142,7 +12142,6 @@ instruct partialSubtypeCheck(iRegPdst result, iRegP_N2P subklass, iRegP_N2P supe
|
||||
instruct cmpFastLock(flagsRegCR0 crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2) %{
|
||||
match(Set crx (FastLock oop box));
|
||||
effect(TEMP tmp1, TEMP tmp2);
|
||||
predicate(!Compile::current()->use_rtm());
|
||||
|
||||
format %{ "FASTLOCK $oop, $box, $tmp1, $tmp2" %}
|
||||
ins_encode %{
|
||||
@ -12155,53 +12154,14 @@ instruct cmpFastLock(flagsRegCR0 crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1,
|
||||
ins_pipe(pipe_class_compare);
|
||||
%}
|
||||
|
||||
// Separate version for TM. Use bound register for box to enable USE_KILL.
|
||||
instruct cmpFastLock_tm(flagsReg crx, iRegPdst oop, rarg2RegP box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
|
||||
match(Set crx (FastLock oop box));
|
||||
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL box);
|
||||
predicate(Compile::current()->use_rtm());
|
||||
|
||||
format %{ "FASTLOCK $oop, $box, $tmp1, $tmp2, $tmp3 (TM)" %}
|
||||
ins_encode %{
|
||||
__ compiler_fast_lock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
|
||||
$tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
|
||||
_rtm_counters, _stack_rtm_counters,
|
||||
((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
|
||||
/*RTM*/ true, ra_->C->profile_rtm());
|
||||
// If locking was successful, crx should indicate 'EQ'.
|
||||
// The compiler generates a branch to the runtime call to
|
||||
// _complete_monitor_locking_Java for the case where crx is 'NE'.
|
||||
%}
|
||||
ins_pipe(pipe_class_compare);
|
||||
%}
|
||||
|
||||
instruct cmpFastUnlock(flagsRegCR0 crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
|
||||
match(Set crx (FastUnlock oop box));
|
||||
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
|
||||
predicate(!Compile::current()->use_rtm());
|
||||
|
||||
format %{ "FASTUNLOCK $oop, $box, $tmp1, $tmp2" %}
|
||||
ins_encode %{
|
||||
__ compiler_fast_unlock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
|
||||
$tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
|
||||
false);
|
||||
// If unlocking was successful, crx should indicate 'EQ'.
|
||||
// The compiler generates a branch to the runtime call to
|
||||
// _complete_monitor_unlocking_Java for the case where crx is 'NE'.
|
||||
%}
|
||||
ins_pipe(pipe_class_compare);
|
||||
%}
|
||||
|
||||
instruct cmpFastUnlock_tm(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
|
||||
match(Set crx (FastUnlock oop box));
|
||||
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
|
||||
predicate(Compile::current()->use_rtm());
|
||||
|
||||
format %{ "FASTUNLOCK $oop, $box, $tmp1, $tmp2 (TM)" %}
|
||||
ins_encode %{
|
||||
__ compiler_fast_unlock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
|
||||
$tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
|
||||
/*RTM*/ true);
|
||||
$tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
|
||||
// If unlocking was successful, crx should indicate 'EQ'.
|
||||
// The compiler generates a branch to the runtime call to
|
||||
// _complete_monitor_unlocking_Java for the case where crx is 'NE'.
|
||||
|
@ -2277,13 +2277,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
|
||||
// --------------------------------------------------------------------------
|
||||
vep_start_pc = (intptr_t)__ pc();
|
||||
|
||||
if (UseRTMLocking) {
|
||||
// Abort RTM transaction before calling JNI
|
||||
// because critical section can be large and
|
||||
// abort anyway. Also nmethod can be deoptimized.
|
||||
__ tabort_();
|
||||
}
|
||||
|
||||
if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) {
|
||||
Label L_skip_barrier;
|
||||
Register klass = r_temp_1;
|
||||
@ -3168,11 +3161,6 @@ void SharedRuntime::generate_uncommon_trap_blob() {
|
||||
InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
|
||||
address start = __ pc();
|
||||
|
||||
if (UseRTMLocking) {
|
||||
// Abort RTM transaction before possible nmethod deoptimization.
|
||||
__ tabort_();
|
||||
}
|
||||
|
||||
Register unroll_block_reg = R21_tmp1;
|
||||
Register klass_index_reg = R22_tmp2;
|
||||
Register unc_trap_reg = R23_tmp3;
|
||||
@ -3323,13 +3311,6 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t
|
||||
return_pc_location = RegisterSaver::return_pc_is_thread_saved_exception_pc;
|
||||
}
|
||||
|
||||
if (UseRTMLocking) {
|
||||
// Abort RTM transaction before calling runtime
|
||||
// because critical section can be large and so
|
||||
// will abort anyway. Also nmethod can be deoptimized.
|
||||
__ tabort_();
|
||||
}
|
||||
|
||||
bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP);
|
||||
|
||||
// Save registers, fpu state, and flags. Set R31 = return pc.
|
||||
|
@ -45,14 +45,6 @@
|
||||
#include <libperfstat.h>
|
||||
#endif
|
||||
|
||||
#if defined(LINUX) && defined(VM_LITTLE_ENDIAN)
|
||||
#include <sys/auxv.h>
|
||||
|
||||
#ifndef PPC_FEATURE2_HTM_NOSC
|
||||
#define PPC_FEATURE2_HTM_NOSC (1 << 24)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
bool VM_Version::_is_determine_features_test_running = false;
|
||||
uint64_t VM_Version::_dscr_val = 0;
|
||||
|
||||
@ -182,7 +174,7 @@ void VM_Version::initialize() {
|
||||
// Create and print feature-string.
|
||||
char buf[(num_features+1) * 16]; // Max 16 chars per feature.
|
||||
jio_snprintf(buf, sizeof(buf),
|
||||
"ppc64%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||
"ppc64%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||
(has_fsqrt() ? " fsqrt" : ""),
|
||||
(has_isel() ? " isel" : ""),
|
||||
(has_lxarxeh() ? " lxarxeh" : ""),
|
||||
@ -199,7 +191,6 @@ void VM_Version::initialize() {
|
||||
(has_ldbrx() ? " ldbrx" : ""),
|
||||
(has_stdbrx() ? " stdbrx" : ""),
|
||||
(has_vshasig() ? " sha" : ""),
|
||||
(has_tm() ? " rtm" : ""),
|
||||
(has_darn() ? " darn" : ""),
|
||||
(has_brw() ? " brw" : "")
|
||||
// Make sure number of %s matches num_features!
|
||||
@ -376,49 +367,6 @@ void VM_Version::initialize() {
|
||||
FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
|
||||
}
|
||||
|
||||
|
||||
// Adjust RTM (Restricted Transactional Memory) flags.
|
||||
if (UseRTMLocking) {
|
||||
// If CPU or OS do not support RTM:
|
||||
if (PowerArchitecturePPC64 < 8 || PowerArchitecturePPC64 > 9) {
|
||||
vm_exit_during_initialization("RTM instructions are not available on this CPU.");
|
||||
}
|
||||
|
||||
if (!has_tm()) {
|
||||
vm_exit_during_initialization("RTM is not supported on this OS version.");
|
||||
}
|
||||
|
||||
#if INCLUDE_RTM_OPT
|
||||
if (!FLAG_IS_CMDLINE(UseRTMLocking)) {
|
||||
// RTM locking should be used only for applications with
|
||||
// high lock contention. For now we do not use it by default.
|
||||
vm_exit_during_initialization("UseRTMLocking flag should be only set on command line");
|
||||
}
|
||||
if (LockingMode != LM_LEGACY) {
|
||||
warning("UseRTMLocking requires LockingMode = 1");
|
||||
FLAG_SET_DEFAULT(UseRTMLocking, false);
|
||||
}
|
||||
#else
|
||||
// Only C2 does RTM locking optimization.
|
||||
vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
|
||||
#endif
|
||||
} else { // !UseRTMLocking
|
||||
if (UseRTMForStackLocks) {
|
||||
if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) {
|
||||
warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off");
|
||||
}
|
||||
FLAG_SET_DEFAULT(UseRTMForStackLocks, false);
|
||||
}
|
||||
if (UseRTMDeopt) {
|
||||
FLAG_SET_DEFAULT(UseRTMDeopt, false);
|
||||
}
|
||||
#ifdef COMPILER2
|
||||
if (PrintPreciseRTMLockingStatistics) {
|
||||
FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
// This machine allows unaligned memory accesses
|
||||
if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
|
||||
FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
|
||||
@ -601,7 +549,6 @@ void VM_Version::determine_features() {
|
||||
a->ldbrx(R7, R3_ARG1, R4_ARG2); // code[14] -> ldbrx
|
||||
a->stdbrx(R7, R3_ARG1, R4_ARG2); // code[15] -> stdbrx
|
||||
a->vshasigmaw(VR0, VR1, 1, 0xF); // code[16] -> vshasig
|
||||
// rtm is determined by OS
|
||||
a->darn(R7); // code[17] -> darn
|
||||
a->brw(R5, R6); // code[18] -> brw
|
||||
a->blr();
|
||||
@ -655,7 +602,6 @@ void VM_Version::determine_features() {
|
||||
if (code[feature_cntr++]) features |= ldbrx_m;
|
||||
if (code[feature_cntr++]) features |= stdbrx_m;
|
||||
if (code[feature_cntr++]) features |= vshasig_m;
|
||||
// feature rtm_m is determined by OS
|
||||
if (code[feature_cntr++]) features |= darn_m;
|
||||
if (code[feature_cntr++]) features |= brw_m;
|
||||
|
||||
@ -667,37 +613,6 @@ void VM_Version::determine_features() {
|
||||
}
|
||||
|
||||
_features = features;
|
||||
|
||||
#ifdef AIX
|
||||
// To enable it on AIX it's necessary POWER8 or above and at least AIX 7.2.
|
||||
// Actually, this is supported since AIX 7.1.. Unfortunately, this first
|
||||
// contained bugs, so that it can only be enabled after AIX 7.1.3.30.
|
||||
// The Java property os.version, which is used in RTM tests to decide
|
||||
// whether the feature is available, only knows major and minor versions.
|
||||
// We don't want to change this property, as user code might depend on it.
|
||||
// So the tests can not check on subversion 3.30, and we only enable RTM
|
||||
// with AIX 7.2.
|
||||
if (has_lqarx() && !has_brw()) { // POWER8 or POWER9
|
||||
if (os::Aix::os_version() >= 0x07020000) { // At least AIX 7.2.
|
||||
_features |= rtm_m;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(LINUX) && defined(VM_LITTLE_ENDIAN)
|
||||
unsigned long auxv = getauxval(AT_HWCAP2);
|
||||
|
||||
if (auxv & PPC_FEATURE2_HTM_NOSC) {
|
||||
if (auxv & PPC_FEATURE2_HAS_HTM) {
|
||||
// TM on POWER8 and POWER9 in compat mode (VM) is supported by the JVM.
|
||||
// TM on POWER9 DD2.1 NV (baremetal) is not supported by the JVM (TM on
|
||||
// POWER9 DD2.1 NV has a few issues that need a couple of firmware
|
||||
// and kernel workarounds, so there is a new mode only supported
|
||||
// on non-virtualized P9 machines called HTM with no Suspend Mode).
|
||||
// TM on POWER9 D2.2+ NV is not supported at all by Linux.
|
||||
_features |= rtm_m;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
// Power 8: Configure Data Stream Control Register.
|
||||
|
@ -49,7 +49,6 @@ protected:
|
||||
ldbrx,
|
||||
stdbrx,
|
||||
vshasig,
|
||||
rtm,
|
||||
darn,
|
||||
brw,
|
||||
num_features // last entry to count features
|
||||
@ -73,7 +72,6 @@ protected:
|
||||
ldbrx_m = (1 << ldbrx ),
|
||||
stdbrx_m = (1 << stdbrx ),
|
||||
vshasig_m = (1 << vshasig),
|
||||
rtm_m = (1 << rtm ),
|
||||
darn_m = (1 << darn ),
|
||||
brw_m = (1 << brw ),
|
||||
all_features_m = (unsigned long)-1
|
||||
@ -116,7 +114,6 @@ public:
|
||||
static bool has_ldbrx() { return (_features & ldbrx_m) != 0; }
|
||||
static bool has_stdbrx() { return (_features & stdbrx_m) != 0; }
|
||||
static bool has_vshasig() { return (_features & vshasig_m) != 0; }
|
||||
static bool has_tm() { return (_features & rtm_m) != 0; }
|
||||
static bool has_darn() { return (_features & darn_m) != 0; }
|
||||
static bool has_brw() { return (_features & brw_m) != 0; }
|
||||
|
||||
|
@ -1928,7 +1928,7 @@ bool Arguments::check_vm_args_consistency() {
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
#if (defined(X86) || defined(PPC64)) && !defined(ZERO)
|
||||
#if defined(X86) && !defined(ZERO)
|
||||
if (LockingMode == LM_MONITOR && UseRTMForStackLocks) {
|
||||
jio_fprintf(defaultStream::error_stream(),
|
||||
"LockingMode == 0 (LM_MONITOR) and -XX:+UseRTMForStackLocks are mutually exclusive");
|
||||
|
@ -55,13 +55,13 @@ compiler/rtm/locking/TestRTMAbortRatio.java 8183263 generic-x64,generic-i586
|
||||
compiler/rtm/locking/TestRTMAbortThreshold.java 8183263 generic-x64,generic-i586
|
||||
compiler/rtm/locking/TestRTMAfterNonRTMDeopt.java 8183263 generic-x64,generic-i586
|
||||
compiler/rtm/locking/TestRTMDeoptOnHighAbortRatio.java 8183263 generic-x64,generic-i586
|
||||
compiler/rtm/locking/TestRTMDeoptOnLowAbortRatio.java 8183263,8307907 generic-x64,generic-i586,aix-ppc64
|
||||
compiler/rtm/locking/TestRTMDeoptOnLowAbortRatio.java 8183263 generic-x64,generic-i586
|
||||
compiler/rtm/locking/TestRTMLockingCalculationDelay.java 8183263 generic-x64,generic-i586
|
||||
compiler/rtm/locking/TestRTMLockingThreshold.java 8183263,8307907 generic-x64,generic-i586,aix-ppc64
|
||||
compiler/rtm/locking/TestRTMLockingThreshold.java 8183263 generic-x64,generic-i586
|
||||
compiler/rtm/locking/TestRTMSpinLoopCount.java 8183263 generic-x64,generic-i586
|
||||
compiler/rtm/locking/TestUseRTMDeopt.java 8183263 generic-x64,generic-i586
|
||||
compiler/rtm/locking/TestUseRTMXendForLockBusy.java 8183263,8307907 generic-x64,generic-i586,aix-ppc64
|
||||
compiler/rtm/print/TestPrintPreciseRTMLockingStatistics.java 8183263,8307907 generic-x64,generic-i586,aix-ppc64
|
||||
compiler/rtm/locking/TestUseRTMXendForLockBusy.java 8183263 generic-x64,generic-i586
|
||||
compiler/rtm/print/TestPrintPreciseRTMLockingStatistics.java 8183263 generic-x64,generic-i586
|
||||
|
||||
compiler/c2/Test8004741.java 8235801 generic-all
|
||||
|
||||
|
@ -36,8 +36,6 @@ public abstract class RTMGenericCommandLineOptionTest {
|
||||
|
||||
protected static final String RTM_INSTR_ERROR
|
||||
= "RTM instructions are not available on this CPU";
|
||||
protected static final String RTM_OS_ERROR
|
||||
= "RTM is not supported on this OS version";
|
||||
protected static final String RTM_UNSUPPORTED_VM_ERROR
|
||||
= "RTM locking optimization is not supported in this VM";
|
||||
protected static final String RTM_FOR_STACK_LOCKS_WARNING
|
||||
@ -84,7 +82,7 @@ public abstract class RTMGenericCommandLineOptionTest {
|
||||
}
|
||||
|
||||
public void runTestCases() throws Throwable {
|
||||
if (Platform.isX86() || Platform.isX64() || Platform.isPPC()) {
|
||||
if (Platform.isX86() || Platform.isX64()) {
|
||||
if (Platform.isServer()) {
|
||||
runX86SupportedVMTestCases();
|
||||
} else {
|
||||
|
@ -49,37 +49,17 @@ public class TestUseRTMLockingOptionOnUnsupportedCPU {
|
||||
"UseRTMLocking");
|
||||
String errorMessage = RTMGenericCommandLineOptionTest.RTM_INSTR_ERROR;
|
||||
|
||||
if (Platform.isX86() || Platform.isX64() || Platform.isPPC()) {
|
||||
if (Platform.isX86() || Platform.isX64()) {
|
||||
String shouldFailMessage = "JVM startup should fail with option " +
|
||||
"-XX:+UseRTMLocking on unsupported CPU";
|
||||
|
||||
try {
|
||||
// verify that we get an error when use +UseRTMLocking
|
||||
// on unsupported CPU
|
||||
CommandLineOptionTest.verifySameJVMStartup(
|
||||
new String[] { errorMessage },
|
||||
new String[] { unrecognizedOption }, shouldFailMessage,
|
||||
shouldFailMessage + ". Error message should be shown.",
|
||||
ExitCode.FAIL, "-XX:+UseRTMLocking");
|
||||
} catch (Throwable e) {
|
||||
// verify that we get an error when use +UseRTMLocking
|
||||
// on unsupported OS. It might be the case that although CPU
|
||||
// supports RTM the OS version does not support RTM
|
||||
if (Platform.isPPC()) {
|
||||
String errorMessage2 = RTMGenericCommandLineOptionTest.RTM_OS_ERROR;
|
||||
String shouldFailMessage2 = "JVM startup should fail with option " +
|
||||
"-XX:+UseRTMLocking on unsupported CPU or " +
|
||||
"OS version";
|
||||
|
||||
CommandLineOptionTest.verifySameJVMStartup(
|
||||
new String[] { errorMessage2 },
|
||||
new String[] { unrecognizedOption}, shouldFailMessage2,
|
||||
shouldFailMessage2 + ". Error message should be shown.",
|
||||
ExitCode.FAIL, "-XX:+UseRTMLocking");
|
||||
} else {
|
||||
throw e; // checking unsupported OS error is not necessary
|
||||
}
|
||||
}
|
||||
// verify that we get an error when use +UseRTMLocking
|
||||
// on unsupported CPU
|
||||
CommandLineOptionTest.verifySameJVMStartup(
|
||||
new String[] { errorMessage },
|
||||
new String[] { unrecognizedOption }, shouldFailMessage,
|
||||
shouldFailMessage + ". Error message should be shown.",
|
||||
ExitCode.FAIL, "-XX:+UseRTMLocking");
|
||||
|
||||
String shouldPassMessage = "JVM startup should pass with option "
|
||||
+ "-XX:-UseRTMLocking even on unsupported CPU";
|
||||
|
@ -63,11 +63,7 @@ public class TestRTMSpinLoopCount {
|
||||
|
||||
protected void runTestCases() throws Throwable {
|
||||
|
||||
if (Platform.isPPC()) {
|
||||
SPIN_LOOP_COUNTS = new int[] { 0, 10, 100, 1_000, 10_000 };
|
||||
} else {
|
||||
SPIN_LOOP_COUNTS = new int[] { 0, 100, 1_000, 10_000, 100_000 };
|
||||
}
|
||||
SPIN_LOOP_COUNTS = new int[] { 0, 100, 1_000, 10_000, 100_000 };
|
||||
|
||||
long[] aborts = new long[TestRTMSpinLoopCount.SPIN_LOOP_COUNTS.length];
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user