8307907: [ppc] Remove RTM locking implementation

Reviewed-by: mbaesken, rrich, stuefe
commit de8aca27ba
parent 4c0e164238
Author: Martin Doerr
Date:   2023-06-15 09:52:31 +00:00
15 changed files with 22 additions and 864 deletions

@@ -337,15 +337,6 @@ class Assembler : public AbstractAssembler {
MFCTR_OPCODE = (MFSPR_OPCODE | 9 << SPR_0_4_SHIFT),
// Attention: Higher and lower half are inserted in reversed order.
MTTFHAR_OPCODE = (MTSPR_OPCODE | 4 << SPR_5_9_SHIFT | 0 << SPR_0_4_SHIFT),
MFTFHAR_OPCODE = (MFSPR_OPCODE | 4 << SPR_5_9_SHIFT | 0 << SPR_0_4_SHIFT),
MTTFIAR_OPCODE = (MTSPR_OPCODE | 4 << SPR_5_9_SHIFT | 1 << SPR_0_4_SHIFT),
MFTFIAR_OPCODE = (MFSPR_OPCODE | 4 << SPR_5_9_SHIFT | 1 << SPR_0_4_SHIFT),
MTTEXASR_OPCODE = (MTSPR_OPCODE | 4 << SPR_5_9_SHIFT | 2 << SPR_0_4_SHIFT),
MFTEXASR_OPCODE = (MFSPR_OPCODE | 4 << SPR_5_9_SHIFT | 2 << SPR_0_4_SHIFT),
MTTEXASRU_OPCODE = (MTSPR_OPCODE | 4 << SPR_5_9_SHIFT | 3 << SPR_0_4_SHIFT),
MFTEXASRU_OPCODE = (MFSPR_OPCODE | 4 << SPR_5_9_SHIFT | 3 << SPR_0_4_SHIFT),
MTVRSAVE_OPCODE = (MTSPR_OPCODE | 8 << SPR_5_9_SHIFT | 0 << SPR_0_4_SHIFT),
MFVRSAVE_OPCODE = (MFSPR_OPCODE | 8 << SPR_5_9_SHIFT | 0 << SPR_0_4_SHIFT),
@@ -766,17 +757,6 @@ class Assembler : public AbstractAssembler {
// Vector Permute and Xor (introduced with Power 8)
VPERMXOR_OPCODE = (4u << OPCODE_SHIFT | 45u),
// Transactional Memory instructions (introduced with Power 8)
TBEGIN_OPCODE = (31u << OPCODE_SHIFT | 654u << 1),
TEND_OPCODE = (31u << OPCODE_SHIFT | 686u << 1),
TABORT_OPCODE = (31u << OPCODE_SHIFT | 910u << 1),
TABORTWC_OPCODE = (31u << OPCODE_SHIFT | 782u << 1),
TABORTWCI_OPCODE = (31u << OPCODE_SHIFT | 846u << 1),
TABORTDC_OPCODE = (31u << OPCODE_SHIFT | 814u << 1),
TABORTDCI_OPCODE = (31u << OPCODE_SHIFT | 878u << 1),
TSR_OPCODE = (31u << OPCODE_SHIFT | 750u << 1),
TCHECK_OPCODE = (31u << OPCODE_SHIFT | 718u << 1),
// Icache and dcache related instructions
DCBA_OPCODE = (31u << OPCODE_SHIFT | 758u << 1),
DCBZ_OPCODE = (31u << OPCODE_SHIFT | 1014u << 1),
@@ -1814,33 +1794,6 @@ class Assembler : public AbstractAssembler {
// Data Stream Control Register
inline void mtdscr(Register s1);
inline void mfdscr(Register d );
// Transactional Memory Registers
inline void mftfhar(Register d);
inline void mftfiar(Register d);
inline void mftexasr(Register d);
inline void mftexasru(Register d);
// TEXASR bit description
enum transaction_failure_reason {
// Upper half (TEXASRU):
tm_failure_code = 0, // The Failure Code is copied from tabort or treclaim operand.
tm_failure_persistent = 7, // The failure is likely to recur on each execution.
tm_disallowed = 8, // The instruction is not permitted.
tm_nesting_of = 9, // The maximum transaction level was exceeded.
tm_footprint_of = 10, // The tracking limit for transactional storage accesses was exceeded.
tm_self_induced_cf = 11, // A self-induced conflict occurred in Suspended state.
tm_non_trans_cf = 12, // A conflict occurred with a non-transactional access by another processor.
tm_trans_cf = 13, // A conflict occurred with another transaction.
tm_translation_cf = 14, // A conflict occurred with a TLB invalidation.
tm_inst_fetch_cf = 16, // An instruction fetch was performed from a block that was previously written transactionally.
tm_tabort = 31, // Termination was caused by the execution of an abort instruction.
// Lower half:
tm_suspended = 32, // Failure was recorded in Suspended state.
tm_failure_summary = 36, // Failure has been detected and recorded.
tm_tfiar_exact = 37, // Value in the TFIAR is exact.
tm_rot = 38, // Rollback-only transaction.
tm_transaction_level = 52, // Transaction level (nesting depth + 1).
};
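For orientation: the bit numbers in the enum above use the Power ISA convention, where bit 0 is the most significant bit of the 64-bit TEXASR. A minimal C++ sketch of the translation to an ordinary mask, assuming exactly that numbering (the helper name is ours, not HotSpot's):

  #include <cstdint>
  // IBM bit 0 = MSB, so IBM bit n is bit (63 - n) counted from the LSB.
  inline uint64_t texasr_bit_mask(int ibm_bit) {
    return uint64_t(1) << (63 - ibm_bit);  // e.g. tm_failure_persistent (7) -> 1ULL << 56
  }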
// PPC 1, section 2.4.1 Branch Instructions
inline void b( address a, relocInfo::relocType rt = relocInfo::none);
@@ -2452,25 +2405,6 @@ class Assembler : public AbstractAssembler {
// Vector Permute and Xor (introduced with Power 8)
inline void vpermxor( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c);
// Transactional Memory instructions (introduced with Power 8)
inline void tbegin_(); // R=0
inline void tbeginrot_(); // R=1 Rollback-Only Transaction
inline void tend_(); // A=0
inline void tendall_(); // A=1
inline void tabort_();
inline void tabort_(Register a);
inline void tabortwc_(int t, Register a, Register b);
inline void tabortwci_(int t, Register a, int si);
inline void tabortdc_(int t, Register a, Register b);
inline void tabortdci_(int t, Register a, int si);
inline void tsuspend_(); // tsr with L=0
inline void tresume_(); // tsr with L=1
inline void tcheck(int f);
static bool is_tbegin(int x) {
return TBEGIN_OPCODE == (x & (0x3f << OPCODE_SHIFT | 0x3ff << 1));
}
// The following encoders use r0 as second operand. These instructions
// read r0 as '0'.
inline void lwzx( Register d, Register s2);

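The TM opcodes removed above are X-form encodings: primary opcode 31 in the top six bits, and a 10-bit extended opcode sitting one position above the Rc flag in the low bit. A small compile-time sketch, assuming OPCODE_SHIFT is 26 as elsewhere in assembler_ppc.hpp (constant names here are ours):

  #include <cstdint>
  constexpr uint32_t kOpcodeShift = 26;                      // assumption: HotSpot's OPCODE_SHIFT
  constexpr uint32_t kTbegin      = 31u << kOpcodeShift | 654u << 1;
  constexpr uint32_t kTbeginDot   = kTbegin | 1u;            // tbegin. (Rc = 1), as tbegin_() emits
  static_assert((kTbeginDot & (0x3fu << kOpcodeShift | 0x3ffu << 1)) == kTbegin,
                "the mask used by is_tbegin() isolates exactly the opcode fields");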
@@ -433,13 +433,7 @@ inline void Assembler::mftb(Register d ) { emit_int32(MFTB_OPCODE | rt
// Data Stream Control Register
inline void Assembler::mtdscr(Register s1) { emit_int32(MTDSCR_OPCODE | rs(s1)); }
inline void Assembler::mfdscr(Register d ) { emit_int32(MFDSCR_OPCODE | rt(d)); }
// Transactional Memory Registers
inline void Assembler::mftfhar(Register d ) { emit_int32(MFTFHAR_OPCODE | rt(d)); }
inline void Assembler::mftfiar(Register d ) { emit_int32(MFTFIAR_OPCODE | rt(d)); }
inline void Assembler::mftexasr(Register d ) { emit_int32(MFTEXASR_OPCODE | rt(d)); }
inline void Assembler::mftexasru(Register d ) { emit_int32(MFTEXASRU_OPCODE | rt(d)); }
// SAP JVM 2006-02-13 PPC branch instruction.
// PPC 1, section 2.4.1 Branch Instructions
inline void Assembler::b( address a, relocInfo::relocType rt) { emit_data(BXX_OPCODE| li(disp( intptr_t(a), intptr_t(pc()))) |aa(0)|lk(0), rt); }
inline void Assembler::b( Label& L) { b( target(L)); }
@@ -1048,21 +1042,6 @@ inline void Assembler::vpmsumw( VectorRegister d, VectorRegister a, VectorRegis
// Vector Permute and Xor (introduced with Power 8)
inline void Assembler::vpermxor( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c) { emit_int32( VPERMXOR_OPCODE | vrt(d) | vra(a) | vrb(b) | vrc(c)); }
// Transactional Memory instructions (introduced with Power 8)
inline void Assembler::tbegin_() { emit_int32( TBEGIN_OPCODE | rc(1)); }
inline void Assembler::tbeginrot_() { emit_int32( TBEGIN_OPCODE | /*R=1*/ 1u << (31-10) | rc(1)); }
inline void Assembler::tend_() { emit_int32( TEND_OPCODE | rc(1)); }
inline void Assembler::tendall_() { emit_int32( TEND_OPCODE | /*A=1*/ 1u << (31-6) | rc(1)); }
inline void Assembler::tabort_() { emit_int32( TABORT_OPCODE | rc(1)); }
inline void Assembler::tabort_(Register a) { assert(a != R0, "r0 not allowed"); emit_int32( TABORT_OPCODE | ra(a) | rc(1)); }
inline void Assembler::tabortwc_(int t, Register a, Register b) { emit_int32( TABORTWC_OPCODE | to(t) | ra(a) | rb(b) | rc(1)); }
inline void Assembler::tabortwci_(int t, Register a, int si) { emit_int32( TABORTWCI_OPCODE | to(t) | ra(a) | sh1620(si) | rc(1)); }
inline void Assembler::tabortdc_(int t, Register a, Register b) { emit_int32( TABORTDC_OPCODE | to(t) | ra(a) | rb(b) | rc(1)); }
inline void Assembler::tabortdci_(int t, Register a, int si) { emit_int32( TABORTDCI_OPCODE | to(t) | ra(a) | sh1620(si) | rc(1)); }
inline void Assembler::tsuspend_() { emit_int32( TSR_OPCODE | rc(1)); }
inline void Assembler::tresume_() { emit_int32( TSR_OPCODE | /*L=1*/ 1u << (31-10) | rc(1)); }
inline void Assembler::tcheck(int f) { emit_int32( TCHECK_OPCODE | bf(f)); }
// Deliver A Random Number (introduced with POWER9)
inline void Assembler::darn(Register d, int l /* =1 */) { emit_int32( DARN_OPCODE | rt(d) | l14(l)); }

@@ -45,13 +45,6 @@ const bool CCallingConventionRequiresIntsAsLongs = true;
// The expected size in bytes of a cache line, used to pad data structures.
#define DEFAULT_CACHE_LINE_SIZE 128
#if defined(COMPILER2) && (defined(AIX) || defined(LINUX))
// Include Transactional Memory lock eliding optimization
#define INCLUDE_RTM_OPT 1
#else
#define INCLUDE_RTM_OPT 0
#endif
#define SUPPORT_RESERVED_STACK_AREA
// If UseSIGTRAP is active, we only use the poll bit and no polling page.

@@ -151,50 +151,7 @@ define_pd_global(intx, InitArrayShortSize, 9*BytesPerLong);
"Trace all traps the signal handler handles.") \
\
develop(bool, ZapMemory, false, \
"Write 0x0101... to empty memory. Use this to ease debugging.") \
\
/* Use Restricted Transactional Memory for lock elision */ \
product(bool, UseRTMLocking, false, \
"Enable RTM lock eliding for inflated locks in compiled code") \
\
product(bool, UseRTMForStackLocks, false, EXPERIMENTAL, \
"Enable RTM lock eliding for stack locks in compiled code") \
\
product(bool, UseRTMDeopt, false, \
"Perform deopt and recompilation based on RTM abort ratio") \
\
product(int, RTMRetryCount, 5, \
"Number of RTM retries on lock abort or busy") \
range(0, max_jint) \
\
product(int, RTMSpinLoopCount, 100, EXPERIMENTAL, \
"Spin count for lock to become free before RTM retry") \
range(0, 32767) /* immediate operand limit on ppc */ \
\
product(int, RTMAbortThreshold, 1000, EXPERIMENTAL, \
"Calculate abort ratio after this number of aborts") \
range(0, max_jint) \
\
product(int, RTMLockingThreshold, 10000, EXPERIMENTAL, \
"Lock count at which to do RTM lock eliding without " \
"abort ratio calculation") \
range(0, max_jint) \
\
product(int, RTMAbortRatio, 50, EXPERIMENTAL, \
"Lock abort ratio at which to stop use RTM lock eliding") \
range(0, 100) /* natural range */ \
\
product(int, RTMTotalCountIncrRate, 64, EXPERIMENTAL, \
"Increment total RTM attempted lock count once every n times") \
range(1, 32767) /* immediate operand limit on ppc */ \
constraint(RTMTotalCountIncrRateConstraintFunc,AfterErgo) \
\
product(intx, RTMLockingCalculationDelay, 0, EXPERIMENTAL, \
"Number of milliseconds to wait before start calculating aborts " \
"for RTM locking") \
\
product(bool, UseRTMXendForLockBusy, true, EXPERIMENTAL, \
"Use RTM Xend instead of Xabort when lock busy")
"Write 0x0101... to empty memory. Use this to ease debugging.")
// end of ARCH_FLAGS

@@ -2173,461 +2173,9 @@ address MacroAssembler::emit_trampoline_stub(int destination_toc_offset,
return stub;
}
// TM on PPC64.
void MacroAssembler::atomic_inc_ptr(Register addr, Register result, int simm16) {
Label retry;
bind(retry);
ldarx(result, addr, /*hint*/ false);
addi(result, result, simm16);
stdcx_(result, addr);
if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
bne_predict_not_taken(CCR0, retry); // stXcx_ sets CCR0
} else {
bne( CCR0, retry); // stXcx_ sets CCR0
}
}
void MacroAssembler::atomic_ori_int(Register addr, Register result, int uimm16) {
Label retry;
bind(retry);
lwarx(result, addr, /*hint*/ false);
ori(result, result, uimm16);
stwcx_(result, addr);
if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
bne_predict_not_taken(CCR0, retry); // stXcx_ sets CCR0
} else {
bne( CCR0, retry); // stXcx_ sets CCR0
}
}
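Both helpers above are load-reserve/store-conditional retry loops: ldarx/lwarx takes a reservation, stdcx_/stwcx_ succeeds only if no other processor touched the location in the meantime, and CCR0 decides whether to retry. Functionally they boil down to the following, sketched with C++ atomics (illustration only; the originals are emitted into compiled code):

  #include <atomic>
  #include <cstdint>
  void atomic_inc_ptr_equivalent(std::atomic<int64_t>* addr, int16_t simm16) {
    addr->fetch_add(simm16);   // an ldarx/stdcx. loop on ppc64
  }
  void atomic_ori_int_equivalent(std::atomic<uint32_t>* addr, uint16_t uimm16) {
    addr->fetch_or(uimm16);    // a lwarx/stwcx. loop
  }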
#if INCLUDE_RTM_OPT
// Update rtm_counters based on abort status
// input: abort_status
// rtm_counters_Reg (RTMLockingCounters*)
void MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters_Reg) {
// Mapping to keep PreciseRTMLockingStatistics similar to x86.
// x86 ppc (! means inverted, ? means not the same)
// 0 31 Set if abort caused by XABORT instruction.
// 1 ! 7 If set, the transaction may succeed on a retry. This bit is always clear if bit 0 is set.
// 2 13 Set if another logical processor conflicted with a memory address that was part of the transaction that aborted.
// 3 10 Set if an internal buffer overflowed.
// 4 ?12 Set if a debug breakpoint was hit.
// 5 ?32 Set if an abort occurred during execution of a nested transaction.
const int failure_bit[] = {tm_tabort, // Signal handler will set this too.
tm_failure_persistent,
tm_non_trans_cf,
tm_trans_cf,
tm_footprint_of,
tm_failure_code,
tm_transaction_level};
const int num_failure_bits = sizeof(failure_bit) / sizeof(int);
const int num_counters = RTMLockingCounters::ABORT_STATUS_LIMIT;
const int bit2counter_map[][num_counters] =
// 0 = no map; 1 = mapped, no inverted logic; -1 = mapped, inverted logic
// Inverted logic means that if a bit is set don't count it, or vice-versa.
// Care must be taken when mapping bits to counters as bits for a given
// counter must be mutually exclusive. Otherwise, the counter will be
// incremented more than once.
// counters:
// 0 1 2 3 4 5
// abort , persist, conflict, overflow, debug , nested bits:
{{ 1 , 0 , 0 , 0 , 0 , 0 }, // abort
{ 0 , -1 , 0 , 0 , 0 , 0 }, // failure_persistent
{ 0 , 0 , 1 , 0 , 0 , 0 }, // non_trans_cf
{ 0 , 0 , 1 , 0 , 0 , 0 }, // trans_cf
{ 0 , 0 , 0 , 1 , 0 , 0 }, // footprint_of
{ 0 , 0 , 0 , 0 , -1 , 0 }, // failure_code = 0xD4
{ 0 , 0 , 0 , 0 , 0 , 1 }}; // transaction_level > 1
// ...
// Move abort_status value to R0 and use abort_status register as a
// temporary register because R0 as third operand in ld/std is treated
// as base address zero (value). Likewise, R0 as second operand in addi
// is problematic because it amounts to li.
const Register temp_Reg = abort_status;
const Register abort_status_R0 = R0;
mr(abort_status_R0, abort_status);
// Increment total abort counter.
int counters_offs = RTMLockingCounters::abort_count_offset();
ld(temp_Reg, counters_offs, rtm_counters_Reg);
addi(temp_Reg, temp_Reg, 1);
std(temp_Reg, counters_offs, rtm_counters_Reg);
// Increment specific abort counters.
if (PrintPreciseRTMLockingStatistics) {
// #0 counter offset.
int abortX_offs = RTMLockingCounters::abortX_count_offset();
for (int nbit = 0; nbit < num_failure_bits; nbit++) {
for (int ncounter = 0; ncounter < num_counters; ncounter++) {
if (bit2counter_map[nbit][ncounter] != 0) {
Label check_abort;
int abort_counter_offs = abortX_offs + (ncounter << 3);
if (failure_bit[nbit] == tm_transaction_level) {
// Don't check outer transaction, TL = 1 (bit 63). Hence only
// 11 bits in the TL field are checked to find out if failure
// occurred in a nested transaction. This check also matches
// the case when nesting_of = 1 (nesting overflow).
rldicr_(temp_Reg, abort_status_R0, failure_bit[nbit], 10);
} else if (failure_bit[nbit] == tm_failure_code) {
// Check failure code for trap or illegal caught in TM.
// Bits 0:7 are tested as bit 7 (persistent) is copied from
// tabort or treclaim source operand.
// On Linux: trap or illegal is TM_CAUSE_SIGNAL (0xD4).
rldicl(temp_Reg, abort_status_R0, 8, 56);
cmpdi(CCR0, temp_Reg, 0xD4);
} else {
rldicr_(temp_Reg, abort_status_R0, failure_bit[nbit], 0);
}
if (bit2counter_map[nbit][ncounter] == 1) {
beq(CCR0, check_abort);
} else {
bne(CCR0, check_abort);
}
// We don't increment atomically.
ld(temp_Reg, abort_counter_offs, rtm_counters_Reg);
addi(temp_Reg, temp_Reg, 1);
std(temp_Reg, abort_counter_offs, rtm_counters_Reg);
bind(check_abort);
}
}
}
}
// Restore abort_status.
mr(abort_status, abort_status_R0);
}
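The rldicl/cmpdi pair in the failure-code branch above shifts TEXASR bits 0:7 into the low byte and compares against 0xD4 (TM_CAUSE_SIGNAL on Linux, per the comment). The same test over a plain 64-bit value, assuming IBM bit 0 is the most significant bit (helper name is ours):

  #include <cstdint>
  bool aborted_by_signal(uint64_t texasr) {
    uint64_t failure_code = texasr >> 56;  // TEXASR bits 0:7
    return failure_code == 0xD4;           // TM_CAUSE_SIGNAL
  }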
// Branch if (random & (count-1) != 0), count is 2^n
// tmp and CR0 are killed
void MacroAssembler::branch_on_random_using_tb(Register tmp, int count, Label& brLabel) {
mftb(tmp);
andi_(tmp, tmp, count-1);
bne(CCR0, brLabel);
}
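branch_on_random_using_tb uses the low bits of the timebase as cheap pseudo-randomness, so the counter updates it guards run on roughly one call in count. The predicate in scalar form (count must be a power of two, as the flag ranges enforce; helper name is ours):

  #include <cstdint>
  bool skip_this_sample(uint64_t timebase, int count) {
    return (timebase & (count - 1)) != 0;  // branch away (skip the update) unless the low bits are zero
  }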
// Perform abort ratio calculation, set no_rtm bit if high ratio.
// input: rtm_counters_Reg (RTMLockingCounters* address) - KILLED
void MacroAssembler::rtm_abort_ratio_calculation(Register rtm_counters_Reg,
RTMLockingCounters* rtm_counters,
Metadata* method_data) {
Label L_done, L_check_always_rtm1, L_check_always_rtm2;
if (RTMLockingCalculationDelay > 0) {
// Delay calculation.
ld(rtm_counters_Reg, (RegisterOrConstant)(intptr_t)RTMLockingCounters::rtm_calculation_flag_addr());
cmpdi(CCR0, rtm_counters_Reg, 0);
beq(CCR0, L_done);
load_const_optimized(rtm_counters_Reg, (address)rtm_counters, R0); // reload
}
// Abort ratio calculation only if abort_count > RTMAbortThreshold.
// Aborted transactions = abort_count * 100
// All transactions = total_count * RTMTotalCountIncrRate
// Set no_rtm bit if (Aborted transactions >= All transactions * RTMAbortRatio)
ld(R0, RTMLockingCounters::abort_count_offset(), rtm_counters_Reg);
if (is_simm(RTMAbortThreshold, 16)) { // cmpdi can handle 16bit immediate only.
cmpdi(CCR0, R0, RTMAbortThreshold);
blt(CCR0, L_check_always_rtm2); // reload of rtm_counters_Reg not necessary
} else {
load_const_optimized(rtm_counters_Reg, RTMAbortThreshold);
cmpd(CCR0, R0, rtm_counters_Reg);
blt(CCR0, L_check_always_rtm1); // reload of rtm_counters_Reg required
}
mulli(R0, R0, 100);
const Register tmpReg = rtm_counters_Reg;
ld(tmpReg, RTMLockingCounters::total_count_offset(), rtm_counters_Reg);
mulli(tmpReg, tmpReg, RTMTotalCountIncrRate); // allowable range: int16
mulli(tmpReg, tmpReg, RTMAbortRatio); // allowable range: int16
cmpd(CCR0, R0, tmpReg);
blt(CCR0, L_check_always_rtm1); // jump to reload
if (method_data != nullptr) {
// Set rtm_state to "no rtm" in MDO.
// Not using a metadata relocation. Method and Class Loader are kept alive anyway.
// (See nmethod::metadata_do and CodeBuffer::finalize_oop_references.)
load_const(R0, (address)method_data + in_bytes(MethodData::rtm_state_offset()), tmpReg);
atomic_ori_int(R0, tmpReg, NoRTM);
}
b(L_done);
bind(L_check_always_rtm1);
load_const_optimized(rtm_counters_Reg, (address)rtm_counters, R0); // reload
bind(L_check_always_rtm2);
ld(tmpReg, RTMLockingCounters::total_count_offset(), rtm_counters_Reg);
int64_t thresholdValue = RTMLockingThreshold / RTMTotalCountIncrRate;
if (is_simm(thresholdValue, 16)) { // cmpdi can handle 16bit immediate only.
cmpdi(CCR0, tmpReg, thresholdValue);
} else {
load_const_optimized(R0, thresholdValue);
cmpd(CCR0, tmpReg, R0);
}
blt(CCR0, L_done);
if (method_data != nullptr) {
// Set rtm_state to "always rtm" in MDO.
// Not using a metadata relocation. See above.
load_const(R0, (address)method_data + in_bytes(MethodData::rtm_state_offset()), tmpReg);
atomic_ori_int(R0, tmpReg, UseRTM);
}
bind(L_done);
}
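Stripped of register shuffling, the decision above is plain integer arithmetic over the counters, exactly as the comment block states. A sketch with the flag values passed in explicitly (helper name and signature are ours):

  #include <cstdint>
  // True when RTM should be disabled: aborts * 100 >= all transactions * RTMAbortRatio.
  bool abort_ratio_too_high(int64_t abort_count, int64_t total_count,
                            int total_count_incr_rate, int abort_ratio) {
    return abort_count * 100 >= total_count * total_count_incr_rate * abort_ratio;
  }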
// Update counters and perform abort ratio calculation.
// input: abort_status_Reg
void MacroAssembler::rtm_profiling(Register abort_status_Reg, Register temp_Reg,
RTMLockingCounters* rtm_counters,
Metadata* method_data,
bool profile_rtm) {
assert(rtm_counters != nullptr, "should not be null when profiling RTM");
// Update rtm counters based on state at abort.
// Reads abort_status_Reg, updates flags.
assert_different_registers(abort_status_Reg, temp_Reg);
load_const_optimized(temp_Reg, (address)rtm_counters, R0);
rtm_counters_update(abort_status_Reg, temp_Reg);
if (profile_rtm) {
assert(rtm_counters != nullptr, "should not be null when profiling RTM");
rtm_abort_ratio_calculation(temp_Reg, rtm_counters, method_data);
}
}
// Retry on abort if abort's status indicates non-persistent failure.
// inputs: retry_count_Reg
// : abort_status_Reg
// output: retry_count_Reg decremented by 1
void MacroAssembler::rtm_retry_lock_on_abort(Register retry_count_Reg, Register abort_status_Reg,
Label& retryLabel, Label* checkRetry) {
Label doneRetry;
// Don't retry if failure is persistent.
// The persistent bit is set when a (A) Disallowed operation is performed in
// transactional state, like for instance trying to write the TFHAR after a
// transaction is started; or when there is (B) a Nesting Overflow (too many
// nested transactions); or when (C) the Footprint overflows (too many
// addresses touched in TM state so there is no more space in the footprint
// area to track them); or in case of (D) a Self-Induced Conflict, i.e. a
// store is performed to a given address in TM state, then once in suspended
// state the same address is accessed. Failure (A) is very unlikely to occur
// in the JVM. Failure (D) will never occur because Suspended state is never
// used in the JVM. Thus mostly (B) a Nesting Overflow or (C) a Footprint
// Overflow will set the persistent bit.
rldicr_(R0, abort_status_Reg, tm_failure_persistent, 0);
bne(CCR0, doneRetry);
// Don't retry if transaction was deliberately aborted, i.e. caused by a
// tabort instruction.
rldicr_(R0, abort_status_Reg, tm_tabort, 0);
bne(CCR0, doneRetry);
// Retry if transaction aborted due to a conflict with another thread.
if (checkRetry) { bind(*checkRetry); }
addic_(retry_count_Reg, retry_count_Reg, -1);
blt(CCR0, doneRetry);
b(retryLabel);
bind(doneRetry);
}
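The two rldicr_ tests above rotate the persistent and tabort bits into the position checked by bne. Expressed directly over the TEXASR value (the bit helper assumes the IBM numbering from the enum in assembler_ppc.hpp; names are ours):

  #include <cstdint>
  inline bool texasr_bit(uint64_t texasr, int ibm_bit) {
    return (texasr >> (63 - ibm_bit)) & 1;
  }
  bool retry_is_worthwhile(uint64_t texasr) {
    return !texasr_bit(texasr, 7)    // tm_failure_persistent: would fail again
        && !texasr_bit(texasr, 31);  // tm_tabort: deliberately aborted
  }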
// Spin and retry if lock is busy.
// inputs: owner_addr_Reg (monitor address)
// : retry_count_Reg
// output: retry_count_Reg decremented by 1
// CTR is killed
void MacroAssembler::rtm_retry_lock_on_busy(Register retry_count_Reg, Register owner_addr_Reg, Label& retryLabel) {
Label SpinLoop, doneRetry, doRetry;
addic_(retry_count_Reg, retry_count_Reg, -1);
blt(CCR0, doneRetry);
if (RTMSpinLoopCount > 1) {
li(R0, RTMSpinLoopCount);
mtctr(R0);
}
// low thread priority
smt_prio_low();
bind(SpinLoop);
if (RTMSpinLoopCount > 1) {
bdz(doRetry);
ld(R0, 0, owner_addr_Reg);
cmpdi(CCR0, R0, 0);
bne(CCR0, SpinLoop);
}
bind(doRetry);
// restore thread priority to default in userspace
#ifdef LINUX
smt_prio_medium_low();
#else
smt_prio_medium();
#endif
b(retryLabel);
bind(doneRetry);
}
// Use RTM for normal stack locks.
// Input: objReg (object to lock)
void MacroAssembler::rtm_stack_locking(ConditionRegister flag,
Register obj, Register mark_word, Register tmp,
Register retry_on_abort_count_Reg,
RTMLockingCounters* stack_rtm_counters,
Metadata* method_data, bool profile_rtm,
Label& DONE_LABEL, Label& IsInflated) {
assert(UseRTMForStackLocks, "why call this otherwise?");
Label L_rtm_retry, L_decrement_retry, L_on_abort;
if (RTMRetryCount > 0) {
load_const_optimized(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
bind(L_rtm_retry);
}
andi_(R0, mark_word, markWord::monitor_value); // inflated vs stack-locked|neutral
bne(CCR0, IsInflated);
if (PrintPreciseRTMLockingStatistics || profile_rtm) {
Label L_noincrement;
if (RTMTotalCountIncrRate > 1) {
branch_on_random_using_tb(tmp, RTMTotalCountIncrRate, L_noincrement);
}
assert(stack_rtm_counters != nullptr, "should not be null when profiling RTM");
load_const_optimized(tmp, (address)stack_rtm_counters->total_count_addr(), R0);
//atomic_inc_ptr(tmp, /*temp, will be reloaded*/mark_word); We don't increment atomically
ldx(mark_word, tmp);
addi(mark_word, mark_word, 1);
stdx(mark_word, tmp);
bind(L_noincrement);
}
tbegin_();
beq(CCR0, L_on_abort);
ld(mark_word, oopDesc::mark_offset_in_bytes(), obj); // Reload in transaction, conflicts need to be tracked.
andi(R0, mark_word, markWord::lock_mask_in_place); // look at 2 lock bits
cmpwi(flag, R0, markWord::unlocked_value); // bits = 01 unlocked
beq(flag, DONE_LABEL); // all done if unlocked
if (UseRTMXendForLockBusy) {
tend_();
b(L_decrement_retry);
} else {
tabort_();
}
bind(L_on_abort);
const Register abort_status_Reg = tmp;
mftexasr(abort_status_Reg);
if (PrintPreciseRTMLockingStatistics || profile_rtm) {
rtm_profiling(abort_status_Reg, /*temp*/mark_word, stack_rtm_counters, method_data, profile_rtm);
}
ld(mark_word, oopDesc::mark_offset_in_bytes(), obj); // reload
if (RTMRetryCount > 0) {
// Retry on lock abort if abort status is not permanent.
rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry, &L_decrement_retry);
} else {
bind(L_decrement_retry);
}
}
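The shape of the elided fast path is easier to see outside the assembler. A self-contained sketch using GCC's PowerPC HTM builtins (requires -mhtm on a Power8+ toolchain; it mirrors the structure above, not HotSpot's actual code, and all names are ours):

  bool try_elided_stack_lock(const volatile long* mark_word,
                             long lock_mask, long unlocked_value) {
    if (__builtin_tbegin(0)) {                        // transaction started
      if ((*mark_word & lock_mask) == unlocked_value)
        return true;                                  // critical section now runs transactionally
      __builtin_tabort(0);                            // lock is busy: roll back to tbegin
    }
    return false;                                     // aborted: profile, maybe retry, else fall back
  }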
// Use RTM for inflating locks
// inputs: obj (object to lock)
// mark_word (current header - KILLED)
// boxReg (on-stack box address (displaced header location) - KILLED)
void MacroAssembler::rtm_inflated_locking(ConditionRegister flag,
Register obj, Register mark_word, Register boxReg,
Register retry_on_busy_count_Reg, Register retry_on_abort_count_Reg,
RTMLockingCounters* rtm_counters,
Metadata* method_data, bool profile_rtm,
Label& DONE_LABEL) {
assert(UseRTMLocking, "why call this otherwise?");
Label L_rtm_retry, L_decrement_retry, L_on_abort;
// Clean monitor_value bit to get valid pointer.
int owner_offset = in_bytes(ObjectMonitor::owner_offset()) - markWord::monitor_value;
// Store non-null, using boxReg instead of (intptr_t)markWord::unused_mark().
std(boxReg, BasicLock::displaced_header_offset_in_bytes(), boxReg);
const Register tmpReg = boxReg;
const Register owner_addr_Reg = mark_word;
addi(owner_addr_Reg, mark_word, owner_offset);
if (RTMRetryCount > 0) {
load_const_optimized(retry_on_busy_count_Reg, RTMRetryCount); // Retry on lock busy.
load_const_optimized(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort.
bind(L_rtm_retry);
}
if (PrintPreciseRTMLockingStatistics || profile_rtm) {
Label L_noincrement;
if (RTMTotalCountIncrRate > 1) {
branch_on_random_using_tb(R0, RTMTotalCountIncrRate, L_noincrement);
}
assert(rtm_counters != nullptr, "should not be null when profiling RTM");
load_const(R0, (address)rtm_counters->total_count_addr(), tmpReg);
//atomic_inc_ptr(R0, tmpReg); We don't increment atomically
ldx(tmpReg, R0);
addi(tmpReg, tmpReg, 1);
stdx(tmpReg, R0);
bind(L_noincrement);
}
tbegin_();
beq(CCR0, L_on_abort);
// We don't reload mark word. Will only be reset at safepoint.
ld(R0, 0, owner_addr_Reg); // Load in transaction, conflicts need to be tracked.
cmpdi(flag, R0, 0);
beq(flag, DONE_LABEL);
if (UseRTMXendForLockBusy) {
tend_();
b(L_decrement_retry);
} else {
tabort_();
}
bind(L_on_abort);
const Register abort_status_Reg = tmpReg;
mftexasr(abort_status_Reg);
if (PrintPreciseRTMLockingStatistics || profile_rtm) {
rtm_profiling(abort_status_Reg, /*temp*/ owner_addr_Reg, rtm_counters, method_data, profile_rtm);
// Restore owner_addr_Reg
ld(mark_word, oopDesc::mark_offset_in_bytes(), obj);
#ifdef ASSERT
andi_(R0, mark_word, markWord::monitor_value);
asm_assert_ne("must be inflated"); // Deflating only allowed at safepoint.
#endif
addi(owner_addr_Reg, mark_word, owner_offset);
}
if (RTMRetryCount > 0) {
// Retry on lock abort if abort status is not permanent.
rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
}
// Appears unlocked - try to swing _owner from null to non-null.
cmpxchgd(flag, /*current val*/ R0, (intptr_t)0, /*new val*/ R16_thread, owner_addr_Reg,
MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
MacroAssembler::cmpxchgx_hint_acquire_lock(), noreg, &L_decrement_retry, true);
if (RTMRetryCount > 0) {
// success done else retry
b(DONE_LABEL);
bind(L_decrement_retry);
// Spin and retry if lock is busy.
rtm_retry_lock_on_busy(retry_on_busy_count_Reg, owner_addr_Reg, L_rtm_retry);
} else {
bind(L_decrement_retry);
}
}
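When the transaction cannot be used, the cmpxchgd above falls back to the classic CAS that swings the monitor's owner from null to the current thread. The equivalent shape with std::atomic (illustrative; names are ours):

  #include <atomic>
  bool try_lock_monitor(std::atomic<void*>& owner, void* current_thread) {
    void* expected = nullptr;  // appears unlocked
    return owner.compare_exchange_strong(expected, current_thread,
                                         std::memory_order_acquire);
  }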
#endif // INCLUDE_RTM_OPT
// "The box" is the space on the stack where we copy the object mark.
void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box,
Register temp, Register displaced_header, Register current_header,
RTMLockingCounters* rtm_counters,
RTMLockingCounters* stack_rtm_counters,
Metadata* method_data,
bool use_rtm, bool profile_rtm) {
Register temp, Register displaced_header, Register current_header) {
assert_different_registers(oop, box, temp, displaced_header, current_header);
assert(LockingMode != LM_LIGHTWEIGHT || flag == CCR0, "bad condition register");
Label object_has_monitor;
@@ -2644,14 +2192,6 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
bne(flag, failure);
}
#if INCLUDE_RTM_OPT
if (UseRTMForStackLocks && use_rtm) {
rtm_stack_locking(flag, oop, displaced_header, temp, /*temp*/ current_header,
stack_rtm_counters, method_data, profile_rtm,
success, object_has_monitor);
}
#endif // INCLUDE_RTM_OPT
// Handle existing monitor.
// The object has an existing monitor iff (mark & monitor_value) != 0.
andi_(temp, displaced_header, markWord::monitor_value);
@@ -2716,15 +2256,6 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
// The object's monitor m is unlocked iff m->owner is null,
// otherwise m->owner may contain a thread or a stack address.
#if INCLUDE_RTM_OPT
// Use the same RTM locking code in 32- and 64-bit VM.
if (use_rtm) {
rtm_inflated_locking(flag, oop, displaced_header, box, temp, /*temp*/ current_header,
rtm_counters, method_data, profile_rtm, success);
bne(flag, failure);
} else {
#endif // INCLUDE_RTM_OPT
// Try to CAS m->owner from null to current thread.
addi(temp, displaced_header, in_bytes(ObjectMonitor::owner_offset()) - markWord::monitor_value);
cmpxchgd(/*flag=*/flag,
@@ -2751,10 +2282,6 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
addi(recursions, recursions, 1);
std(recursions, in_bytes(ObjectMonitor::recursions_offset() - ObjectMonitor::owner_offset()), temp);
#if INCLUDE_RTM_OPT
} // use_rtm()
#endif
// flag == EQ indicates success, increment held monitor count
// flag == NE indicates failure
bind(success);
@@ -2763,25 +2290,11 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
}
void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box,
Register temp, Register displaced_header, Register current_header,
bool use_rtm) {
Register temp, Register displaced_header, Register current_header) {
assert_different_registers(oop, box, temp, displaced_header, current_header);
assert(LockingMode != LM_LIGHTWEIGHT || flag == CCR0, "bad condition register");
Label success, failure, object_has_monitor, notRecursive;
#if INCLUDE_RTM_OPT
if (UseRTMForStackLocks && use_rtm) {
Label L_regular_unlock;
ld(current_header, oopDesc::mark_offset_in_bytes(), oop); // fetch markword
andi(R0, current_header, markWord::lock_mask_in_place); // look at 2 lock bits
cmpwi(flag, R0, markWord::unlocked_value); // bits = 01 unlocked
bne(flag, L_regular_unlock); // else RegularLock
tend_(); // otherwise end...
b(success); // ... and we're done
bind(L_regular_unlock);
}
#endif
if (LockingMode == LM_LEGACY) {
// Find the lock address and load the displaced header from the stack.
ld(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box);
@@ -2793,7 +2306,6 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
// Handle existing monitor.
// The object has an existing monitor iff (mark & monitor_value) != 0.
RTM_OPT_ONLY( if (!(UseRTMForStackLocks && use_rtm)) ) // skip load if already done
ld(current_header, oopDesc::mark_offset_in_bytes(), oop);
andi_(R0, current_header, markWord::monitor_value);
bne(CCR0, object_has_monitor);
@@ -2829,19 +2341,6 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
addi(current_header, current_header, -(int)markWord::monitor_value); // monitor
ld(temp, in_bytes(ObjectMonitor::owner_offset()), current_header);
// It's inflated.
#if INCLUDE_RTM_OPT
if (use_rtm) {
Label L_regular_inflated_unlock;
// Clean monitor_value bit to get valid pointer
cmpdi(flag, temp, 0);
bne(flag, L_regular_inflated_unlock);
tend_();
b(success);
bind(L_regular_inflated_unlock);
}
#endif
// In case of LM_LIGHTWEIGHT, we may reach here with (temp & ObjectMonitor::ANONYMOUS_OWNER) != 0.
// This is handled like owner thread mismatches: We take the slow path.
cmpd(flag, temp, R16_thread);

@@ -28,7 +28,6 @@
#include "asm/assembler.hpp"
#include "oops/accessDecorators.hpp"
#include "runtime/rtmLocking.hpp"
#include "utilities/macros.hpp"
// MacroAssembler extends Assembler by a few frequently used macros.
@@ -623,41 +622,11 @@ class MacroAssembler: public Assembler {
enum { trampoline_stub_size = 6 * 4 };
address emit_trampoline_stub(int destination_toc_offset, int insts_call_instruction_offset, Register Rtoc = noreg);
void atomic_inc_ptr(Register addr, Register result, int simm16 = 1);
void atomic_ori_int(Register addr, Register result, int uimm16);
#if INCLUDE_RTM_OPT
void rtm_counters_update(Register abort_status, Register rtm_counters);
void branch_on_random_using_tb(Register tmp, int count, Label& brLabel);
void rtm_abort_ratio_calculation(Register rtm_counters_reg, RTMLockingCounters* rtm_counters,
Metadata* method_data);
void rtm_profiling(Register abort_status_Reg, Register temp_Reg,
RTMLockingCounters* rtm_counters, Metadata* method_data, bool profile_rtm);
void rtm_retry_lock_on_abort(Register retry_count, Register abort_status,
Label& retryLabel, Label* checkRetry = nullptr);
void rtm_retry_lock_on_busy(Register retry_count, Register owner_addr, Label& retryLabel);
void rtm_stack_locking(ConditionRegister flag, Register obj, Register mark_word, Register tmp,
Register retry_on_abort_count,
RTMLockingCounters* stack_rtm_counters,
Metadata* method_data, bool profile_rtm,
Label& DONE_LABEL, Label& IsInflated);
void rtm_inflated_locking(ConditionRegister flag, Register obj, Register mark_word, Register box,
Register retry_on_busy_count, Register retry_on_abort_count,
RTMLockingCounters* rtm_counters,
Metadata* method_data, bool profile_rtm,
Label& DONE_LABEL);
#endif
void compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box,
Register tmp1, Register tmp2, Register tmp3,
RTMLockingCounters* rtm_counters = nullptr,
RTMLockingCounters* stack_rtm_counters = nullptr,
Metadata* method_data = nullptr,
bool use_rtm = false, bool profile_rtm = false);
Register tmp1, Register tmp2, Register tmp3);
void compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box,
Register tmp1, Register tmp2, Register tmp3,
bool use_rtm = false);
Register tmp1, Register tmp2, Register tmp3);
// Check if safepoint requested and if so branch
void safepoint_poll(Label& slow_path, Register temp, bool at_return, bool in_nmethod);

@@ -12142,7 +12142,6 @@ instruct partialSubtypeCheck(iRegPdst result, iRegP_N2P subklass, iRegP_N2P supe
instruct cmpFastLock(flagsRegCR0 crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2) %{
match(Set crx (FastLock oop box));
effect(TEMP tmp1, TEMP tmp2);
predicate(!Compile::current()->use_rtm());
format %{ "FASTLOCK $oop, $box, $tmp1, $tmp2" %}
ins_encode %{
@@ -12155,53 +12154,14 @@ instruct cmpFastLock(flagsRegCR0 crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1,
ins_pipe(pipe_class_compare);
%}
// Separate version for TM. Use bound register for box to enable USE_KILL.
instruct cmpFastLock_tm(flagsReg crx, iRegPdst oop, rarg2RegP box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
match(Set crx (FastLock oop box));
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL box);
predicate(Compile::current()->use_rtm());
format %{ "FASTLOCK $oop, $box, $tmp1, $tmp2, $tmp3 (TM)" %}
ins_encode %{
__ compiler_fast_lock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
$tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
_rtm_counters, _stack_rtm_counters,
((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
/*RTM*/ true, ra_->C->profile_rtm());
// If locking was successful, crx should indicate 'EQ'.
// The compiler generates a branch to the runtime call to
// _complete_monitor_locking_Java for the case where crx is 'NE'.
%}
ins_pipe(pipe_class_compare);
%}
instruct cmpFastUnlock(flagsRegCR0 crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
match(Set crx (FastUnlock oop box));
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
predicate(!Compile::current()->use_rtm());
format %{ "FASTUNLOCK $oop, $box, $tmp1, $tmp2" %}
ins_encode %{
__ compiler_fast_unlock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
$tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
false);
// If unlocking was successful, crx should indicate 'EQ'.
// The compiler generates a branch to the runtime call to
// _complete_monitor_unlocking_Java for the case where crx is 'NE'.
%}
ins_pipe(pipe_class_compare);
%}
instruct cmpFastUnlock_tm(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
match(Set crx (FastUnlock oop box));
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
predicate(Compile::current()->use_rtm());
format %{ "FASTUNLOCK $oop, $box, $tmp1, $tmp2 (TM)" %}
ins_encode %{
__ compiler_fast_unlock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
$tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
/*RTM*/ true);
$tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
// If unlocking was successful, crx should indicate 'EQ'.
// The compiler generates a branch to the runtime call to
// _complete_monitor_unlocking_Java for the case where crx is 'NE'.

@@ -2277,13 +2277,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
// --------------------------------------------------------------------------
vep_start_pc = (intptr_t)__ pc();
if (UseRTMLocking) {
// Abort RTM transaction before calling JNI
// because critical section can be large and
// abort anyway. Also nmethod can be deoptimized.
__ tabort_();
}
if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) {
Label L_skip_barrier;
Register klass = r_temp_1;
@@ -3168,11 +3161,6 @@ void SharedRuntime::generate_uncommon_trap_blob() {
InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
address start = __ pc();
if (UseRTMLocking) {
// Abort RTM transaction before possible nmethod deoptimization.
__ tabort_();
}
Register unroll_block_reg = R21_tmp1;
Register klass_index_reg = R22_tmp2;
Register unc_trap_reg = R23_tmp3;
@@ -3323,13 +3311,6 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t
return_pc_location = RegisterSaver::return_pc_is_thread_saved_exception_pc;
}
if (UseRTMLocking) {
// Abort RTM transaction before calling runtime
// because critical section can be large and so
// will abort anyway. Also nmethod can be deoptimized.
__ tabort_();
}
bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP);
// Save registers, fpu state, and flags. Set R31 = return pc.

@@ -45,14 +45,6 @@
#include <libperfstat.h>
#endif
#if defined(LINUX) && defined(VM_LITTLE_ENDIAN)
#include <sys/auxv.h>
#ifndef PPC_FEATURE2_HTM_NOSC
#define PPC_FEATURE2_HTM_NOSC (1 << 24)
#endif
#endif
bool VM_Version::_is_determine_features_test_running = false;
uint64_t VM_Version::_dscr_val = 0;
@@ -182,7 +174,7 @@ void VM_Version::initialize() {
// Create and print feature-string.
char buf[(num_features+1) * 16]; // Max 16 chars per feature.
jio_snprintf(buf, sizeof(buf),
"ppc64%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
"ppc64%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
(has_fsqrt() ? " fsqrt" : ""),
(has_isel() ? " isel" : ""),
(has_lxarxeh() ? " lxarxeh" : ""),
@@ -199,7 +191,6 @@
(has_ldbrx() ? " ldbrx" : ""),
(has_stdbrx() ? " stdbrx" : ""),
(has_vshasig() ? " sha" : ""),
(has_tm() ? " rtm" : ""),
(has_darn() ? " darn" : ""),
(has_brw() ? " brw" : "")
// Make sure number of %s matches num_features!
@@ -376,49 +367,6 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
}
// Adjust RTM (Restricted Transactional Memory) flags.
if (UseRTMLocking) {
// If CPU or OS do not support RTM:
if (PowerArchitecturePPC64 < 8 || PowerArchitecturePPC64 > 9) {
vm_exit_during_initialization("RTM instructions are not available on this CPU.");
}
if (!has_tm()) {
vm_exit_during_initialization("RTM is not supported on this OS version.");
}
#if INCLUDE_RTM_OPT
if (!FLAG_IS_CMDLINE(UseRTMLocking)) {
// RTM locking should be used only for applications with
// high lock contention. For now we do not use it by default.
vm_exit_during_initialization("UseRTMLocking flag should be only set on command line");
}
if (LockingMode != LM_LEGACY) {
warning("UseRTMLocking requires LockingMode = 1");
FLAG_SET_DEFAULT(UseRTMLocking, false);
}
#else
// Only C2 does RTM locking optimization.
vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
#endif
} else { // !UseRTMLocking
if (UseRTMForStackLocks) {
if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) {
warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off");
}
FLAG_SET_DEFAULT(UseRTMForStackLocks, false);
}
if (UseRTMDeopt) {
FLAG_SET_DEFAULT(UseRTMDeopt, false);
}
#ifdef COMPILER2
if (PrintPreciseRTMLockingStatistics) {
FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false);
}
#endif
}
// This machine allows unaligned memory accesses
if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
@@ -601,7 +549,6 @@ void VM_Version::determine_features() {
a->ldbrx(R7, R3_ARG1, R4_ARG2); // code[14] -> ldbrx
a->stdbrx(R7, R3_ARG1, R4_ARG2); // code[15] -> stdbrx
a->vshasigmaw(VR0, VR1, 1, 0xF); // code[16] -> vshasig
// rtm is determined by OS
a->darn(R7); // code[17] -> darn
a->brw(R5, R6); // code[18] -> brw
a->blr();
@@ -655,7 +602,6 @@
if (code[feature_cntr++]) features |= ldbrx_m;
if (code[feature_cntr++]) features |= stdbrx_m;
if (code[feature_cntr++]) features |= vshasig_m;
// feature rtm_m is determined by OS
if (code[feature_cntr++]) features |= darn_m;
if (code[feature_cntr++]) features |= brw_m;
@@ -667,37 +613,6 @@
}
_features = features;
#ifdef AIX
// Enabling this on AIX requires POWER8 or above and at least AIX 7.2.
// It has actually been supported since AIX 7.1, but the first versions
// contained bugs, so it can only be enabled after AIX 7.1.3.30.
// The Java property os.version, which is used in RTM tests to decide
// whether the feature is available, only knows major and minor versions.
// We don't want to change this property, as user code might depend on it.
// So the tests cannot check for subversion 3.30, and we only enable RTM
// with AIX 7.2.
if (has_lqarx() && !has_brw()) { // POWER8 or POWER9
if (os::Aix::os_version() >= 0x07020000) { // At least AIX 7.2.
_features |= rtm_m;
}
}
#endif
#if defined(LINUX) && defined(VM_LITTLE_ENDIAN)
unsigned long auxv = getauxval(AT_HWCAP2);
if (auxv & PPC_FEATURE2_HTM_NOSC) {
if (auxv & PPC_FEATURE2_HAS_HTM) {
// TM on POWER8 and POWER9 in compat mode (VM) is supported by the JVM.
// TM on POWER9 DD2.1 NV (baremetal) is not supported by the JVM (TM on
// POWER9 DD2.1 NV has a few issues that need a couple of firmware
// and kernel workarounds, so there is a new mode only supported
// on non-virtualized P9 machines called HTM with no Suspend Mode).
// TM on POWER9 D2.2+ NV is not supported at all by Linux.
_features |= rtm_m;
}
}
#endif
}
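For reference, the removed Linux detection reduces to two auxv bits: the kernel must advertise HTM at all (PPC_FEATURE2_HAS_HTM) and in the no-suspend-mode variant (PPC_FEATURE2_HTM_NOSC). A standalone sketch, assuming a powerpc glibc where <sys/auxv.h> exposes the PPC_FEATURE2_* constants; the NOSC fallback define matches the removed code, and the function name is ours:

  #include <sys/auxv.h>
  #ifndef PPC_FEATURE2_HTM_NOSC
  #define PPC_FEATURE2_HTM_NOSC (1 << 24)
  #endif
  bool linux_supports_jvm_htm() {
    unsigned long hwcap2 = getauxval(AT_HWCAP2);
    return (hwcap2 & PPC_FEATURE2_HTM_NOSC) && (hwcap2 & PPC_FEATURE2_HAS_HTM);
  }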
// Power 8: Configure Data Stream Control Register.

@@ -49,7 +49,6 @@ protected:
ldbrx,
stdbrx,
vshasig,
rtm,
darn,
brw,
num_features // last entry to count features
@@ -73,7 +72,6 @@ protected:
ldbrx_m = (1 << ldbrx ),
stdbrx_m = (1 << stdbrx ),
vshasig_m = (1 << vshasig),
rtm_m = (1 << rtm ),
darn_m = (1 << darn ),
brw_m = (1 << brw ),
all_features_m = (unsigned long)-1
@@ -116,7 +114,6 @@ public:
static bool has_ldbrx() { return (_features & ldbrx_m) != 0; }
static bool has_stdbrx() { return (_features & stdbrx_m) != 0; }
static bool has_vshasig() { return (_features & vshasig_m) != 0; }
static bool has_tm() { return (_features & rtm_m) != 0; }
static bool has_darn() { return (_features & darn_m) != 0; }
static bool has_brw() { return (_features & brw_m) != 0; }

@@ -1928,7 +1928,7 @@ bool Arguments::check_vm_args_consistency() {
return false;
}
#endif
#if (defined(X86) || defined(PPC64)) && !defined(ZERO)
#if defined(X86) && !defined(ZERO)
if (LockingMode == LM_MONITOR && UseRTMForStackLocks) {
jio_fprintf(defaultStream::error_stream(),
"LockingMode == 0 (LM_MONITOR) and -XX:+UseRTMForStackLocks are mutually exclusive");

@@ -55,13 +55,13 @@ compiler/rtm/locking/TestRTMAbortRatio.java 8183263 generic-x64,generic-i586
compiler/rtm/locking/TestRTMAbortThreshold.java 8183263 generic-x64,generic-i586
compiler/rtm/locking/TestRTMAfterNonRTMDeopt.java 8183263 generic-x64,generic-i586
compiler/rtm/locking/TestRTMDeoptOnHighAbortRatio.java 8183263 generic-x64,generic-i586
compiler/rtm/locking/TestRTMDeoptOnLowAbortRatio.java 8183263,8307907 generic-x64,generic-i586,aix-ppc64
compiler/rtm/locking/TestRTMDeoptOnLowAbortRatio.java 8183263 generic-x64,generic-i586
compiler/rtm/locking/TestRTMLockingCalculationDelay.java 8183263 generic-x64,generic-i586
compiler/rtm/locking/TestRTMLockingThreshold.java 8183263,8307907 generic-x64,generic-i586,aix-ppc64
compiler/rtm/locking/TestRTMLockingThreshold.java 8183263 generic-x64,generic-i586
compiler/rtm/locking/TestRTMSpinLoopCount.java 8183263 generic-x64,generic-i586
compiler/rtm/locking/TestUseRTMDeopt.java 8183263 generic-x64,generic-i586
compiler/rtm/locking/TestUseRTMXendForLockBusy.java 8183263,8307907 generic-x64,generic-i586,aix-ppc64
compiler/rtm/print/TestPrintPreciseRTMLockingStatistics.java 8183263,8307907 generic-x64,generic-i586,aix-ppc64
compiler/rtm/locking/TestUseRTMXendForLockBusy.java 8183263 generic-x64,generic-i586
compiler/rtm/print/TestPrintPreciseRTMLockingStatistics.java 8183263 generic-x64,generic-i586
compiler/c2/Test8004741.java 8235801 generic-all

@@ -36,8 +36,6 @@ public abstract class RTMGenericCommandLineOptionTest {
protected static final String RTM_INSTR_ERROR
= "RTM instructions are not available on this CPU";
protected static final String RTM_OS_ERROR
= "RTM is not supported on this OS version";
protected static final String RTM_UNSUPPORTED_VM_ERROR
= "RTM locking optimization is not supported in this VM";
protected static final String RTM_FOR_STACK_LOCKS_WARNING
@@ -84,7 +82,7 @@ }
}
public void runTestCases() throws Throwable {
if (Platform.isX86() || Platform.isX64() || Platform.isPPC()) {
if (Platform.isX86() || Platform.isX64()) {
if (Platform.isServer()) {
runX86SupportedVMTestCases();
} else {

@@ -49,37 +49,17 @@ public class TestUseRTMLockingOptionOnUnsupportedCPU {
"UseRTMLocking");
String errorMessage = RTMGenericCommandLineOptionTest.RTM_INSTR_ERROR;
if (Platform.isX86() || Platform.isX64() || Platform.isPPC()) {
if (Platform.isX86() || Platform.isX64()) {
String shouldFailMessage = "JVM startup should fail with option " +
"-XX:+UseRTMLocking on unsupported CPU";
try {
// verify that we get an error when use +UseRTMLocking
// on unsupported CPU
CommandLineOptionTest.verifySameJVMStartup(
new String[] { errorMessage },
new String[] { unrecognizedOption }, shouldFailMessage,
shouldFailMessage + ". Error message should be shown.",
ExitCode.FAIL, "-XX:+UseRTMLocking");
} catch (Throwable e) {
// verify that we get an error when use +UseRTMLocking
// on unsupported OS. It might be the case that although CPU
// supports RTM the OS version does not support RTM
if (Platform.isPPC()) {
String errorMessage2 = RTMGenericCommandLineOptionTest.RTM_OS_ERROR;
String shouldFailMessage2 = "JVM startup should fail with option " +
"-XX:+UseRTMLocking on unsupported CPU or " +
"OS version";
CommandLineOptionTest.verifySameJVMStartup(
new String[] { errorMessage2 },
new String[] { unrecognizedOption}, shouldFailMessage2,
shouldFailMessage2 + ". Error message should be shown.",
ExitCode.FAIL, "-XX:+UseRTMLocking");
} else {
throw e; // checking unsupported OS error is not necessary
}
}
// verify that we get an error when use +UseRTMLocking
// on unsupported CPU
CommandLineOptionTest.verifySameJVMStartup(
new String[] { errorMessage },
new String[] { unrecognizedOption }, shouldFailMessage,
shouldFailMessage + ". Error message should be shown.",
ExitCode.FAIL, "-XX:+UseRTMLocking");
String shouldPassMessage = "JVM startup should pass with option "
+ "-XX:-UseRTMLocking even on unsupported CPU";

@@ -63,11 +63,7 @@ public class TestRTMSpinLoopCount {
protected void runTestCases() throws Throwable {
if (Platform.isPPC()) {
SPIN_LOOP_COUNTS = new int[] { 0, 10, 100, 1_000, 10_000 };
} else {
SPIN_LOOP_COUNTS = new int[] { 0, 100, 1_000, 10_000, 100_000 };
}
SPIN_LOOP_COUNTS = new int[] { 0, 100, 1_000, 10_000, 100_000 };
long[] aborts = new long[TestRTMSpinLoopCount.SPIN_LOOP_COUNTS.length];