8210381: Obsolete EmitSync
Reviewed-by: kvn, dcubed, mdoerr, mbaesken, shade
parent 84cf73f2a5
commit 0f68e5221f
Changed paths:
  src/hotspot/cpu/aarch64
  src/hotspot/cpu/ppc
  src/hotspot/cpu/s390
  src/hotspot/cpu/sparc
  src/hotspot/cpu/x86
  src/hotspot/share/runtime
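
EmitSync was an experimental diagnostic flag whose individual bits selected alternative
fast-path monitor code in each port; with the flag obsoleted, the guards go away and the
default path is emitted unconditionally. A schematic C++ sketch of the transformation
(the guard shapes are taken from the patch below; the emit_* helper name is made up for
illustration):

  // Before: bits of EmitSync pick between code shapes.
  if (EmitSync & 0x01) {          // force everything through the runtime
    __ cmp(oop, zr);
    return;
  }
  if ((EmitSync & 0x02) == 0) {   // guard around the default fast path
    emit_inflated_monitor_fast_path();   // hypothetical helper
  }

  // After: only the default shape survives, unguarded.
  emit_inflated_monitor_fast_path();     // hypothetical helper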

cpu/aarch64

@@ -3378,26 +3378,18 @@ encode %{
     // Load markOop from object into displaced_header.
     __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));

-    // Always do locking in runtime.
-    if (EmitSync & 0x01) {
-      __ cmp(oop, zr);
-      return;
-    }
-
     if (UseBiasedLocking && !UseOptoBiasInlining) {
       __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
     }

     // Handle existing monitor
-    if ((EmitSync & 0x02) == 0) {
     // we can use AArch64's bit test and branch here but
     // markoopDesc does not define a bit index just the bit value
     // so assert in case the bit pos changes
 #   define __monitor_value_log2 1
     assert(markOopDesc::monitor_value == (1 << __monitor_value_log2), "incorrect bit position");
     __ tbnz(disp_hdr, __monitor_value_log2, object_has_monitor);
 #   undef __monitor_value_log2
-    }

     // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
     __ orr(disp_hdr, disp_hdr, markOopDesc::unlocked_value);
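
The tbnz in this hunk keys off the fact that markOopDesc::monitor_value is the single
bit at index 1 of the mark word's tag field. For orientation, the low-order lock bits
of a HotSpot mark word at the time of this change (from markOop.hpp, quoted from memory,
so treat as an assumption):

  // locked_value        = 0b00   -- stack-locked (mark displaced into a BasicLock)
  // unlocked_value      = 0b01   -- neutral, unlocked
  // monitor_value       = 0b10   -- inflated: mark points at an ObjectMonitor
  // marked_value        = 0b11   -- used by GC
  // biased_lock_pattern = 0b101  -- biased toward some thread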

@@ -3455,63 +3447,62 @@ encode %{
     __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));

     // Handle existing monitor.
-    if ((EmitSync & 0x02) == 0) {
     __ b(cont);

     __ bind(object_has_monitor);
     // The object's monitor m is unlocked iff m->owner == NULL,
     // otherwise m->owner may contain a thread or a stack address.
     //
     // Try to CAS m->owner from NULL to current thread.
     __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
     __ mov(disp_hdr, zr);

     if (UseLSE) {
       __ mov(rscratch1, disp_hdr);
       __ casal(Assembler::xword, rscratch1, rthread, tmp);
       __ cmp(rscratch1, disp_hdr);
     } else {
       Label retry_load, fail;
-      if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
+      if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH)) {
         __ prfm(Address(tmp), PSTL1STRM);
+      }
       __ bind(retry_load);
       __ ldaxr(rscratch1, tmp);
       __ cmp(disp_hdr, rscratch1);
       __ br(Assembler::NE, fail);
       // use stlxr to ensure update is immediately visible
       __ stlxr(rscratch1, rthread, tmp);
       __ cbnzw(rscratch1, retry_load);
       __ bind(fail);
     }

     // Label next;
     // __ cmpxchgptr(/*oldv=*/disp_hdr,
     //               /*newv=*/rthread,
     //               /*addr=*/tmp,
     //               /*tmp=*/rscratch1,
     //               /*succeed*/next,
     //               /*fail*/NULL);
     // __ bind(next);

     // store a non-null value into the box.
     __ str(box, Address(box, BasicLock::displaced_header_offset_in_bytes()));

     // PPC port checks the following invariants
     // #ifdef ASSERT
     // bne(flag, cont);
     // We have acquired the monitor, check some invariants.
     // addw(/*monitor=*/tmp, tmp, -ObjectMonitor::owner_offset_in_bytes());
     // Invariant 1: _recursions should be 0.
     // assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
     // assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), tmp,
     //                     "monitor->_recursions should be 0", -1);
     // Invariant 2: OwnerIsThread shouldn't be 0.
     // assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
     // assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), tmp,
     //                        "monitor->OwnerIsThread shouldn't be 0", -1);
     // #endif
-    }

     __ bind(cont);
     // flag == EQ indicates success
     // flag == NE indicates failure
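
Both shapes above, the LSE casal and the ldaxr/stlxr loop, implement the same thing: one
acquiring compare-and-swap of ObjectMonitor::_owner from NULL to the locking thread, with
no retry on value mismatch (the exclusive-store loop only retries spurious failures). A
minimal C++ sketch of that logic with std::atomic as a stand-in (not HotSpot code; the
struct and function names here are invented for illustration):

  #include <atomic>

  struct ObjectMonitorSketch {             // stand-in for HotSpot's ObjectMonitor
    std::atomic<void*> owner{nullptr};     // NULL means unlocked
  };

  // True when the calling thread acquired the inflated monitor.
  bool try_enter(ObjectMonitorSketch* m, void* self) {
    void* expected = nullptr;
    return m->owner.compare_exchange_strong(
        expected, self, std::memory_order_acquire, std::memory_order_relaxed);
  }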

@@ -3533,12 +3524,6 @@ encode %{
     assert_different_registers(oop, box, tmp, disp_hdr);

-    // Always do locking in runtime.
-    if (EmitSync & 0x01) {
-      __ cmp(oop, zr); // Oop can't be 0 here => always false.
-      return;
-    }
-
     if (UseBiasedLocking && !UseOptoBiasInlining) {
       __ biased_locking_exit(oop, tmp, cont);
     }

@@ -3552,10 +3537,8 @@ encode %{

     // Handle existing monitor.
-    if ((EmitSync & 0x02) == 0) {
     __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
     __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
-    }

     // Check if it is still a light weight lock, this is is true if we
     // see the stack address of the basicLock in the markOop of the

@@ -3590,27 +3573,25 @@ encode %{
     __ bind(cas_failed);

     // Handle existing monitor.
-    if ((EmitSync & 0x02) == 0) {
     __ b(cont);

     __ bind(object_has_monitor);
     __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
     __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
     __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
     __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
     __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
     __ cmp(rscratch1, zr);
     __ br(Assembler::NE, cont);

     __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
     __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
     __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
     __ cmp(rscratch1, zr);
     __ cbnz(rscratch1, cont);
     // need a release store here
     __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
     __ stlr(rscratch1, tmp); // rscratch1 is zero
-    }

     __ bind(cont);
     // flag == EQ indicates success
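
The unlock fast path above checks, in order, that the current thread owns the monitor
with zero recursions and that nobody is queued on cxq or EntryList; only then does it
release with a plain release store of NULL to _owner (the stlr), no CAS required. A C++
sketch of that decision chain (std::atomic stand-ins, field names as in ObjectMonitor;
not HotSpot code):

  #include <atomic>
  #include <cstdint>

  struct MonitorSketch {                       // stand-in for ObjectMonitor
    std::atomic<void*>    owner{nullptr};
    std::atomic<intptr_t> recursions{0};
    std::atomic<void*>    EntryList{nullptr};
    std::atomic<void*>    cxq{nullptr};
  };

  // True on a successful fast-path exit; false means take the slow path
  // (not the uncontended owner, or someone is waiting).
  bool try_exit(MonitorSketch* m, void* self) {
    if (m->owner.load(std::memory_order_relaxed) != self)      return false;
    if (m->recursions.load(std::memory_order_relaxed) != 0)    return false;
    if (m->EntryList.load(std::memory_order_relaxed) != nullptr ||
        m->cxq.load(std::memory_order_relaxed) != nullptr)     return false;
    // The stlr above: a release store of NULL hands the monitor back.
    m->owner.store(nullptr, std::memory_order_release);
    return true;
  }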

cpu/ppc

@@ -2848,12 +2848,6 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
   ld(displaced_header, oopDesc::mark_offset_in_bytes(), oop);


-  // Always do locking in runtime.
-  if (EmitSync & 0x01) {
-    cmpdi(flag, oop, 0); // Oop can't be 0 here => always false.
-    return;
-  }
-
   if (try_bias) {
     biased_locking_enter(flag, oop, displaced_header, temp, current_header, cont);
   }

@@ -2867,11 +2861,9 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
 #endif // INCLUDE_RTM_OPT

   // Handle existing monitor.
-  if ((EmitSync & 0x02) == 0) {
   // The object has an existing monitor iff (mark & monitor_value) != 0.
   andi_(temp, displaced_header, markOopDesc::monitor_value);
   bne(CCR0, object_has_monitor);
-  }

   // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
   ori(displaced_header, displaced_header, markOopDesc::unlocked_value);

@@ -2914,48 +2906,46 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
   std(R0/*==0, perhaps*/, BasicLock::displaced_header_offset_in_bytes(), box);

   // Handle existing monitor.
-  if ((EmitSync & 0x02) == 0) {
   b(cont);

   bind(object_has_monitor);
   // The object's monitor m is unlocked iff m->owner == NULL,
   // otherwise m->owner may contain a thread or a stack address.

 #if INCLUDE_RTM_OPT
   // Use the same RTM locking code in 32- and 64-bit VM.
   if (use_rtm) {
     rtm_inflated_locking(flag, oop, displaced_header, box, temp, /*temp*/ current_header,
                          rtm_counters, method_data, profile_rtm, cont);
   } else {
 #endif // INCLUDE_RTM_OPT

   // Try to CAS m->owner from NULL to current thread.
   addi(temp, displaced_header, ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value);
   cmpxchgd(/*flag=*/flag,
            /*current_value=*/current_header,
            /*compare_value=*/(intptr_t)0,
            /*exchange_value=*/R16_thread,
            /*where=*/temp,
            MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
            MacroAssembler::cmpxchgx_hint_acquire_lock());

   // Store a non-null value into the box.
   std(box, BasicLock::displaced_header_offset_in_bytes(), box);

 # ifdef ASSERT
   bne(flag, cont);
   // We have acquired the monitor, check some invariants.
   addi(/*monitor=*/temp, temp, -ObjectMonitor::owner_offset_in_bytes());
   // Invariant 1: _recursions should be 0.
   //assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
   asm_assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), temp,
                           "monitor->_recursions should be 0", -1);
 # endif

 #if INCLUDE_RTM_OPT
   } // use_rtm()
 #endif
-  }

   bind(cont);
   // flag == EQ indicates success
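
On PPC the acquire/release semantics are not implicit in the CAS; the cmpxchgd call
above requests them explicitly via MemBarRel | MemBarAcq, and the outcome lands in a
condition register rather than a return value. In C++ terms the operation is roughly
the following (a sketch with std::atomic, not the HotSpot API):

  #include <atomic>

  // Roughly what the guarded cmpxchgd amounts to: an acq_rel compare-exchange
  // of _owner from 0 to the current thread.
  bool cas_owner_acq_rel(std::atomic<intptr_t>* owner, intptr_t self) {
    intptr_t compare_value = 0;                 // expect unlocked
    return owner->compare_exchange_strong(compare_value, self,
                                          std::memory_order_acq_rel,
                                          std::memory_order_relaxed);
  }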

@@ -2970,12 +2960,6 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
   Label cont;
   Label object_has_monitor;

-  // Always do locking in runtime.
-  if (EmitSync & 0x01) {
-    cmpdi(flag, oop, 0); // Oop can't be 0 here => always false.
-    return;
-  }
-
   if (try_bias) {
     biased_locking_exit(flag, oop, current_header, cont);
   }

@@ -3002,13 +2986,11 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
   beq(flag, cont);

   // Handle existing monitor.
-  if ((EmitSync & 0x02) == 0) {
   // The object has an existing monitor iff (mark & monitor_value) != 0.
   RTM_OPT_ONLY( if (!(UseRTMForStackLocks && use_rtm)) ) // skip load if already done
   ld(current_header, oopDesc::mark_offset_in_bytes(), oop);
   andi_(R0, current_header, markOopDesc::monitor_value);
   bne(CCR0, object_has_monitor);
-  }

   // Check if it is still a light weight lock, this is is true if we see
   // the stack address of the basicLock in the markOop of the object.

@@ -3026,40 +3008,38 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
   assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

   // Handle existing monitor.
-  if ((EmitSync & 0x02) == 0) {
   b(cont);

   bind(object_has_monitor);
   addi(current_header, current_header, -markOopDesc::monitor_value); // monitor
   ld(temp, ObjectMonitor::owner_offset_in_bytes(), current_header);

   // It's inflated.
 #if INCLUDE_RTM_OPT
   if (use_rtm) {
     Label L_regular_inflated_unlock;
     // Clean monitor_value bit to get valid pointer
     cmpdi(flag, temp, 0);
     bne(flag, L_regular_inflated_unlock);
     tend_();
     b(cont);
     bind(L_regular_inflated_unlock);
   }
 #endif

   ld(displaced_header, ObjectMonitor::recursions_offset_in_bytes(), current_header);
   xorr(temp, R16_thread, temp); // Will be 0 if we are the owner.
   orr(temp, temp, displaced_header); // Will be 0 if there are 0 recursions.
   cmpdi(flag, temp, 0);
   bne(flag, cont);

   ld(temp, ObjectMonitor::EntryList_offset_in_bytes(), current_header);
   ld(displaced_header, ObjectMonitor::cxq_offset_in_bytes(), current_header);
   orr(temp, temp, displaced_header); // Will be 0 if both are 0.
   cmpdi(flag, temp, 0);
   bne(flag, cont);
   release();
   std(temp, ObjectMonitor::owner_offset_in_bytes(), current_header);
-  }

   bind(cont);
   // flag == EQ indicates success

cpu/s390

@@ -3374,13 +3374,11 @@ void MacroAssembler::compiler_fast_lock_object(Register oop, Register box, Regis
   }

   // Handle existing monitor.
-  if ((EmitSync & 0x01) == 0) {
   // The object has an existing monitor iff (mark & monitor_value) != 0.
   guarantee(Immediate::is_uimm16(markOopDesc::monitor_value), "must be half-word");
   z_lr(temp, displacedHeader);
   z_nill(temp, markOopDesc::monitor_value);
   z_brne(object_has_monitor);
-  }

   // Set mark to markOop | markOopDesc::unlocked_value.
   z_oill(displacedHeader, markOopDesc::unlocked_value);

@@ -3411,28 +3409,26 @@ void MacroAssembler::compiler_fast_lock_object(Register oop, Register box, Regis

   z_bru(done);

-  if ((EmitSync & 0x01) == 0) {
   Register zero = temp;
   Register monitor_tagged = displacedHeader; // Tagged with markOopDesc::monitor_value.
   bind(object_has_monitor);
   // The object's monitor m is unlocked iff m->owner == NULL,
   // otherwise m->owner may contain a thread or a stack address.
   //
   // Try to CAS m->owner from NULL to current thread.
   z_lghi(zero, 0);
   // If m->owner is null, then csg succeeds and sets m->owner=THREAD and CR=EQ.
   z_csg(zero, Z_thread, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), monitor_tagged);
   // Store a non-null value into the box.
   z_stg(box, BasicLock::displaced_header_offset_in_bytes(), box);
 #ifdef ASSERT
   z_brne(done);
   // We've acquired the monitor, check some invariants.
   // Invariant 1: _recursions should be 0.
   asm_assert_mem8_is_zero(OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions), monitor_tagged,
                           "monitor->_recursions should be 0", -1);
   z_ltgr(zero, zero); // Set CR=EQ.
 #endif
-  }
   bind(done);

   BLOCK_COMMENT("} compiler_fast_lock_object");

@@ -3461,13 +3457,11 @@ void MacroAssembler::compiler_fast_unlock_object(Register oop, Register box, Reg
   z_bre(done);

   // Handle existing monitor.
-  if ((EmitSync & 0x02) == 0) {
   // The object has an existing monitor iff (mark & monitor_value) != 0.
   z_lg(currentHeader, oopDesc::mark_offset_in_bytes(), oop);
   guarantee(Immediate::is_uimm16(markOopDesc::monitor_value), "must be half-word");
   z_nill(currentHeader, markOopDesc::monitor_value);
   z_brne(object_has_monitor);
-  }

   // Check if it is still a light weight lock, this is true if we see
   // the stack address of the basicLock in the markOop of the object

@@ -3477,20 +3471,18 @@ void MacroAssembler::compiler_fast_unlock_object(Register oop, Register box, Reg
   z_bru(done); // Csg sets CR as desired.

   // Handle existing monitor.
-  if ((EmitSync & 0x02) == 0) {
   bind(object_has_monitor);
   z_lg(currentHeader, oopDesc::mark_offset_in_bytes(), oop); // CurrentHeader is tagged with monitor_value set.
   load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
   z_brne(done);
   load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
   z_brne(done);
   load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
   z_brne(done);
   load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
   z_brne(done);
   z_release();
   z_stg(temp/*=0*/, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), currentHeader);
-  }

   bind(done);

cpu/sparc

@@ -2648,195 +2648,92 @@ void MacroAssembler::compiler_lock_object(Register Roop, Register Rmark,
     inc_counter((address) counters->total_entry_count_addr(), Rmark, Rscratch);
   }

-  if (EmitSync & 1) {
-    mov(3, Rscratch);
-    st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
-    cmp(SP, G0);
-    return ;
-  }
-
-  if (EmitSync & 2) {
-
-    // Fetch object's markword
-    ld_ptr(mark_addr, Rmark);
-
-    if (try_bias) {
-      biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
-    }
-
-    // Save Rbox in Rscratch to be used for the cas operation
-    mov(Rbox, Rscratch);
-
-    // set Rmark to markOop | markOopDesc::unlocked_value
-    or3(Rmark, markOopDesc::unlocked_value, Rmark);
-
-    // Initialize the box. (Must happen before we update the object mark!)
-    st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
-
-    // compare object markOop with Rmark and if equal exchange Rscratch with object markOop
-    assert(mark_addr.disp() == 0, "cas must take a zero displacement");
-    cas_ptr(mark_addr.base(), Rmark, Rscratch);
-
-    // if compare/exchange succeeded we found an unlocked object and we now have locked it
-    // hence we are done
-    cmp(Rmark, Rscratch);
-    sub(Rscratch, STACK_BIAS, Rscratch);
-    brx(Assembler::equal, false, Assembler::pt, done);
-    delayed()->sub(Rscratch, SP, Rscratch); //pull next instruction into delay slot
-
-    // we did not find an unlocked object so see if this is a recursive case
-    // sub(Rscratch, SP, Rscratch);
-    assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
-    andcc(Rscratch, 0xfffff003, Rscratch);
-    st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
-    bind (done);
-    return ;
-  }
-
   Label Egress ;

-  if (EmitSync & 256) {
-    Label IsInflated ;
-
-    ld_ptr(mark_addr, Rmark); // fetch obj->mark
-    // Triage: biased, stack-locked, neutral, inflated
-    if (try_bias) {
-      biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
-      // Invariant: if control reaches this point in the emitted stream
-      // then Rmark has not been modified.
-    }
-
-    // Store mark into displaced mark field in the on-stack basic-lock "box"
-    // Critically, this must happen before the CAS
-    // Maximize the ST-CAS distance to minimize the ST-before-CAS penalty.
-    st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
-    andcc(Rmark, 2, G0);
-    brx(Assembler::notZero, false, Assembler::pn, IsInflated);
-    delayed()->
-
-    // Try stack-lock acquisition.
-    // Beware: the 1st instruction is in a delay slot
-    mov(Rbox, Rscratch);
-    or3(Rmark, markOopDesc::unlocked_value, Rmark);
-    assert(mark_addr.disp() == 0, "cas must take a zero displacement");
-    cas_ptr(mark_addr.base(), Rmark, Rscratch);
-    cmp(Rmark, Rscratch);
-    brx(Assembler::equal, false, Assembler::pt, done);
-    delayed()->sub(Rscratch, SP, Rscratch);
-
-    // Stack-lock attempt failed - check for recursive stack-lock.
-    // See the comments below about how we might remove this case.
-    sub(Rscratch, STACK_BIAS, Rscratch);
-    assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
-    andcc(Rscratch, 0xfffff003, Rscratch);
-    br(Assembler::always, false, Assembler::pt, done);
-    delayed()-> st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
-
-    bind(IsInflated);
-    if (EmitSync & 64) {
-      // If m->owner != null goto IsLocked
-      // Pessimistic form: Test-and-CAS vs CAS
-      // The optimistic form avoids RTS->RTO cache line upgrades.
-      ld_ptr(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rscratch);
-      andcc(Rscratch, Rscratch, G0);
-      brx(Assembler::notZero, false, Assembler::pn, done);
-      delayed()->nop();
-      // m->owner == null : it's unlocked.
-    }
-
-    // Try to CAS m->owner from null to Self
-    // Invariant: if we acquire the lock then _recursions should be 0.
-    add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
-    mov(G2_thread, Rscratch);
-    cas_ptr(Rmark, G0, Rscratch);
-    cmp(Rscratch, G0);
-    // Intentional fall-through into done
-  } else {
   // Aggressively avoid the Store-before-CAS penalty
   // Defer the store into box->dhw until after the CAS
   Label IsInflated, Recursive ;

 // Anticipate CAS -- Avoid RTS->RTO upgrade
 // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads);

   ld_ptr(mark_addr, Rmark); // fetch obj->mark
   // Triage: biased, stack-locked, neutral, inflated

   if (try_bias) {
     biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
     // Invariant: if control reaches this point in the emitted stream
     // then Rmark has not been modified.
   }
   andcc(Rmark, 2, G0);
   brx(Assembler::notZero, false, Assembler::pn, IsInflated);
   delayed()-> // Beware - dangling delay-slot

   // Try stack-lock acquisition.
   // Transiently install BUSY (0) encoding in the mark word.
   // if the CAS of 0 into the mark was successful then we execute:
   //   ST box->dhw = mark -- save fetched mark in on-stack basiclock box
   //   ST obj->mark = box -- overwrite transient 0 value
   // This presumes TSO, of course.

   mov(0, Rscratch);
   or3(Rmark, markOopDesc::unlocked_value, Rmark);
   assert(mark_addr.disp() == 0, "cas must take a zero displacement");
   cas_ptr(mark_addr.base(), Rmark, Rscratch);
 // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads);
   cmp(Rscratch, Rmark);
   brx(Assembler::notZero, false, Assembler::pn, Recursive);
   delayed()->st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
   if (counters != NULL) {
     cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
   }
   ba(done);
   delayed()->st_ptr(Rbox, mark_addr);

   bind(Recursive);
   // Stack-lock attempt failed - check for recursive stack-lock.
   // Tests show that we can remove the recursive case with no impact
   // on refworkload 0.83. If we need to reduce the size of the code
   // emitted by compiler_lock_object() the recursive case is perfect
   // candidate.
   //
   // A more extreme idea is to always inflate on stack-lock recursion.
   // This lets us eliminate the recursive checks in compiler_lock_object
   // and compiler_unlock_object and the (box->dhw == 0) encoding.
   // A brief experiment - requiring changes to synchronizer.cpp, interpreter,
   // and showed a performance *increase*. In the same experiment I eliminated
   // the fast-path stack-lock code from the interpreter and always passed
   // control to the "slow" operators in synchronizer.cpp.

   // RScratch contains the fetched obj->mark value from the failed CAS.
   sub(Rscratch, STACK_BIAS, Rscratch);
   sub(Rscratch, SP, Rscratch);
   assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
   andcc(Rscratch, 0xfffff003, Rscratch);
   if (counters != NULL) {
     // Accounting needs the Rscratch register
     st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
     cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
     ba_short(done);
   } else {
     ba(done);
     delayed()->st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
   }

   bind (IsInflated);

   // Try to CAS m->owner from null to Self
   // Invariant: if we acquire the lock then _recursions should be 0.
   add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
   mov(G2_thread, Rscratch);
   cas_ptr(Rmark, G0, Rscratch);
   andcc(Rscratch, Rscratch, G0); // set ICCs for done: icc.zf iff success
   // set icc.zf : 1=success 0=failure
   // ST box->displaced_header = NonZero.
   // Any non-zero value suffices:
   //   markOopDesc::unused_mark(), G2_thread, RBox, RScratch, rsp, etc.
   st_ptr(Rbox, Rbox, BasicLock::displaced_header_offset_in_bytes());
   // Intentional fall-through into done
-  }

   bind (done);
 }
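
The recursive stack-lock check above rests on an address trick: after a failed CAS,
Rscratch holds the mark word that was found, and if that value is a stack address within
one page of the current SP, this thread already holds the lock. Masking (mark - SP) with
0xfffff003 (tag bits plus everything above the 4K page offset) yields zero exactly in the
recursive case, and that zero becomes the "recursive" displaced header in the box. A C++
sketch of the test (illustrative only; on 64-bit SPARC the saved SP is STACK_BIAS-adjusted
first):

  #include <cstdint>

  // Sketch of the sp-proximity test behind "andcc(Rscratch, 0xfffff003, ...)".
  // mark is the word fetched from the object header by the failed CAS; sp is
  // the current thread's stack pointer. Bits 0-1 are mark-word tag bits; the
  // high part of the mask rejects any delta of a page (4K) or more.
  bool is_recursive_stack_lock(uintptr_t mark, uintptr_t sp) {
    uintptr_t delta = mark - sp;
    return (delta & 0xfffff003u) == 0;  // same page as SP, tag bits clear
  }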

@@ -2848,30 +2745,6 @@ void MacroAssembler::compiler_unlock_object(Register Roop, Register Rmark,
   Label done ;

-  if (EmitSync & 4) {
-    cmp(SP, G0);
-    return ;
-  }
-
-  if (EmitSync & 8) {
-    if (try_bias) {
-      biased_locking_exit(mark_addr, Rscratch, done);
-    }
-
-    // Test first if it is a fast recursive unlock
-    ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rmark);
-    br_null_short(Rmark, Assembler::pt, done);
-
-    // Check if it is still a light weight lock, this is is true if we see
-    // the stack address of the basicLock in the markOop of the object
-    assert(mark_addr.disp() == 0, "cas must take a zero displacement");
-    cas_ptr(mark_addr.base(), Rbox, Rmark);
-    ba(done);
-    delayed()->cmp(Rbox, Rmark);
-    bind(done);
-    return ;
-  }
-
   // Beware ... If the aggregate size of the code emitted by CLO and CUO is
   // is too large performance rolls abruptly off a cliff.
   // This could be related to inlining policies, code cache management, or

@@ -2902,105 +2775,39 @@ void MacroAssembler::compiler_unlock_object(Register Roop, Register Rmark,
   // close the resultant (and rare) race by having contended threads in
   // monitorenter periodically poll _owner.

-  if (EmitSync & 1024) {
-    // Emit code to check that _owner == Self
-    // We could fold the _owner test into subsequent code more efficiently
-    // than using a stand-alone check, but since _owner checking is off by
-    // default we don't bother. We also might consider predicating the
-    // _owner==Self check on Xcheck:jni or running on a debug build.
-    ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), Rscratch);
-    orcc(Rscratch, G0, G0);
-    brx(Assembler::notZero, false, Assembler::pn, done);
-    delayed()->nop();
-  }
-
-  if (EmitSync & 512) {
-    // classic lock release code absent 1-0 locking
-    //   m->Owner = null;
-    //   membar #storeload
-    //   if (m->cxq|m->EntryList) == null goto Success
-    //   if (m->succ != null) goto Success
-    //   if CAS (&m->Owner,0,Self) != 0 goto Success
-    //   goto SlowPath
-    ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), Rbox);
-    orcc(Rbox, G0, G0);
-    brx(Assembler::notZero, false, Assembler::pn, done);
-    delayed()->nop();
-    st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
-    if (os::is_MP()) { membar(StoreLoad); }
-    ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)), Rscratch);
-    ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)), Rbox);
-    orcc(Rbox, Rscratch, G0);
-    brx(Assembler::zero, false, Assembler::pt, done);
-    delayed()->
-    ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), Rscratch);
-    andcc(Rscratch, Rscratch, G0);
-    brx(Assembler::notZero, false, Assembler::pt, done);
-    delayed()->andcc(G0, G0, G0);
-    add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
-    mov(G2_thread, Rscratch);
-    cas_ptr(Rmark, G0, Rscratch);
-    cmp(Rscratch, G0);
-    // invert icc.zf and goto done
-    brx(Assembler::notZero, false, Assembler::pt, done);
-    delayed()->cmp(G0, G0);
-    br(Assembler::always, false, Assembler::pt, done);
-    delayed()->cmp(G0, 1);
-  } else {
   // 1-0 form : avoids CAS and MEMBAR in the common case
   // Do not bother to ratify that m->Owner == Self.
   ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), Rbox);
   orcc(Rbox, G0, G0);
   brx(Assembler::notZero, false, Assembler::pn, done);
   delayed()->
   ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)), Rscratch);
   ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)), Rbox);
   orcc(Rbox, Rscratch, G0);
-  if (EmitSync & 16384) {
-    // As an optional optimization, if (EntryList|cxq) != null and _succ is null then
-    // we should transfer control directly to the slow-path.
-    // This test makes the reacquire operation below very infrequent.
-    // The logic is equivalent to :
-    //   if (cxq|EntryList) == null : Owner=null; goto Success
-    //   if succ == null : goto SlowPath
-    //   Owner=null; membar #storeload
-    //   if succ != null : goto Success
-    //   if CAS(&Owner,null,Self) != null goto Success
-    //   goto SlowPath
-    brx(Assembler::zero, true, Assembler::pt, done);
-    delayed()->
-    st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
-    ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), Rscratch);
-    andcc(Rscratch, Rscratch, G0) ;
-    brx(Assembler::zero, false, Assembler::pt, done);
-    delayed()->orcc(G0, 1, G0);
-    st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
-  } else {
   brx(Assembler::zero, false, Assembler::pt, done);
   delayed()->
   st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
-  }
   if (os::is_MP()) { membar(StoreLoad); }
   // Check that _succ is (or remains) non-zero
   ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), Rscratch);
   andcc(Rscratch, Rscratch, G0);
   brx(Assembler::notZero, false, Assembler::pt, done);
   delayed()->andcc(G0, G0, G0);
   add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
   mov(G2_thread, Rscratch);
   cas_ptr(Rmark, G0, Rscratch);
   cmp(Rscratch, G0);
   // invert icc.zf and goto done
   // A slightly better v8+/v9 idiom would be the following:
   //   movrnz Rscratch,1,Rscratch
   //   ba done
   //   xorcc Rscratch,1,G0
   // In v8+ mode the idiom would be valid IFF Rscratch was a G or O register
   brx(Assembler::notZero, false, Assembler::pt, done);
   delayed()->cmp(G0, G0);
   br(Assembler::always, false, Assembler::pt, done);
   delayed()->cmp(G0, 1);
-  }

   bind (LStacked);
   // Consider: we could replace the expensive CAS in the exit
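
The surviving "1-0 form" releases the monitor with a plain store instead of a CAS and
only pays for a full fence and possible reacquire when waiters exist; the rare race this
opens is closed, per the comments above, by contended threads in monitorenter
periodically polling _owner. A C++ sketch of the protocol (std::atomic stand-ins,
invented names; the delay-slot scheduling of the real code is ignored):

  #include <atomic>

  struct Mon {
    std::atomic<void*> owner, EntryList, cxq, succ;
    std::atomic<long>  recursions;
  };

  enum class Exit { Success, SlowPath };

  Exit one_zero_exit(Mon* m, void* self) {
    if (m->recursions.load() != 0) return Exit::SlowPath;
    m->owner.store(nullptr, std::memory_order_release);    // 1-0: no CAS
    if (m->EntryList.load() == nullptr && m->cxq.load() == nullptr)
      return Exit::Success;                                // nobody waiting
    std::atomic_thread_fence(std::memory_order_seq_cst);   // membar #storeload
    if (m->succ.load() != nullptr) return Exit::Success;   // a successor will run
    // No thread is poised to take the lock: try to reacquire and go slow.
    void* expected = nullptr;
    if (m->owner.compare_exchange_strong(expected, self)) return Exit::SlowPath;
    return Exit::Success;                                  // someone else got it
  }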

cpu/x86

@@ -1721,227 +1721,160 @@ void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg
   if (counters != NULL) {
     atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()), scrReg);
   }
-  if (EmitSync & 1) {
-    // set box->dhw = markOopDesc::unused_mark()
-    // Force all sync thru slow-path: slow_enter() and slow_exit()
-    movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
-    cmpptr (rsp, (int32_t)NULL_WORD);
-  } else {
   // Possible cases that we'll encounter in fast_lock
   // ------------------------------------------------
   // * Inflated
   //    -- unlocked
   //    -- Locked
   //       = by self
   //       = by other
   // * biased
   //    -- by Self
   //    -- by other
   // * neutral
   // * stack-locked
   //    -- by self
   //       = sp-proximity test hits
   //       = sp-proximity test generates false-negative
   //    -- by other
   //

   Label IsInflated, DONE_LABEL;

   // it's stack-locked, biased or neutral
   // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
   // order to reduce the number of conditional branches in the most common cases.
   // Beware -- there's a subtle invariant that fetch of the markword
   // at [FETCH], below, will never observe a biased encoding (*101b).
   // If this invariant is not held we risk exclusion (safety) failure.
   if (UseBiasedLocking && !UseOptoBiasInlining) {
     biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, counters);
   }

 #if INCLUDE_RTM_OPT
   if (UseRTMForStackLocks && use_rtm) {
     rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
                       stack_rtm_counters, method_data, profile_rtm,
                       DONE_LABEL, IsInflated);
   }
 #endif // INCLUDE_RTM_OPT

   movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));          // [FETCH]
   testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased
   jccb(Assembler::notZero, IsInflated);

   // Attempt stack-locking ...
   orptr (tmpReg, markOopDesc::unlocked_value);
   movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
   if (os::is_MP()) {
     lock();
   }
   cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes()));      // Updates tmpReg
   if (counters != NULL) {
     cond_inc32(Assembler::equal,
                ExternalAddress((address)counters->fast_path_entry_count_addr()));
   }
   jcc(Assembler::equal, DONE_LABEL);           // Success

   // Recursive locking.
   // The object is stack-locked: markword contains stack pointer to BasicLock.
   // Locked by current thread if difference with current SP is less than one page.
   subptr(tmpReg, rsp);
   // Next instruction set ZFlag == 1 (Success) if difference is less then one page.
   andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
   movptr(Address(boxReg, 0), tmpReg);
   if (counters != NULL) {
     cond_inc32(Assembler::equal,
                ExternalAddress((address)counters->fast_path_entry_count_addr()));
   }
   jmp(DONE_LABEL);
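
The stack-locking attempt just above copies mark|unlocked into the box and then issues a
lock cmpxchg of the box address into the object header; ZF==1 means this thread installed
the stack lock. A minimal C++ sketch of the same exchange (illustrative; real HotSpot
operates on the object's mark word in place, and the type names here are invented):

  #include <atomic>
  #include <cstdint>

  struct BasicLockSketch { uintptr_t displaced_header; };

  bool try_stack_lock(std::atomic<uintptr_t>* mark_word, BasicLockSketch* box) {
    const uintptr_t unlocked_value = 1;                 // mark word tag 0b01
    uintptr_t mark = mark_word->load() | unlocked_value;
    box->displaced_header = mark;                       // anticipate success
    // lock cmpxchg: header <- box iff the header still holds the neutral mark
    return mark_word->compare_exchange_strong(
        mark, reinterpret_cast<uintptr_t>(box), std::memory_order_acquire);
  }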
bind(IsInflated);
|
||||
// The object is inflated. tmpReg contains pointer to ObjectMonitor* + markOopDesc::monitor_value
|
||||
bind(IsInflated);
|
||||
// The object is inflated. tmpReg contains pointer to ObjectMonitor* + markOopDesc::monitor_value
|
||||
|
||||
#if INCLUDE_RTM_OPT
|
||||
// Use the same RTM locking code in 32- and 64-bit VM.
|
||||
if (use_rtm) {
|
||||
rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg,
|
||||
rtm_counters, method_data, profile_rtm, DONE_LABEL);
|
||||
} else {
|
||||
// Use the same RTM locking code in 32- and 64-bit VM.
|
||||
if (use_rtm) {
|
||||
rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg,
|
||||
rtm_counters, method_data, profile_rtm, DONE_LABEL);
|
||||
} else {
|
||||
#endif // INCLUDE_RTM_OPT
|
||||
|
||||
#ifndef _LP64
|
||||
// The object is inflated.
|
||||
// The object is inflated.
|
||||
|
||||
// boxReg refers to the on-stack BasicLock in the current frame.
|
||||
// We'd like to write:
|
||||
// set box->_displaced_header = markOopDesc::unused_mark(). Any non-0 value suffices.
|
||||
// This is convenient but results a ST-before-CAS penalty. The following CAS suffers
|
||||
// additional latency as we have another ST in the store buffer that must drain.
|
||||
// boxReg refers to the on-stack BasicLock in the current frame.
|
||||
// We'd like to write:
|
||||
// set box->_displaced_header = markOopDesc::unused_mark(). Any non-0 value suffices.
|
||||
// This is convenient but results a ST-before-CAS penalty. The following CAS suffers
|
||||
// additional latency as we have another ST in the store buffer that must drain.
|
||||
|
||||
if (EmitSync & 8192) {
|
||||
movptr(Address(boxReg, 0), 3); // results in ST-before-CAS penalty
|
||||
get_thread (scrReg);
|
||||
movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2]
|
||||
movptr(tmpReg, NULL_WORD); // consider: xor vs mov
|
||||
if (os::is_MP()) {
|
||||
lock();
|
||||
}
|
||||
cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
|
||||
} else
|
||||
if ((EmitSync & 128) == 0) { // avoid ST-before-CAS
|
||||
// register juggle because we need tmpReg for cmpxchgptr below
|
||||
movptr(scrReg, boxReg);
|
||||
movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2]
|
||||
// avoid ST-before-CAS
|
||||
// register juggle because we need tmpReg for cmpxchgptr below
|
||||
movptr(scrReg, boxReg);
|
||||
movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2]
|
||||
|
||||
// Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
|
||||
if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
|
||||
// prefetchw [eax + Offset(_owner)-2]
|
||||
prefetchw(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
|
||||
}
|
||||
// Optimistic form: consider XORL tmpReg,tmpReg
|
||||
movptr(tmpReg, NULL_WORD);
|
||||
|
||||
if ((EmitSync & 64) == 0) {
|
||||
// Optimistic form: consider XORL tmpReg,tmpReg
|
||||
movptr(tmpReg, NULL_WORD);
|
||||
} else {
|
||||
// Can suffer RTS->RTO upgrades on shared or cold $ lines
|
||||
// Test-And-CAS instead of CAS
|
||||
movptr(tmpReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); // rax, = m->_owner
|
||||
testptr(tmpReg, tmpReg); // Locked ?
|
||||
jccb (Assembler::notZero, DONE_LABEL);
|
||||
}
|
||||
// Appears unlocked - try to swing _owner from null to non-null.
|
||||
// Ideally, I'd manifest "Self" with get_thread and then attempt
|
||||
// to CAS the register containing Self into m->Owner.
|
||||
// But we don't have enough registers, so instead we can either try to CAS
|
||||
// rsp or the address of the box (in scr) into &m->owner. If the CAS succeeds
|
||||
// we later store "Self" into m->Owner. Transiently storing a stack address
|
||||
// (rsp or the address of the box) into m->owner is harmless.
|
||||
// Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
|
||||
if (os::is_MP()) {
|
||||
lock();
|
||||
}
|
||||
cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
|
||||
movptr(Address(scrReg, 0), 3); // box->_displaced_header = 3
|
||||
// If we weren't able to swing _owner from NULL to the BasicLock
|
||||
// then take the slow path.
|
||||
jccb (Assembler::notZero, DONE_LABEL);
|
||||
// update _owner from BasicLock to thread
|
||||
get_thread (scrReg); // beware: clobbers ICCs
|
||||
movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg);
|
||||
xorptr(boxReg, boxReg); // set icc.ZFlag = 1 to indicate success
|
||||
|
||||
// Appears unlocked - try to swing _owner from null to non-null.
|
||||
// Ideally, I'd manifest "Self" with get_thread and then attempt
|
||||
// to CAS the register containing Self into m->Owner.
|
||||
// But we don't have enough registers, so instead we can either try to CAS
|
||||
// rsp or the address of the box (in scr) into &m->owner. If the CAS succeeds
|
||||
// we later store "Self" into m->Owner. Transiently storing a stack address
|
||||
// (rsp or the address of the box) into m->owner is harmless.
|
||||
// Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
|
||||
if (os::is_MP()) {
|
||||
lock();
|
||||
}
|
||||
cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
|
||||
movptr(Address(scrReg, 0), 3); // box->_displaced_header = 3
|
||||
// If we weren't able to swing _owner from NULL to the BasicLock
|
||||
// then take the slow path.
|
||||
jccb (Assembler::notZero, DONE_LABEL);
|
||||
// update _owner from BasicLock to thread
|
||||
get_thread (scrReg); // beware: clobbers ICCs
|
||||
movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg);
|
||||
xorptr(boxReg, boxReg); // set icc.ZFlag = 1 to indicate success
|
||||
|
||||
// If the CAS fails we can either retry or pass control to the slow-path.
|
||||
// We use the latter tactic.
|
||||
// Pass the CAS result in the icc.ZFlag into DONE_LABEL
|
||||
// If the CAS was successful ...
|
||||
// Self has acquired the lock
|
||||
// Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
|
||||
// Intentional fall-through into DONE_LABEL ...
    } else {
      movptr(Address(boxReg, 0), intptr_t(markOopDesc::unused_mark()));  // results in ST-before-CAS penalty
      movptr(boxReg, tmpReg);

      // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
      if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
        // prefetchw [eax + Offset(_owner)-2]
        prefetchw(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
      }

      if ((EmitSync & 64) == 0) {
        // Optimistic form
        xorptr  (tmpReg, tmpReg);
      } else {
        // Can suffer RTS->RTO upgrades on shared or cold $ lines
        movptr(tmpReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));   // rax, = m->_owner
        testptr(tmpReg, tmpReg);                   // Locked ?
        jccb  (Assembler::notZero, DONE_LABEL);
      }

      // Appears unlocked - try to swing _owner from null to non-null.
      // Use either "Self" (in scr) or rsp as thread identity in _owner.
      // Invariant:   tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
      get_thread (scrReg);
      if (os::is_MP()) {
        lock();
      }
      cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));

      // If the CAS fails we can either retry or pass control to the slow-path.
      // We use the latter tactic.
      // Pass the CAS result in the icc.ZFlag into DONE_LABEL
      // If the CAS was successful ...
      //   Self has acquired the lock
      //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
      // Intentional fall-through into DONE_LABEL ...
    }
#else // _LP64
    // It's inflated
    movq(scrReg, tmpReg);
    xorq(tmpReg, tmpReg);
    if (os::is_MP()) {
      lock();
    }
    cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
    // Unconditionally set box->_displaced_header = markOopDesc::unused_mark().
    // Without cast to int32_t movptr will destroy r10 which is typically obj.
    movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
    // Intentional fall-through into DONE_LABEL ...
    // Propagate ICC.ZF from CAS above into DONE_LABEL.
#endif // _LP64
#if INCLUDE_RTM_OPT
  } // use_rtm()
#endif
  // DONE_LABEL is a hot target - we'd really like to place it at the
  // start of cache line by padding with NOPs.
  // See the AMD and Intel software optimization manuals for the
  // most efficient "long" NOP encodings.
  // Unfortunately none of our alignment mechanisms suffice.
  bind(DONE_LABEL);
  // At DONE_LABEL the icc ZFlag is set as follows ...
  //    Fast_Unlock uses the same protocol.
  // ZFlag == 1 -> Success
  // ZFlag == 0 -> Failure - force control through the slow-path
}
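fast_lock and fast_unlock hand their result to the surrounding compiled code solely through ICC.ZF, as the protocol comment above states. A rough C++ analogue of that contract, with hypothetical names, added here only as an editorial illustration:

// zf models the ICC.ZF value left behind by the emitted fast path.
void monitor_enter_site(bool zf) {
  if (!zf) {
    // ZF == 0 -> failure: force control through the runtime slow path.
  }
  // ZF == 1 -> success: the lock is held; continue into the critical section.
}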

// obj: object to unlock
@ -1980,293 +1913,179 @@ void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpR
  assert(boxReg == rax, "");
  assert_different_registers(objReg, boxReg, tmpReg);

  if (EmitSync & 4) {
    // Disable - inhibit all inlining.  Force control through the slow-path
    cmpptr (rsp, 0);
  } else {
    Label DONE_LABEL, Stacked, CheckSucc;

    // Critically, the biased locking test must have precedence over
    // and appear before the (box->dhw == 0) recursive stack-lock test.
    if (UseBiasedLocking && !UseOptoBiasInlining) {
      biased_locking_exit(objReg, tmpReg, DONE_LABEL);
    }
#if INCLUDE_RTM_OPT
    if (UseRTMForStackLocks && use_rtm) {
      assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
      Label L_regular_unlock;
      movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));  // fetch markword
      andptr(tmpReg, markOopDesc::biased_lock_mask_in_place);            // look at 3 lock bits
      cmpptr(tmpReg, markOopDesc::unlocked_value);                       // bits = 001 unlocked
      jccb(Assembler::notEqual, L_regular_unlock);                       // if !HLE RegularLock
      xend();                                                            // otherwise end...
      jmp(DONE_LABEL);                                                   // ... and we're done
      bind(L_regular_unlock);
    }
#endif

    cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD);                    // Examine the displaced header
    jcc   (Assembler::zero, DONE_LABEL);                               // 0 indicates recursive stack-lock
    movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));  // Examine the object's markword
    testptr(tmpReg, markOopDesc::monitor_value);                       // Inflated?
    jccb  (Assembler::zero, Stacked);

    // It's inflated.
#if INCLUDE_RTM_OPT
    if (use_rtm) {
      Label L_regular_inflated_unlock;
      int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
      movptr(boxReg, Address(tmpReg, owner_offset));
      testptr(boxReg, boxReg);
      jccb(Assembler::notZero, L_regular_inflated_unlock);
      xend();
      jmpb(DONE_LABEL);
      bind(L_regular_inflated_unlock);
    }
#endif
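For context, the RTM path above commits the enclosing hardware transaction instead of performing a real unlock: if the monitor has no owner, this thread must be inside a transaction that elided the lock. With compiler intrinsics the same shape would look roughly like this (requires RTM hardware and -mrtm; an assumption-laden sketch, not JDK code):

#include <immintrin.h>

// Mirrors the asm: owner == null means we are executing transactionally,
// so commit the region; otherwise fall back to the regular unlock.
bool rtm_unlock_if_transactional(void* monitor_owner) {
  if (monitor_owner == nullptr) {
    _xend();          // commit the transactional region
    return true;
  }
  return false;       // regular inflated unlock path
}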

    // Despite our balanced locking property we still check that m->_owner == Self
    // as java routines or native JNI code called by this thread might
    // have released the lock.
    // Refer to the comments in synchronizer.cpp for how we might encode extra
    // state in _succ so we can avoid fetching EntryList|cxq.
    //
    // I'd like to add more cases in fast_lock() and fast_unlock() --
    // such as recursive enter and exit -- but we have to be wary of
    // I$ bloat, T$ effects and BP$ effects.
    //
    // If there's no contention try a 1-0 exit.  That is, exit without
    // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
    // we detect and recover from the race that the 1-0 exit admits.
    //
    // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
    // before it STs null into _owner, releasing the lock.  Updates
    // to data protected by the critical section must be visible before
    // we drop the lock (and thus before any other thread could acquire
    // the lock and observe the fields protected by the lock).
    // IA32's memory-model is SPO, so STs are ordered with respect to
    // each other and there's no need for an explicit barrier (fence).
    // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
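The 1-0 exit and the recovery race it admits, sketched in C++ with std::atomic. This is an editorial illustration of the protocol described in the comment above, not the patch itself:

#include <atomic>

std::atomic<void*> _owner{nullptr};
std::atomic<void*> _succ{nullptr};

bool fast_path_exit(void* self) {
  _owner.store(nullptr, std::memory_order_release);      // ST null into _owner
  std::atomic_thread_fence(std::memory_order_seq_cst);   // the MEMBAR pivot
  if (_succ.load() != nullptr)
    return true;                 // a successor exists and will make progress
  // Race: the successor vanished in the window above. Try to regrab the
  // lock; if another thread already took it, the exit still succeeded.
  void* expected = nullptr;
  if (!_owner.compare_exchange_strong(expected, self))
    return true;
  return false;                  // we re-own the lock: go to the slow path
}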
#ifndef _LP64
    get_thread (boxReg);
    if ((EmitSync & 4096) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
      // prefetchw [ebx + Offset(_owner)-2]
      prefetchw(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
    }
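The prefetchw above requests the _owner cache line in a writable state ahead of the CAS, avoiding an RFO upgrade later. A portable C++ equivalent via the GCC/Clang builtin, shown as an editorial sketch rather than part of the change:

// rw = 1 requests the line in anticipation of a write (prefetchw where available).
inline void prefetch_owner_for_write(const void* owner_field) {
  __builtin_prefetch(owner_field, /*rw=*/1, /*locality=*/3);
}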
// Note that we could employ various encoding schemes to reduce
|
||||
// the number of loads below (currently 4) to just 2 or 3.
|
||||
// Refer to the comments in synchronizer.cpp.
|
||||
// In practice the chain of fetches doesn't seem to impact performance, however.
|
||||
xorptr(boxReg, boxReg);
    if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
      // Attempt to reduce branch density - AMD's branch predictor.
      orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
      orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
      orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
      jccb  (Assembler::notZero, DONE_LABEL);
      movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
      jmpb  (DONE_LABEL);
    } else {
      orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
      jccb  (Assembler::notZero, DONE_LABEL);
      movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
      orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
      jccb  (Assembler::notZero, CheckSucc);
      movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
      jmpb  (DONE_LABEL);
    }
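The first arm above trades precision for branch density: it ORs recursions, EntryList and cxq together and branches once on the combined value instead of testing each field. In plain C++ the idea reduces to this (hypothetical names, editorial sketch):

// One branch on the OR of three fields instead of three separate branches.
bool can_take_1_0_exit(intptr_t recursions, intptr_t entry_list, intptr_t cxq) {
  return (recursions | entry_list | cxq) == 0;
}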

    // The following code fragment (EmitSync & 65536) improves the performance of
    // contended applications and contended synchronization microbenchmarks.
    // Unfortunately the emission of the code - even though not executed - causes regressions
    // in scimark and jetstream, evidently because of $ effects.  Replacing the code
    // with an equal number of never-executed NOPs results in the same regression.
    // We leave it off by default.

    if ((EmitSync & 65536) != 0) {
      Label LSuccess, LGoSlowPath ;

      bind  (CheckSucc);

      // Optional pre-test ... it's safe to elide this
      cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
      jccb(Assembler::zero, LGoSlowPath);

      // We have a classic Dekker-style idiom:
      //    ST m->_owner = 0 ; MEMBAR; LD m->_succ
      // There are a number of ways to implement the barrier:
      // (1) lock:andl &m->_owner, 0
      //     is fast, but masm doesn't currently support the "ANDL M,IMM32" form.
      //     LOCK: ANDL [ebx+Offset(_Owner)-2], 0
      //     Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8
      // (2) If supported, an explicit MFENCE is appealing.
      //     In older IA32 processors MFENCE is slower than lock:add or xchg
      //     particularly if the write-buffer is full as might be the case
      //     if stores closely precede the fence or fence-equivalent instruction.
      //     See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences
      //     as the situation has changed with Nehalem and Shanghai.
      // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack
      //     The $lines underlying the top-of-stack should be in M-state.
      //     The locked add instruction is serializing, of course.
      // (4) Use xchg, which is serializing
      //     mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works
      // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0.
      //     The integer condition codes will tell us if succ was 0.
      //     Since _succ and _owner should reside in the same $line and
      //     we just stored into _owner, it's likely that the $line
      //     remains in M-state for the lock:orl.
      //
      // We currently use (3), although it's likely that switching to (2)
      // is correct for the future.

      movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
      if (os::is_MP()) {
        lock(); addptr(Address(rsp, 0), 0);
      }
      // Ratify _succ remains non-null
      cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), 0);
      jccb  (Assembler::notZero, LSuccess);

      xorptr(boxReg, boxReg);                  // box is really EAX
      if (os::is_MP()) { lock(); }
      cmpxchgptr(rsp, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
      // There's no successor so we tried to regrab the lock with the
      // placeholder value. If that didn't work, then another thread
      // grabbed the lock so we're done (and exit was a success).
      jccb  (Assembler::notEqual, LSuccess);
      // Since we're low on registers we installed rsp as a placeholder in _owner.
      // Now install Self over rsp.  This is safe as we're transitioning from
      // non-null to non-null
      get_thread (boxReg);
      movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), boxReg);
      // Intentional fall-through into LGoSlowPath ...

      bind  (LGoSlowPath);
      orptr(boxReg, 1);                       // set ICC.ZF=0 to indicate failure
      jmpb  (DONE_LABEL);

      bind  (LSuccess);
      xorptr(boxReg, boxReg);                 // set ICC.ZF=1 to indicate success
      jmpb  (DONE_LABEL);
    }
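Option (3) above, the locked add of zero to the top of stack, is the fence the code emits here. In C++ with GCC/Clang inline assembly it would look roughly like this (x86-64 only; an illustrative sketch, not part of the change):

// A lock-prefixed RMW on the (likely M-state) top-of-stack line acts as a
// full StoreLoad barrier, historically cheaper than MFENCE on many parts.
inline void storeload_fence() {
  __asm__ __volatile__("lock; addl $0,0(%%rsp)" ::: "cc", "memory");
}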

    bind  (Stacked);
    // It's not inflated and it's not recursively stack-locked and it's not biased.
    // It must be stack-locked.
    // Try to reset the header to displaced header.
    // The "box" value on the stack is stable, so we can reload
    // and be assured we observe the same value as above.
    movptr(tmpReg, Address(boxReg, 0));
    if (os::is_MP()) {
      lock();
    }
    cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
    // Intentional fall-through into DONE_LABEL

    // DONE_LABEL is a hot target - we'd really like to place it at the
    // start of cache line by padding with NOPs.
    // See the AMD and Intel software optimization manuals for the
    // most efficient "long" NOP encodings.
    // Unfortunately none of our alignment mechanisms suffice.
    if ((EmitSync & 65536) == 0) {
      bind (CheckSucc);
    }
#else // _LP64
    // It's inflated
    if (EmitSync & 1024) {
      // Emit code to check that _owner == Self
      // We could fold the _owner test into subsequent code more efficiently
      // than using a stand-alone check, but since _owner checking is off by
      // default we don't bother. We also might consider predicating the
      // _owner==Self check on Xcheck:jni or running on a debug build.
      movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
      xorptr(boxReg, r15_thread);
    } else {
      xorptr(boxReg, boxReg);
    }
    orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
    jccb  (Assembler::notZero, DONE_LABEL);
    movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
    orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
    jccb  (Assembler::notZero, CheckSucc);
    movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
    jmpb  (DONE_LABEL);

    if ((EmitSync & 65536) == 0) {
      // Try to avoid passing control into the slow_path ...
      Label LSuccess, LGoSlowPath ;
      bind  (CheckSucc);

      // The following optional optimization can be elided if necessary
      // Effectively: if (succ == null) goto SlowPath
      // The code reduces the window for a race, however,
      // and thus benefits performance.
      cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
      jccb  (Assembler::zero, LGoSlowPath);

      xorptr(boxReg, boxReg);
      if ((EmitSync & 16) && os::is_MP()) {
        xchgptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
      } else {
        movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
        if (os::is_MP()) {
          // Memory barrier/fence
          // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ
          // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack.
          // This is faster on Nehalem and AMD Shanghai/Barcelona.
          // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences
          // We might also restructure (ST Owner=0;barrier;LD _Succ) to
          // (mov box,0; xchgq box, &m->Owner; LD _succ) .
          lock(); addl(Address(rsp, 0), 0);
        }
      }
      cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
      jccb  (Assembler::notZero, LSuccess);

      // Rare inopportune interleaving - race.
      // The successor vanished in the small window above.
      // The lock is contended -- (cxq|EntryList) != null -- and there's no apparent successor.
      // We need to ensure progress and succession.
      // Try to reacquire the lock.
      // If that fails then the new owner is responsible for succession and this
      // thread needs to take no further action and can exit via the fast path (success).
      // If the re-acquire succeeds then pass control into the slow path.
      // As implemented, this latter mode is horrible because we generated more
      // coherence traffic on the lock *and* artificially extended the critical section
      // length by virtue of passing control into the slow path.

      // box is really RAX -- the following CMPXCHG depends on that binding
      // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
      if (os::is_MP()) { lock(); }
      cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
      // There's no successor so we tried to regrab the lock.
      // If that didn't work, then another thread grabbed the
      // lock so we're done (and exit was a success).
      jccb  (Assembler::notEqual, LSuccess);
      // Intentional fall-through into slow-path

      bind  (LGoSlowPath);
      orl   (boxReg, 1);                      // set ICC.ZF=0 to indicate failure
      jmpb  (DONE_LABEL);

      bind  (LSuccess);
      testl (boxReg, 0);                      // set ICC.ZF=1 to indicate success
      jmpb  (DONE_LABEL);
    }

    bind  (Stacked);
    movptr(tmpReg, Address (boxReg, 0));      // re-fetch
    if (os::is_MP()) { lock(); }
    cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box

    if (EmitSync & 65536) {
      bind (CheckSucc);
    }
#endif
    bind(DONE_LABEL);

  // Critically, the biased locking test must have precedence over
  // and appear before the (box->dhw == 0) recursive stack-lock test.
  if (UseBiasedLocking && !UseOptoBiasInlining) {
    biased_locking_exit(objReg, tmpReg, DONE_LABEL);
  }

#if INCLUDE_RTM_OPT
  if (UseRTMForStackLocks && use_rtm) {
    assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
    Label L_regular_unlock;
    movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));  // fetch markword
    andptr(tmpReg, markOopDesc::biased_lock_mask_in_place);            // look at 3 lock bits
    cmpptr(tmpReg, markOopDesc::unlocked_value);                       // bits = 001 unlocked
    jccb(Assembler::notEqual, L_regular_unlock);                       // if !HLE RegularLock
    xend();                                                            // otherwise end...
    jmp(DONE_LABEL);                                                   // ... and we're done
    bind(L_regular_unlock);
  }
#endif

  cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD);                    // Examine the displaced header
  jcc   (Assembler::zero, DONE_LABEL);                               // 0 indicates recursive stack-lock
  movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));  // Examine the object's markword
  testptr(tmpReg, markOopDesc::monitor_value);                       // Inflated?
  jccb  (Assembler::zero, Stacked);
  // It's inflated.
#if INCLUDE_RTM_OPT
  if (use_rtm) {
    Label L_regular_inflated_unlock;
    int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
    movptr(boxReg, Address(tmpReg, owner_offset));
    testptr(boxReg, boxReg);
    jccb(Assembler::notZero, L_regular_inflated_unlock);
    xend();
    jmpb(DONE_LABEL);
    bind(L_regular_inflated_unlock);
  }
#endif

  // Despite our balanced locking property we still check that m->_owner == Self
  // as java routines or native JNI code called by this thread might
  // have released the lock.
  // Refer to the comments in synchronizer.cpp for how we might encode extra
  // state in _succ so we can avoid fetching EntryList|cxq.
  //
  // I'd like to add more cases in fast_lock() and fast_unlock() --
  // such as recursive enter and exit -- but we have to be wary of
  // I$ bloat, T$ effects and BP$ effects.
  //
  // If there's no contention try a 1-0 exit.  That is, exit without
  // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
  // we detect and recover from the race that the 1-0 exit admits.
  //
  // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
  // before it STs null into _owner, releasing the lock.  Updates
  // to data protected by the critical section must be visible before
  // we drop the lock (and thus before any other thread could acquire
  // the lock and observe the fields protected by the lock).
  // IA32's memory-model is SPO, so STs are ordered with respect to
  // each other and there's no need for an explicit barrier (fence).
  // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
#ifndef _LP64
  get_thread (boxReg);
// Note that we could employ various encoding schemes to reduce
|
||||
// the number of loads below (currently 4) to just 2 or 3.
|
||||
// Refer to the comments in synchronizer.cpp.
|
||||
// In practice the chain of fetches doesn't seem to impact performance, however.
|
||||
xorptr(boxReg, boxReg);
|
||||
orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
|
||||
jccb (Assembler::notZero, DONE_LABEL);
|
||||
movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
|
||||
orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
|
||||
jccb (Assembler::notZero, CheckSucc);
|
||||
movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
|
||||
jmpb (DONE_LABEL);
|
||||
|
||||
bind (Stacked);
|
||||
// It's not inflated and it's not recursively stack-locked and it's not biased.
|
||||
// It must be stack-locked.
|
||||
// Try to reset the header to displaced header.
|
||||
// The "box" value on the stack is stable, so we can reload
|
||||
// and be assured we observe the same value as above.
|
||||
movptr(tmpReg, Address(boxReg, 0));
|
||||
if (os::is_MP()) {
|
||||
lock();
|
||||
}
|
||||
cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
|
||||
// Intention fall-thru into DONE_LABEL
|
||||
|
||||
// DONE_LABEL is a hot target - we'd really like to place it at the
|
||||
// start of cache line by padding with NOPs.
|
||||
// See the AMD and Intel software optimization manuals for the
|
||||
// most efficient "long" NOP encodings.
|
||||
// Unfortunately none of our alignment mechanisms suffice.
|
||||
bind (CheckSucc);
|
||||
#else // _LP64
  // It's inflated
  xorptr(boxReg, boxReg);
  orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
  jccb  (Assembler::notZero, DONE_LABEL);
  movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
  orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
  jccb  (Assembler::notZero, CheckSucc);
  movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
  jmpb  (DONE_LABEL);
// Try to avoid passing control into the slow_path ...
|
||||
Label LSuccess, LGoSlowPath ;
|
||||
bind (CheckSucc);
|
||||
|
||||
// The following optional optimization can be elided if necessary
|
||||
// Effectively: if (succ == null) goto SlowPath
|
||||
// The code reduces the window for a race, however,
|
||||
// and thus benefits performance.
|
||||
cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
|
||||
jccb (Assembler::zero, LGoSlowPath);
|
||||

  xorptr(boxReg, boxReg);
  movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
  if (os::is_MP()) {
    // Memory barrier/fence
    // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ
    // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack.
    // This is faster on Nehalem and AMD Shanghai/Barcelona.
    // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences
    // We might also restructure (ST Owner=0;barrier;LD _Succ) to
    // (mov box,0; xchgq box, &m->Owner; LD _succ) .
    lock(); addl(Address(rsp, 0), 0);
  }
  cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
  jccb  (Assembler::notZero, LSuccess);
// Rare inopportune interleaving - race.
|
||||
// The successor vanished in the small window above.
|
||||
// The lock is contended -- (cxq|EntryList) != null -- and there's no apparent successor.
|
||||
// We need to ensure progress and succession.
|
||||
// Try to reacquire the lock.
|
||||
// If that fails then the new owner is responsible for succession and this
|
||||
// thread needs to take no further action and can exit via the fast path (success).
|
||||
// If the re-acquire succeeds then pass control into the slow path.
|
||||
// As implemented, this latter mode is horrible because we generated more
|
||||
// coherence traffic on the lock *and* artifically extended the critical section
|
||||
// length while by virtue of passing control into the slow path.
|
||||
|
||||
// box is really RAX -- the following CMPXCHG depends on that binding
|
||||
// cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
|
||||
if (os::is_MP()) { lock(); }
|
||||
cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
|
||||
// There's no successor so we tried to regrab the lock.
|
||||
// If that didn't work, then another thread grabbed the
|
||||
// lock so we're done (and exit was a success).
|
||||
jccb (Assembler::notEqual, LSuccess);
|
||||
// Intentional fall-through into slow-path
|
||||
|
||||

  bind  (LGoSlowPath);
  orl   (boxReg, 1);                      // set ICC.ZF=0 to indicate failure
  jmpb  (DONE_LABEL);

  bind  (LSuccess);
  testl (boxReg, 0);                      // set ICC.ZF=1 to indicate success
  jmpb  (DONE_LABEL);

  bind  (Stacked);
  movptr(tmpReg, Address (boxReg, 0));      // re-fetch
  if (os::is_MP()) { lock(); }
  cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box

#endif
  bind(DONE_LABEL);
}
#endif // COMPILER2

@ -574,6 +574,7 @@ static SpecialFlag const special_jvm_flags[] = {
  { "PrintSafepointStatisticsCount", JDK_Version::jdk(11), JDK_Version::jdk(12), JDK_Version::jdk(13) },
  { "TransmitErrorReport",           JDK_Version::undefined(), JDK_Version::jdk(12), JDK_Version::jdk(13) },
  { "ErrorReportServer",             JDK_Version::undefined(), JDK_Version::jdk(12), JDK_Version::jdk(13) },
  { "EmitSync",                      JDK_Version::undefined(), JDK_Version::jdk(12), JDK_Version::jdk(13) },

#ifdef TEST_VERIFY_SPECIAL_JVM_FLAGS
  { "dep > obs",                     JDK_Version::jdk(9), JDK_Version::jdk(8), JDK_Version::undefined() },
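The four fields of each entry above are the flag name and the versions in which it becomes deprecated, obsolete, and expired; EmitSync is never deprecated (undefined), becomes obsolete in JDK 12 (accepted but ignored, with a warning), and expires in JDK 13 (rejected as unknown). A simplified model of how such an entry is interpreted, shown as an editorial sketch rather than the JDK implementation:

struct SpecialFlag {
  const char* name;
  int deprecated_in;   // 0 models JDK_Version::undefined()
  int obsolete_in;
  int expired_in;
};

enum class FlagStatus { Active, Deprecated, Obsolete, Expired };

FlagStatus classify(const SpecialFlag& f, int jdk) {
  if (f.expired_in    && jdk >= f.expired_in)    return FlagStatus::Expired;    // unknown flag: VM refuses to start
  if (f.obsolete_in   && jdk >= f.obsolete_in)   return FlagStatus::Obsolete;   // ignored with a warning
  if (f.deprecated_in && jdk >= f.deprecated_in) return FlagStatus::Deprecated; // still honored, warns
  return FlagStatus::Active;
}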
@ -3977,7 +3978,7 @@ jint Arguments::apply_ergo() {
    }
  }
#ifdef COMPILER2
  if (!UseBiasedLocking || EmitSync != 0) {
  if (!UseBiasedLocking) {
    UseOptoBiasInlining = false;
  }
#endif

@ -830,10 +830,6 @@ define_pd_global(uint64_t,MaxRAM, 1ULL*G);
  experimental(ccstr, SyncKnobs, NULL,                                      \
          "(Unstable) Various monitor synchronization tunables")            \
                                                                            \
  experimental(intx, EmitSync, 0,                                           \
          "(Unsafe, Unstable) "                                             \
          "Control emission of inline sync fast-path code")                 \
                                                                            \
  product(intx, MonitorBound, 0, "Bound Monitor population")                \
          range(0, max_jint)                                                \
                                                                            \