8210381: Obsolete EmitSync

Reviewed-by: kvn, dcubed, mdoerr, mbaesken, shade
Mikael Vidstedt 2018-09-06 18:06:24 -07:00
parent 84cf73f2a5
commit 0f68e5221f
7 changed files with 591 additions and 1015 deletions
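For context: EmitSync was an experimental diagnostic flag whose individual bits selected alternative code shapes for the compiler-emitted monitor fast paths. This change removes the flag and keeps only the default (EmitSync == 0) shape on every platform. A minimal sketch of the refactoring pattern applied throughout, with all helper names hypothetical:

#include <cstdio>

// Hypothetical stand-ins for the real emitter helpers; invented for
// illustration only.
static void emit_inflated_check() { std::puts("test mark-word monitor bit"); }
static void emit_stack_lock()     { std::puts("CAS box into mark word"); }
static int EmitSync = 0; // the old diagnostic flag; 0 was the default

// Before: bits of EmitSync gated alternative code shapes.
static void emit_fast_lock_old() {
  if (EmitSync & 0x01) { std::puts("force slow path"); return; }
  if ((EmitSync & 0x02) == 0) emit_inflated_check();
  emit_stack_lock();
}

// After: only the EmitSync == 0 shape remains, emitted unconditionally.
static void emit_fast_lock_new() {
  emit_inflated_check();
  emit_stack_lock();
}

int main() {
  emit_fast_lock_old();
  emit_fast_lock_new();
  return 0;
}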

@@ -3378,26 +3378,18 @@ encode %{
// Load markOop from object into displaced_header.
__ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));
// Always do locking in runtime.
if (EmitSync & 0x01) {
__ cmp(oop, zr);
return;
}
if (UseBiasedLocking && !UseOptoBiasInlining) {
__ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
}
// Handle existing monitor
if ((EmitSync & 0x02) == 0) {
// we can use AArch64's bit test and branch here, but
// markOopDesc does not define a bit index, just the bit value,
// so assert in case the bit pos changes
# define __monitor_value_log2 1
assert(markOopDesc::monitor_value == (1 << __monitor_value_log2), "incorrect bit position");
__ tbnz(disp_hdr, __monitor_value_log2, object_has_monitor);
# undef __monitor_value_log2
}
// we can use AArch64's bit test and branch here, but
// markOopDesc does not define a bit index, just the bit value,
// so assert in case the bit pos changes
# define __monitor_value_log2 1
assert(markOopDesc::monitor_value == (1 << __monitor_value_log2), "incorrect bit position");
__ tbnz(disp_hdr, __monitor_value_log2, object_has_monitor);
# undef __monitor_value_log2
// Set displaced_header to be (markOop of object | UNLOCK_VALUE).
__ orr(disp_hdr, disp_hdr, markOopDesc::unlocked_value);
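The tbnz above keys off the monitor tag bit in the mark word. A minimal sketch of that test, assuming the classic HotSpot low-bit encoding (unlocked_value == 1, monitor_value == 2); the names mirror markOopDesc but the code is illustrative only:

#include <cassert>
#include <cstdint>

static const uintptr_t unlocked_value = 1; // like markOopDesc::unlocked_value
static const uintptr_t monitor_value  = 2; // like markOopDesc::monitor_value

static bool has_monitor(uintptr_t mark) {
  // Equivalent of: tbnz disp_hdr, log2(monitor_value), object_has_monitor
  return (mark & monitor_value) != 0;
}

int main() {
  assert(monitor_value == (1u << 1));           // the bit position asserted above
  assert(has_monitor(0xdeadbee0 | monitor_value));
  assert(!has_monitor(0xdeadbee0 | unlocked_value));
  return 0;
}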
@@ -3455,63 +3447,62 @@ encode %{
__ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));
// Handle existing monitor.
if ((EmitSync & 0x02) == 0) {
__ b(cont);
__ b(cont);
__ bind(object_has_monitor);
// The object's monitor m is unlocked iff m->owner == NULL,
// otherwise m->owner may contain a thread or a stack address.
//
// Try to CAS m->owner from NULL to current thread.
__ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
__ mov(disp_hdr, zr);
__ bind(object_has_monitor);
// The object's monitor m is unlocked iff m->owner == NULL,
// otherwise m->owner may contain a thread or a stack address.
//
// Try to CAS m->owner from NULL to current thread.
__ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
__ mov(disp_hdr, zr);
if (UseLSE) {
__ mov(rscratch1, disp_hdr);
__ casal(Assembler::xword, rscratch1, rthread, tmp);
__ cmp(rscratch1, disp_hdr);
} else {
Label retry_load, fail;
if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
__ prfm(Address(tmp), PSTL1STRM);
__ bind(retry_load);
__ ldaxr(rscratch1, tmp);
__ cmp(disp_hdr, rscratch1);
__ br(Assembler::NE, fail);
// use stlxr to ensure update is immediately visible
__ stlxr(rscratch1, rthread, tmp);
__ cbnzw(rscratch1, retry_load);
__ bind(fail);
if (UseLSE) {
__ mov(rscratch1, disp_hdr);
__ casal(Assembler::xword, rscratch1, rthread, tmp);
__ cmp(rscratch1, disp_hdr);
} else {
Label retry_load, fail;
if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH)) {
__ prfm(Address(tmp), PSTL1STRM);
}
// Label next;
// __ cmpxchgptr(/*oldv=*/disp_hdr,
// /*newv=*/rthread,
// /*addr=*/tmp,
// /*tmp=*/rscratch1,
// /*succeed*/next,
// /*fail*/NULL);
// __ bind(next);
// store a non-null value into the box.
__ str(box, Address(box, BasicLock::displaced_header_offset_in_bytes()));
// PPC port checks the following invariants
// #ifdef ASSERT
// bne(flag, cont);
// We have acquired the monitor, check some invariants.
// addw(/*monitor=*/tmp, tmp, -ObjectMonitor::owner_offset_in_bytes());
// Invariant 1: _recursions should be 0.
// assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
// assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), tmp,
// "monitor->_recursions should be 0", -1);
// Invariant 2: OwnerIsThread shouldn't be 0.
// assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
//assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), tmp,
// "monitor->OwnerIsThread shouldn't be 0", -1);
// #endif
__ bind(retry_load);
__ ldaxr(rscratch1, tmp);
__ cmp(disp_hdr, rscratch1);
__ br(Assembler::NE, fail);
// use stlxr to ensure update is immediately visible
__ stlxr(rscratch1, rthread, tmp);
__ cbnzw(rscratch1, retry_load);
__ bind(fail);
}
// Label next;
// __ cmpxchgptr(/*oldv=*/disp_hdr,
// /*newv=*/rthread,
// /*addr=*/tmp,
// /*tmp=*/rscratch1,
// /*succeed*/next,
// /*fail*/NULL);
// __ bind(next);
// store a non-null value into the box.
__ str(box, Address(box, BasicLock::displaced_header_offset_in_bytes()));
// PPC port checks the following invariants
// #ifdef ASSERT
// bne(flag, cont);
// We have acquired the monitor, check some invariants.
// addw(/*monitor=*/tmp, tmp, -ObjectMonitor::owner_offset_in_bytes());
// Invariant 1: _recursions should be 0.
// assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
// assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), tmp,
// "monitor->_recursions should be 0", -1);
// Invariant 2: OwnerIsThread shouldn't be 0.
// assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
//assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), tmp,
// "monitor->OwnerIsThread shouldn't be 0", -1);
// #endif
__ bind(cont);
// flag == EQ indicates success
// flag == NE indicates failure
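Both the LSE casal path and the ldaxr/stlxr retry loop above implement the same operation: try to CAS m->owner from NULL to the current thread, acquiring the monitor on success. A portable sketch with std::atomic (the types are illustrative, not the real ObjectMonitor layout):

#include <atomic>
#include <cassert>

struct Monitor { std::atomic<void*> owner{nullptr}; }; // illustrative layout

static bool try_enter(Monitor* m, void* self) {
  void* expected = nullptr; // disp_hdr is zeroed before the CAS above
  return m->owner.compare_exchange_strong(expected, self,
                                          std::memory_order_acq_rel,
                                          std::memory_order_acquire);
}

int main() {
  Monitor m;
  int me;
  assert(try_enter(&m, &me));   // unlocked monitor: CAS succeeds
  assert(!try_enter(&m, &me));  // owner already set: CAS fails
  return 0;
}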
@@ -3533,12 +3524,6 @@ encode %{
assert_different_registers(oop, box, tmp, disp_hdr);
// Always do locking in runtime.
if (EmitSync & 0x01) {
__ cmp(oop, zr); // Oop can't be 0 here => always false.
return;
}
if (UseBiasedLocking && !UseOptoBiasInlining) {
__ biased_locking_exit(oop, tmp, cont);
}
@@ -3552,10 +3537,8 @@ encode %{
// Handle existing monitor.
if ((EmitSync & 0x02) == 0) {
__ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
__ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
}
__ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
__ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
// Check if it is still a light weight lock, this is true if we
// see the stack address of the basicLock in the markOop of the
@@ -3590,27 +3573,25 @@ encode %{
__ bind(cas_failed);
// Handle existing monitor.
if ((EmitSync & 0x02) == 0) {
__ b(cont);
__ b(cont);
__ bind(object_has_monitor);
__ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
__ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
__ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
__ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
__ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
__ cmp(rscratch1, zr);
__ br(Assembler::NE, cont);
__ bind(object_has_monitor);
__ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
__ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
__ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
__ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
__ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
__ cmp(rscratch1, zr);
__ br(Assembler::NE, cont);
__ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
__ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
__ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
__ cmp(rscratch1, zr);
__ cbnz(rscratch1, cont);
// need a release store here
__ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
__ stlr(rscratch1, tmp); // rscratch1 is zero
}
__ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
__ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
__ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
__ cmp(rscratch1, zr);
__ cbnz(rscratch1, cont);
// need a release store here
__ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
__ stlr(rscratch1, tmp); // rscratch1 is zero
__ bind(cont);
// flag == EQ indicates success
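The unlock path above may release an inflated monitor without a CAS: if we own it, _recursions is zero, and both EntryList and cxq are empty, a release store of NULL to _owner (the stlr) is enough. A sketch of that check, with an illustrative monitor layout:

#include <atomic>
#include <cassert>
#include <cstdint>

// Illustrative layout only; the real ObjectMonitor differs.
struct Monitor {
  std::atomic<void*> owner{nullptr};
  intptr_t           recursions = 0;
  void*              EntryList  = nullptr;
  void*              cxq        = nullptr;
};

static bool try_fast_exit(Monitor* m, void* self) {
  if (m->owner.load(std::memory_order_relaxed) != self) return false; // eor test
  if (m->recursions != 0) return false;                               // orr test
  if (m->EntryList != nullptr || m->cxq != nullptr) return false;     // queue test
  m->owner.store(nullptr, std::memory_order_release);                 // the stlr
  return true;
}

int main() {
  Monitor m;
  int me;
  m.owner.store(&me);
  assert(try_fast_exit(&m, &me));
  assert(m.owner.load() == nullptr);
  return 0;
}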

@@ -2848,12 +2848,6 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
ld(displaced_header, oopDesc::mark_offset_in_bytes(), oop);
// Always do locking in runtime.
if (EmitSync & 0x01) {
cmpdi(flag, oop, 0); // Oop can't be 0 here => always false.
return;
}
if (try_bias) {
biased_locking_enter(flag, oop, displaced_header, temp, current_header, cont);
}
@@ -2867,11 +2861,9 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
#endif // INCLUDE_RTM_OPT
// Handle existing monitor.
if ((EmitSync & 0x02) == 0) {
// The object has an existing monitor iff (mark & monitor_value) != 0.
andi_(temp, displaced_header, markOopDesc::monitor_value);
bne(CCR0, object_has_monitor);
}
// The object has an existing monitor iff (mark & monitor_value) != 0.
andi_(temp, displaced_header, markOopDesc::monitor_value);
bne(CCR0, object_has_monitor);
// Set displaced_header to be (markOop of object | UNLOCK_VALUE).
ori(displaced_header, displaced_header, markOopDesc::unlocked_value);
@@ -2914,48 +2906,46 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
std(R0/*==0, perhaps*/, BasicLock::displaced_header_offset_in_bytes(), box);
// Handle existing monitor.
if ((EmitSync & 0x02) == 0) {
b(cont);
b(cont);
bind(object_has_monitor);
// The object's monitor m is unlocked iff m->owner == NULL,
// otherwise m->owner may contain a thread or a stack address.
bind(object_has_monitor);
// The object's monitor m is unlocked iff m->owner == NULL,
// otherwise m->owner may contain a thread or a stack address.
#if INCLUDE_RTM_OPT
// Use the same RTM locking code in 32- and 64-bit VM.
if (use_rtm) {
rtm_inflated_locking(flag, oop, displaced_header, box, temp, /*temp*/ current_header,
rtm_counters, method_data, profile_rtm, cont);
} else {
// Use the same RTM locking code in 32- and 64-bit VM.
if (use_rtm) {
rtm_inflated_locking(flag, oop, displaced_header, box, temp, /*temp*/ current_header,
rtm_counters, method_data, profile_rtm, cont);
} else {
#endif // INCLUDE_RTM_OPT
// Try to CAS m->owner from NULL to current thread.
addi(temp, displaced_header, ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value);
cmpxchgd(/*flag=*/flag,
/*current_value=*/current_header,
/*compare_value=*/(intptr_t)0,
/*exchange_value=*/R16_thread,
/*where=*/temp,
MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
MacroAssembler::cmpxchgx_hint_acquire_lock());
// Try to CAS m->owner from NULL to current thread.
addi(temp, displaced_header, ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value);
cmpxchgd(/*flag=*/flag,
/*current_value=*/current_header,
/*compare_value=*/(intptr_t)0,
/*exchange_value=*/R16_thread,
/*where=*/temp,
MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
MacroAssembler::cmpxchgx_hint_acquire_lock());
// Store a non-null value into the box.
std(box, BasicLock::displaced_header_offset_in_bytes(), box);
// Store a non-null value into the box.
std(box, BasicLock::displaced_header_offset_in_bytes(), box);
# ifdef ASSERT
bne(flag, cont);
// We have acquired the monitor, check some invariants.
addi(/*monitor=*/temp, temp, -ObjectMonitor::owner_offset_in_bytes());
// Invariant 1: _recursions should be 0.
//assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
asm_assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), temp,
# ifdef ASSERT
bne(flag, cont);
// We have acquired the monitor, check some invariants.
addi(/*monitor=*/temp, temp, -ObjectMonitor::owner_offset_in_bytes());
// Invariant 1: _recursions should be 0.
//assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
asm_assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), temp,
"monitor->_recursions should be 0", -1);
# endif
# endif
#if INCLUDE_RTM_OPT
} // use_rtm()
} // use_rtm()
#endif
}
bind(cont);
// flag == EQ indicates success
@@ -2970,12 +2960,6 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
Label cont;
Label object_has_monitor;
// Always do locking in runtime.
if (EmitSync & 0x01) {
cmpdi(flag, oop, 0); // Oop can't be 0 here => always false.
return;
}
if (try_bias) {
biased_locking_exit(flag, oop, current_header, cont);
}
@@ -3002,13 +2986,11 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
beq(flag, cont);
// Handle existing monitor.
if ((EmitSync & 0x02) == 0) {
// The object has an existing monitor iff (mark & monitor_value) != 0.
RTM_OPT_ONLY( if (!(UseRTMForStackLocks && use_rtm)) ) // skip load if already done
ld(current_header, oopDesc::mark_offset_in_bytes(), oop);
andi_(R0, current_header, markOopDesc::monitor_value);
bne(CCR0, object_has_monitor);
}
// The object has an existing monitor iff (mark & monitor_value) != 0.
RTM_OPT_ONLY( if (!(UseRTMForStackLocks && use_rtm)) ) // skip load if already done
ld(current_header, oopDesc::mark_offset_in_bytes(), oop);
andi_(R0, current_header, markOopDesc::monitor_value);
bne(CCR0, object_has_monitor);
// Check if it is still a light weight lock, this is true if we see
// the stack address of the basicLock in the markOop of the object.
@@ -3026,40 +3008,38 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
// Handle existing monitor.
if ((EmitSync & 0x02) == 0) {
b(cont);
b(cont);
bind(object_has_monitor);
addi(current_header, current_header, -markOopDesc::monitor_value); // monitor
ld(temp, ObjectMonitor::owner_offset_in_bytes(), current_header);
bind(object_has_monitor);
addi(current_header, current_header, -markOopDesc::monitor_value); // monitor
ld(temp, ObjectMonitor::owner_offset_in_bytes(), current_header);
// It's inflated.
#if INCLUDE_RTM_OPT
if (use_rtm) {
Label L_regular_inflated_unlock;
// Clean monitor_value bit to get valid pointer
cmpdi(flag, temp, 0);
bne(flag, L_regular_inflated_unlock);
tend_();
b(cont);
bind(L_regular_inflated_unlock);
}
if (use_rtm) {
Label L_regular_inflated_unlock;
// Clean monitor_value bit to get valid pointer
cmpdi(flag, temp, 0);
bne(flag, L_regular_inflated_unlock);
tend_();
b(cont);
bind(L_regular_inflated_unlock);
}
#endif
ld(displaced_header, ObjectMonitor::recursions_offset_in_bytes(), current_header);
xorr(temp, R16_thread, temp); // Will be 0 if we are the owner.
orr(temp, temp, displaced_header); // Will be 0 if there are 0 recursions.
cmpdi(flag, temp, 0);
bne(flag, cont);
ld(displaced_header, ObjectMonitor::recursions_offset_in_bytes(), current_header);
xorr(temp, R16_thread, temp); // Will be 0 if we are the owner.
orr(temp, temp, displaced_header); // Will be 0 if there are 0 recursions.
cmpdi(flag, temp, 0);
bne(flag, cont);
ld(temp, ObjectMonitor::EntryList_offset_in_bytes(), current_header);
ld(displaced_header, ObjectMonitor::cxq_offset_in_bytes(), current_header);
orr(temp, temp, displaced_header); // Will be 0 if both are 0.
cmpdi(flag, temp, 0);
bne(flag, cont);
release();
std(temp, ObjectMonitor::owner_offset_in_bytes(), current_header);
}
ld(temp, ObjectMonitor::EntryList_offset_in_bytes(), current_header);
ld(displaced_header, ObjectMonitor::cxq_offset_in_bytes(), current_header);
orr(temp, temp, displaced_header); // Will be 0 if both are 0.
cmpdi(flag, temp, 0);
bne(flag, cont);
release();
std(temp, ObjectMonitor::owner_offset_in_bytes(), current_header);
bind(cont);
// flag == EQ indicates success
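The cmpxchgd above is issued with MemBarRel | MemBarAcq so that the acquiring CAS and the releasing store bracket the critical section with a happens-before edge. A sketch of why both sides are needed, using C++ orderings (assumption: the PPC barrier pair corresponds to acquire on lock and release on unlock):

#include <atomic>
#include <cassert>
#include <thread>

static std::atomic<int> lock_word{0};
static int protected_data = 0; // guarded by lock_word

static void locked_increment() {
  int expected = 0;
  // Acquire on lock: later reads see the previous owner's writes.
  while (!lock_word.compare_exchange_weak(expected, 1,
                                          std::memory_order_acquire)) {
    expected = 0;
  }
  ++protected_data;
  // Release on unlock: our writes are visible to the next acquirer.
  lock_word.store(0, std::memory_order_release);
}

int main() {
  std::thread a(locked_increment), b(locked_increment);
  a.join(); b.join();
  assert(protected_data == 2);
  return 0;
}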

@@ -3374,13 +3374,11 @@ void MacroAssembler::compiler_fast_lock_object(Register oop, Register box, Regis
}
// Handle existing monitor.
if ((EmitSync & 0x01) == 0) {
// The object has an existing monitor iff (mark & monitor_value) != 0.
guarantee(Immediate::is_uimm16(markOopDesc::monitor_value), "must be half-word");
z_lr(temp, displacedHeader);
z_nill(temp, markOopDesc::monitor_value);
z_brne(object_has_monitor);
}
// The object has an existing monitor iff (mark & monitor_value) != 0.
guarantee(Immediate::is_uimm16(markOopDesc::monitor_value), "must be half-word");
z_lr(temp, displacedHeader);
z_nill(temp, markOopDesc::monitor_value);
z_brne(object_has_monitor);
// Set mark to markOop | markOopDesc::unlocked_value.
z_oill(displacedHeader, markOopDesc::unlocked_value);
@@ -3411,28 +3409,26 @@ void MacroAssembler::compiler_fast_lock_object(Register oop, Register box, Regis
z_bru(done);
if ((EmitSync & 0x01) == 0) {
Register zero = temp;
Register monitor_tagged = displacedHeader; // Tagged with markOopDesc::monitor_value.
bind(object_has_monitor);
// The object's monitor m is unlocked iff m->owner == NULL,
// otherwise m->owner may contain a thread or a stack address.
//
// Try to CAS m->owner from NULL to current thread.
z_lghi(zero, 0);
// If m->owner is null, then csg succeeds and sets m->owner=THREAD and CR=EQ.
z_csg(zero, Z_thread, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), monitor_tagged);
// Store a non-null value into the box.
z_stg(box, BasicLock::displaced_header_offset_in_bytes(), box);
Register zero = temp;
Register monitor_tagged = displacedHeader; // Tagged with markOopDesc::monitor_value.
bind(object_has_monitor);
// The object's monitor m is unlocked iff m->owner == NULL,
// otherwise m->owner may contain a thread or a stack address.
//
// Try to CAS m->owner from NULL to current thread.
z_lghi(zero, 0);
// If m->owner is null, then csg succeeds and sets m->owner=THREAD and CR=EQ.
z_csg(zero, Z_thread, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), monitor_tagged);
// Store a non-null value into the box.
z_stg(box, BasicLock::displaced_header_offset_in_bytes(), box);
#ifdef ASSERT
z_brne(done);
// We've acquired the monitor, check some invariants.
// Invariant 1: _recursions should be 0.
asm_assert_mem8_is_zero(OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions), monitor_tagged,
"monitor->_recursions should be 0", -1);
z_ltgr(zero, zero); // Set CR=EQ.
z_brne(done);
// We've acquired the monitor, check some invariants.
// Invariant 1: _recursions should be 0.
asm_assert_mem8_is_zero(OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions), monitor_tagged,
"monitor->_recursions should be 0", -1);
z_ltgr(zero, zero); // Set CR=EQ.
#endif
}
bind(done);
BLOCK_COMMENT("} compiler_fast_lock_object");
@@ -3461,13 +3457,11 @@ void MacroAssembler::compiler_fast_unlock_object(Register oop, Register box, Reg
z_bre(done);
// Handle existing monitor.
if ((EmitSync & 0x02) == 0) {
// The object has an existing monitor iff (mark & monitor_value) != 0.
z_lg(currentHeader, oopDesc::mark_offset_in_bytes(), oop);
guarantee(Immediate::is_uimm16(markOopDesc::monitor_value), "must be half-word");
z_nill(currentHeader, markOopDesc::monitor_value);
z_brne(object_has_monitor);
}
// The object has an existing monitor iff (mark & monitor_value) != 0.
z_lg(currentHeader, oopDesc::mark_offset_in_bytes(), oop);
guarantee(Immediate::is_uimm16(markOopDesc::monitor_value), "must be half-word");
z_nill(currentHeader, markOopDesc::monitor_value);
z_brne(object_has_monitor);
// Check if it is still a light weight lock, this is true if we see
// the stack address of the basicLock in the markOop of the object
@@ -3477,20 +3471,18 @@ void MacroAssembler::compiler_fast_unlock_object(Register oop, Register box, Reg
z_bru(done); // Csg sets CR as desired.
// Handle existing monitor.
if ((EmitSync & 0x02) == 0) {
bind(object_has_monitor);
z_lg(currentHeader, oopDesc::mark_offset_in_bytes(), oop); // CurrentHeader is tagged with monitor_value set.
load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
z_brne(done);
load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
z_brne(done);
load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
z_brne(done);
load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
z_brne(done);
z_release();
z_stg(temp/*=0*/, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), currentHeader);
}
bind(object_has_monitor);
z_lg(currentHeader, oopDesc::mark_offset_in_bytes(), oop); // CurrentHeader is tagged with monitor_value set.
load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
z_brne(done);
load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
z_brne(done);
load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
z_brne(done);
load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
z_brne(done);
z_release();
z_stg(temp/*=0*/, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), currentHeader);
bind(done);
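Across all the ports touched here, the on-stack BasicLock ("box") carries the same displaced-header protocol: 0 means a recursive stack lock, the saved mark word means an ordinary stack lock, and any other non-zero value (the box address above, or unused_mark on x86) records that the monitor path was taken. A sketch of triage on that encoding; the function and its order of checks are illustrative, not HotSpot's actual unlock code:

#include <cstdint>
#include <cstdio>

// Illustrative displaced-header triage for the BasicLock "box".
enum class UnlockPath { Recursive, StackLocked, Inflated };

static UnlockPath triage(uintptr_t dhw, uintptr_t saved_mark) {
  if (dhw == 0)          return UnlockPath::Recursive;   // nothing to restore
  if (dhw == saved_mark) return UnlockPath::StackLocked; // CAS mark back
  return UnlockPath::Inflated;                           // go through monitor
}

int main() {
  uintptr_t mark = 0x1001; // some unlocked mark value
  std::printf("%d %d %d\n",
              (int)triage(0, mark), (int)triage(mark, mark),
              (int)triage(0xdead, mark));
  return 0;
}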

@@ -2648,195 +2648,92 @@ void MacroAssembler::compiler_lock_object(Register Roop, Register Rmark,
inc_counter((address) counters->total_entry_count_addr(), Rmark, Rscratch);
}
if (EmitSync & 1) {
mov(3, Rscratch);
st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
cmp(SP, G0);
return ;
}
if (EmitSync & 2) {
// Fetch object's markword
ld_ptr(mark_addr, Rmark);
if (try_bias) {
biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
}
// Save Rbox in Rscratch to be used for the cas operation
mov(Rbox, Rscratch);
// set Rmark to markOop | markOopDesc::unlocked_value
or3(Rmark, markOopDesc::unlocked_value, Rmark);
// Initialize the box. (Must happen before we update the object mark!)
st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
// compare object markOop with Rmark and if equal exchange Rscratch with object markOop
assert(mark_addr.disp() == 0, "cas must take a zero displacement");
cas_ptr(mark_addr.base(), Rmark, Rscratch);
// if compare/exchange succeeded we found an unlocked object and we now have locked it
// hence we are done
cmp(Rmark, Rscratch);
sub(Rscratch, STACK_BIAS, Rscratch);
brx(Assembler::equal, false, Assembler::pt, done);
delayed()->sub(Rscratch, SP, Rscratch); //pull next instruction into delay slot
// we did not find an unlocked object so see if this is a recursive case
// sub(Rscratch, SP, Rscratch);
assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
andcc(Rscratch, 0xfffff003, Rscratch);
st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
bind (done);
return ;
}
Label Egress ;
if (EmitSync & 256) {
Label IsInflated ;
ld_ptr(mark_addr, Rmark); // fetch obj->mark
// Triage: biased, stack-locked, neutral, inflated
if (try_bias) {
biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
// Invariant: if control reaches this point in the emitted stream
// then Rmark has not been modified.
}
// Store mark into displaced mark field in the on-stack basic-lock "box"
// Critically, this must happen before the CAS
// Maximize the ST-CAS distance to minimize the ST-before-CAS penalty.
st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
andcc(Rmark, 2, G0);
brx(Assembler::notZero, false, Assembler::pn, IsInflated);
delayed()->
// Try stack-lock acquisition.
// Beware: the 1st instruction is in a delay slot
mov(Rbox, Rscratch);
or3(Rmark, markOopDesc::unlocked_value, Rmark);
assert(mark_addr.disp() == 0, "cas must take a zero displacement");
cas_ptr(mark_addr.base(), Rmark, Rscratch);
cmp(Rmark, Rscratch);
brx(Assembler::equal, false, Assembler::pt, done);
delayed()->sub(Rscratch, SP, Rscratch);
// Stack-lock attempt failed - check for recursive stack-lock.
// See the comments below about how we might remove this case.
sub(Rscratch, STACK_BIAS, Rscratch);
assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
andcc(Rscratch, 0xfffff003, Rscratch);
br(Assembler::always, false, Assembler::pt, done);
delayed()-> st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
bind(IsInflated);
if (EmitSync & 64) {
// If m->owner != null goto IsLocked
// Pessimistic form: Test-and-CAS vs CAS
// The optimistic form avoids RTS->RTO cache line upgrades.
ld_ptr(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rscratch);
andcc(Rscratch, Rscratch, G0);
brx(Assembler::notZero, false, Assembler::pn, done);
delayed()->nop();
// m->owner == null : it's unlocked.
}
// Try to CAS m->owner from null to Self
// Invariant: if we acquire the lock then _recursions should be 0.
add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
mov(G2_thread, Rscratch);
cas_ptr(Rmark, G0, Rscratch);
cmp(Rscratch, G0);
// Intentional fall-through into done
} else {
// Aggressively avoid the Store-before-CAS penalty
// Defer the store into box->dhw until after the CAS
Label IsInflated, Recursive ;
// Aggressively avoid the Store-before-CAS penalty
// Defer the store into box->dhw until after the CAS
Label IsInflated, Recursive ;
// Anticipate CAS -- Avoid RTS->RTO upgrade
// prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads);
ld_ptr(mark_addr, Rmark); // fetch obj->mark
// Triage: biased, stack-locked, neutral, inflated
ld_ptr(mark_addr, Rmark); // fetch obj->mark
// Triage: biased, stack-locked, neutral, inflated
if (try_bias) {
biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
// Invariant: if control reaches this point in the emitted stream
// then Rmark has not been modified.
}
andcc(Rmark, 2, G0);
brx(Assembler::notZero, false, Assembler::pn, IsInflated);
delayed()-> // Beware - dangling delay-slot
// Try stack-lock acquisition.
// Transiently install BUSY (0) encoding in the mark word.
// if the CAS of 0 into the mark was successful then we execute:
// ST box->dhw = mark -- save fetched mark in on-stack basiclock box
// ST obj->mark = box -- overwrite transient 0 value
// This presumes TSO, of course.
mov(0, Rscratch);
or3(Rmark, markOopDesc::unlocked_value, Rmark);
assert(mark_addr.disp() == 0, "cas must take a zero displacement");
cas_ptr(mark_addr.base(), Rmark, Rscratch);
// prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads);
cmp(Rscratch, Rmark);
brx(Assembler::notZero, false, Assembler::pn, Recursive);
delayed()->st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
if (counters != NULL) {
cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
}
ba(done);
delayed()->st_ptr(Rbox, mark_addr);
bind(Recursive);
// Stack-lock attempt failed - check for recursive stack-lock.
// Tests show that we can remove the recursive case with no impact
// on refworkload 0.83. If we need to reduce the size of the code
// emitted by compiler_lock_object() the recursive case is a perfect
// candidate.
//
// A more extreme idea is to always inflate on stack-lock recursion.
// This lets us eliminate the recursive checks in compiler_lock_object
// and compiler_unlock_object and the (box->dhw == 0) encoding.
// A brief experiment - requiring changes to synchronizer.cpp and the
// interpreter - showed a performance *increase*. In the same experiment I eliminated
// the fast-path stack-lock code from the interpreter and always passed
// control to the "slow" operators in synchronizer.cpp.
// RScratch contains the fetched obj->mark value from the failed CAS.
sub(Rscratch, STACK_BIAS, Rscratch);
sub(Rscratch, SP, Rscratch);
assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
andcc(Rscratch, 0xfffff003, Rscratch);
if (counters != NULL) {
// Accounting needs the Rscratch register
st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
ba_short(done);
} else {
ba(done);
delayed()->st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
}
bind (IsInflated);
// Try to CAS m->owner from null to Self
// Invariant: if we acquire the lock then _recursions should be 0.
add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
mov(G2_thread, Rscratch);
cas_ptr(Rmark, G0, Rscratch);
andcc(Rscratch, Rscratch, G0); // set ICCs for done: icc.zf iff success
// set icc.zf : 1=success 0=failure
// ST box->displaced_header = NonZero.
// Any non-zero value suffices:
// markOopDesc::unused_mark(), G2_thread, RBox, RScratch, rsp, etc.
st_ptr(Rbox, Rbox, BasicLock::displaced_header_offset_in_bytes());
// Intentional fall-through into done
if (try_bias) {
biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
// Invariant: if control reaches this point in the emitted stream
// then Rmark has not been modified.
}
andcc(Rmark, 2, G0);
brx(Assembler::notZero, false, Assembler::pn, IsInflated);
delayed()-> // Beware - dangling delay-slot
// Try stack-lock acquisition.
// Transiently install BUSY (0) encoding in the mark word.
// if the CAS of 0 into the mark was successful then we execute:
// ST box->dhw = mark -- save fetched mark in on-stack basiclock box
// ST obj->mark = box -- overwrite transient 0 value
// This presumes TSO, of course.
mov(0, Rscratch);
or3(Rmark, markOopDesc::unlocked_value, Rmark);
assert(mark_addr.disp() == 0, "cas must take a zero displacement");
cas_ptr(mark_addr.base(), Rmark, Rscratch);
// prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads);
cmp(Rscratch, Rmark);
brx(Assembler::notZero, false, Assembler::pn, Recursive);
delayed()->st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
if (counters != NULL) {
cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
}
ba(done);
delayed()->st_ptr(Rbox, mark_addr);
bind(Recursive);
// Stack-lock attempt failed - check for recursive stack-lock.
// Tests show that we can remove the recursive case with no impact
// on refworkload 0.83. If we need to reduce the size of the code
// emitted by compiler_lock_object() the recursive case is a perfect
// candidate.
//
// A more extreme idea is to always inflate on stack-lock recursion.
// This lets us eliminate the recursive checks in compiler_lock_object
// and compiler_unlock_object and the (box->dhw == 0) encoding.
// A brief experiment - requiring changes to synchronizer.cpp and the
// interpreter - showed a performance *increase*. In the same experiment I eliminated
// the fast-path stack-lock code from the interpreter and always passed
// control to the "slow" operators in synchronizer.cpp.
// RScratch contains the fetched obj->mark value from the failed CAS.
sub(Rscratch, STACK_BIAS, Rscratch);
sub(Rscratch, SP, Rscratch);
assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
andcc(Rscratch, 0xfffff003, Rscratch);
if (counters != NULL) {
// Accounting needs the Rscratch register
st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
ba_short(done);
} else {
ba(done);
delayed()->st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
}
bind (IsInflated);
// Try to CAS m->owner from null to Self
// Invariant: if we acquire the lock then _recursions should be 0.
add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
mov(G2_thread, Rscratch);
cas_ptr(Rmark, G0, Rscratch);
andcc(Rscratch, Rscratch, G0); // set ICCs for done: icc.zf iff success
// set icc.zf : 1=success 0=failure
// ST box->displaced_header = NonZero.
// Any non-zero value suffices:
// markOopDesc::unused_mark(), G2_thread, RBox, RScratch, rsp, etc.
st_ptr(Rbox, Rbox, BasicLock::displaced_header_offset_in_bytes());
// Intentional fall-through into done
bind (done);
}
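The recursive stack-lock test above (sub(Rscratch, SP, Rscratch); andcc(Rscratch, 0xfffff003, Rscratch)) decides recursion by sp-proximity: after a failed CAS, the fetched mark is treated as a recursive stack lock iff it is an aligned stack address within one page of SP, and the masked result (0 on success) is what gets stored into box->dhw. A sketch of the same computation, assuming the 4 KB page implied by the 0xfffff003 constant:

#include <cassert>
#include <cstdint>

// Returns 0 iff 'fetched_mark' looks like our own recent stack lock:
// within one 4 KB page of SP and aligned (low two bits clear).
static uint32_t sp_proximity(uint32_t fetched_mark, uint32_t sp) {
  uint32_t delta = fetched_mark - sp;
  return delta & 0xfffff003u; // non-zero -> not a recursive stack lock
}

int main() {
  uint32_t sp = 0x7fff1000;
  assert(sp_proximity(sp + 0x40, sp) == 0);   // nearby, aligned: recursive
  assert(sp_proximity(sp + 0x2000, sp) != 0); // too far away
  assert(sp_proximity(sp + 0x41, sp) != 0);   // misaligned
  return 0;
}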
@@ -2848,30 +2745,6 @@ void MacroAssembler::compiler_unlock_object(Register Roop, Register Rmark,
Label done ;
if (EmitSync & 4) {
cmp(SP, G0);
return ;
}
if (EmitSync & 8) {
if (try_bias) {
biased_locking_exit(mark_addr, Rscratch, done);
}
// Test first if it is a fast recursive unlock
ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rmark);
br_null_short(Rmark, Assembler::pt, done);
// Check if it is still a light weight lock, this is true if we see
// the stack address of the basicLock in the markOop of the object
assert(mark_addr.disp() == 0, "cas must take a zero displacement");
cas_ptr(mark_addr.base(), Rbox, Rmark);
ba(done);
delayed()->cmp(Rbox, Rmark);
bind(done);
return ;
}
// Beware ... If the aggregate size of the code emitted by CLO and CUO
// is too large, performance rolls abruptly off a cliff.
// This could be related to inlining policies, code cache management, or
@@ -2902,105 +2775,39 @@ void MacroAssembler::compiler_unlock_object(Register Roop, Register Rmark,
// close the resultant (and rare) race by having contended threads in
// monitorenter periodically poll _owner.
if (EmitSync & 1024) {
// Emit code to check that _owner == Self
// We could fold the _owner test into subsequent code more efficiently
// than using a stand-alone check, but since _owner checking is off by
// default we don't bother. We also might consider predicating the
// _owner==Self check on Xcheck:jni or running on a debug build.
ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), Rscratch);
orcc(Rscratch, G0, G0);
brx(Assembler::notZero, false, Assembler::pn, done);
delayed()->nop();
}
// 1-0 form : avoids CAS and MEMBAR in the common case
// Do not bother to ratify that m->Owner == Self.
ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), Rbox);
orcc(Rbox, G0, G0);
brx(Assembler::notZero, false, Assembler::pn, done);
delayed()->
ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)), Rscratch);
ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)), Rbox);
orcc(Rbox, Rscratch, G0);
brx(Assembler::zero, false, Assembler::pt, done);
delayed()->
st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
if (EmitSync & 512) {
// classic lock release code absent 1-0 locking
// m->Owner = null;
// membar #storeload
// if (m->cxq|m->EntryList) == null goto Success
// if (m->succ != null) goto Success
// if CAS (&m->Owner,0,Self) != 0 goto Success
// goto SlowPath
ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), Rbox);
orcc(Rbox, G0, G0);
brx(Assembler::notZero, false, Assembler::pn, done);
delayed()->nop();
st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
if (os::is_MP()) { membar(StoreLoad); }
ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)), Rscratch);
ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)), Rbox);
orcc(Rbox, Rscratch, G0);
brx(Assembler::zero, false, Assembler::pt, done);
delayed()->
ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), Rscratch);
andcc(Rscratch, Rscratch, G0);
brx(Assembler::notZero, false, Assembler::pt, done);
delayed()->andcc(G0, G0, G0);
add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
mov(G2_thread, Rscratch);
cas_ptr(Rmark, G0, Rscratch);
cmp(Rscratch, G0);
// invert icc.zf and goto done
brx(Assembler::notZero, false, Assembler::pt, done);
delayed()->cmp(G0, G0);
br(Assembler::always, false, Assembler::pt, done);
delayed()->cmp(G0, 1);
} else {
// 1-0 form : avoids CAS and MEMBAR in the common case
// Do not bother to ratify that m->Owner == Self.
ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), Rbox);
orcc(Rbox, G0, G0);
brx(Assembler::notZero, false, Assembler::pn, done);
delayed()->
ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)), Rscratch);
ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)), Rbox);
orcc(Rbox, Rscratch, G0);
if (EmitSync & 16384) {
// As an optional optimization, if (EntryList|cxq) != null and _succ is null then
// we should transfer control directly to the slow-path.
// This test makes the reacquire operation below very infrequent.
// The logic is equivalent to :
// if (cxq|EntryList) == null : Owner=null; goto Success
// if succ == null : goto SlowPath
// Owner=null; membar #storeload
// if succ != null : goto Success
// if CAS(&Owner,null,Self) != null goto Success
// goto SlowPath
brx(Assembler::zero, true, Assembler::pt, done);
delayed()->
st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), Rscratch);
andcc(Rscratch, Rscratch, G0) ;
brx(Assembler::zero, false, Assembler::pt, done);
delayed()->orcc(G0, 1, G0);
st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
} else {
brx(Assembler::zero, false, Assembler::pt, done);
delayed()->
st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
}
if (os::is_MP()) { membar(StoreLoad); }
// Check that _succ is (or remains) non-zero
ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), Rscratch);
andcc(Rscratch, Rscratch, G0);
brx(Assembler::notZero, false, Assembler::pt, done);
delayed()->andcc(G0, G0, G0);
add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
mov(G2_thread, Rscratch);
cas_ptr(Rmark, G0, Rscratch);
cmp(Rscratch, G0);
// invert icc.zf and goto done
// A slightly better v8+/v9 idiom would be the following:
// movrnz Rscratch,1,Rscratch
// ba done
// xorcc Rscratch,1,G0
// In v8+ mode the idiom would be valid IFF Rscratch was a G or O register
brx(Assembler::notZero, false, Assembler::pt, done);
delayed()->cmp(G0, G0);
br(Assembler::always, false, Assembler::pt, done);
delayed()->cmp(G0, 1);
}
if (os::is_MP()) { membar(StoreLoad); }
// Check that _succ is (or remains) non-zero
ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), Rscratch);
andcc(Rscratch, Rscratch, G0);
brx(Assembler::notZero, false, Assembler::pt, done);
delayed()->andcc(G0, G0, G0);
add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
mov(G2_thread, Rscratch);
cas_ptr(Rmark, G0, Rscratch);
cmp(Rscratch, G0);
// invert icc.zf and goto done
// A slightly better v8+/v9 idiom would be the following:
// movrnz Rscratch,1,Rscratch
// ba done
// xorcc Rscratch,1,G0
// In v8+ mode the idiom would be valid IFF Rscratch was a G or O register
brx(Assembler::notZero, false, Assembler::pt, done);
delayed()->cmp(G0, G0);
br(Assembler::always, false, Assembler::pt, done);
delayed()->cmp(G0, 1);
bind (LStacked);
// Consider: we could replace the expensive CAS in the exit
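The sequence above is the 1-0 exit with successor ratification: store NULL into _owner, execute a StoreLoad barrier, then check _succ; if no successor appeared, try to CAS the lock back and, failing that, fall into the slow path to wake a thread. A sketch of that control flow (illustrative monitor layout; seq_cst stands in for the membar #StoreLoad):

#include <atomic>
#include <cassert>

struct Monitor {
  std::atomic<void*> owner{nullptr};
  std::atomic<void*> succ{nullptr};
};

// Returns true if the fast exit stands; false means we re-acquired the
// lock and must run the slow-path exit to pick a successor.
static bool one_zero_exit(Monitor* m, void* self) {
  m->owner.store(nullptr, std::memory_order_seq_cst); // ST; membar #StoreLoad
  if (m->succ.load(std::memory_order_seq_cst) != nullptr)
    return true;  // someone is set up to take the lock
  void* expected = nullptr;
  if (!m->owner.compare_exchange_strong(expected, self))
    return true;  // another thread already grabbed it; exit succeeded
  return false;   // we own it again; slow path must wake a successor
}

int main() {
  Monitor m;
  int me;
  m.owner.store(&me);
  assert(!one_zero_exit(&m, &me)); // no succ, CAS regrabs -> slow path
  assert(m.owner.load() == &me);
  return 0;
}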

@@ -1721,227 +1721,160 @@ void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg
if (counters != NULL) {
atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()), scrReg);
}
if (EmitSync & 1) {
// set box->dhw = markOopDesc::unused_mark()
// Force all sync thru slow-path: slow_enter() and slow_exit()
movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
cmpptr (rsp, (int32_t)NULL_WORD);
} else {
// Possible cases that we'll encounter in fast_lock
// ------------------------------------------------
// * Inflated
// -- unlocked
// -- Locked
// = by self
// = by other
// * biased
// -- by Self
// -- by other
// * neutral
// * stack-locked
// -- by self
// = sp-proximity test hits
// = sp-proximity test generates false-negative
// -- by other
//
Label IsInflated, DONE_LABEL;
// Possible cases that we'll encounter in fast_lock
// ------------------------------------------------
// * Inflated
// -- unlocked
// -- Locked
// = by self
// = by other
// * biased
// -- by Self
// -- by other
// * neutral
// * stack-locked
// -- by self
// = sp-proximity test hits
// = sp-proximity test generates false-negative
// -- by other
//
// it's stack-locked, biased or neutral
// TODO: optimize away redundant LDs of obj->mark and improve the markword triage
// order to reduce the number of conditional branches in the most common cases.
// Beware -- there's a subtle invariant that fetch of the markword
// at [FETCH], below, will never observe a biased encoding (*101b).
// If this invariant is not held we risk exclusion (safety) failure.
if (UseBiasedLocking && !UseOptoBiasInlining) {
biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, counters);
}
Label IsInflated, DONE_LABEL;
// it's stack-locked, biased or neutral
// TODO: optimize away redundant LDs of obj->mark and improve the markword triage
// order to reduce the number of conditional branches in the most common cases.
// Beware -- there's a subtle invariant that fetch of the markword
// at [FETCH], below, will never observe a biased encoding (*101b).
// If this invariant is not held we risk exclusion (safety) failure.
if (UseBiasedLocking && !UseOptoBiasInlining) {
biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, counters);
}
#if INCLUDE_RTM_OPT
if (UseRTMForStackLocks && use_rtm) {
rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
stack_rtm_counters, method_data, profile_rtm,
DONE_LABEL, IsInflated);
}
if (UseRTMForStackLocks && use_rtm) {
rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
stack_rtm_counters, method_data, profile_rtm,
DONE_LABEL, IsInflated);
}
#endif // INCLUDE_RTM_OPT
movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // [FETCH]
testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased
jccb(Assembler::notZero, IsInflated);
movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // [FETCH]
testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased
jccb(Assembler::notZero, IsInflated);
// Attempt stack-locking ...
orptr (tmpReg, markOopDesc::unlocked_value);
movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS
if (os::is_MP()) {
lock();
}
cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Updates tmpReg
if (counters != NULL) {
cond_inc32(Assembler::equal,
ExternalAddress((address)counters->fast_path_entry_count_addr()));
}
jcc(Assembler::equal, DONE_LABEL); // Success
// Attempt stack-locking ...
orptr (tmpReg, markOopDesc::unlocked_value);
movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS
if (os::is_MP()) {
lock();
}
cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Updates tmpReg
if (counters != NULL) {
cond_inc32(Assembler::equal,
ExternalAddress((address)counters->fast_path_entry_count_addr()));
}
jcc(Assembler::equal, DONE_LABEL); // Success
// Recursive locking.
// The object is stack-locked: markword contains stack pointer to BasicLock.
// Locked by current thread if difference with current SP is less than one page.
subptr(tmpReg, rsp);
// Next instruction sets ZFlag == 1 (Success) if difference is less than one page.
andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
movptr(Address(boxReg, 0), tmpReg);
if (counters != NULL) {
cond_inc32(Assembler::equal,
ExternalAddress((address)counters->fast_path_entry_count_addr()));
}
jmp(DONE_LABEL);
// Recursive locking.
// The object is stack-locked: markword contains stack pointer to BasicLock.
// Locked by current thread if difference with current SP is less than one page.
subptr(tmpReg, rsp);
// Next instruction sets ZFlag == 1 (Success) if difference is less than one page.
andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
movptr(Address(boxReg, 0), tmpReg);
if (counters != NULL) {
cond_inc32(Assembler::equal,
ExternalAddress((address)counters->fast_path_entry_count_addr()));
}
jmp(DONE_LABEL);
bind(IsInflated);
// The object is inflated. tmpReg contains pointer to ObjectMonitor* + markOopDesc::monitor_value
bind(IsInflated);
// The object is inflated. tmpReg contains pointer to ObjectMonitor* + markOopDesc::monitor_value
#if INCLUDE_RTM_OPT
// Use the same RTM locking code in 32- and 64-bit VM.
if (use_rtm) {
rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg,
rtm_counters, method_data, profile_rtm, DONE_LABEL);
} else {
// Use the same RTM locking code in 32- and 64-bit VM.
if (use_rtm) {
rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg,
rtm_counters, method_data, profile_rtm, DONE_LABEL);
} else {
#endif // INCLUDE_RTM_OPT
#ifndef _LP64
// The object is inflated.
// The object is inflated.
// boxReg refers to the on-stack BasicLock in the current frame.
// We'd like to write:
// set box->_displaced_header = markOopDesc::unused_mark(). Any non-0 value suffices.
// This is convenient but results in a ST-before-CAS penalty. The following CAS suffers
// additional latency as we have another ST in the store buffer that must drain.
// boxReg refers to the on-stack BasicLock in the current frame.
// We'd like to write:
// set box->_displaced_header = markOopDesc::unused_mark(). Any non-0 value suffices.
// This is convenient but results in a ST-before-CAS penalty. The following CAS suffers
// additional latency as we have another ST in the store buffer that must drain.
if (EmitSync & 8192) {
movptr(Address(boxReg, 0), 3); // results in ST-before-CAS penalty
get_thread (scrReg);
movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2]
movptr(tmpReg, NULL_WORD); // consider: xor vs mov
if (os::is_MP()) {
lock();
}
cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
} else
if ((EmitSync & 128) == 0) { // avoid ST-before-CAS
// register juggle because we need tmpReg for cmpxchgptr below
movptr(scrReg, boxReg);
movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2]
// avoid ST-before-CAS
// register juggle because we need tmpReg for cmpxchgptr below
movptr(scrReg, boxReg);
movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2]
// Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
// prefetchw [eax + Offset(_owner)-2]
prefetchw(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
}
// Optimistic form: consider XORL tmpReg,tmpReg
movptr(tmpReg, NULL_WORD);
if ((EmitSync & 64) == 0) {
// Optimistic form: consider XORL tmpReg,tmpReg
movptr(tmpReg, NULL_WORD);
} else {
// Can suffer RTS->RTO upgrades on shared or cold $ lines
// Test-And-CAS instead of CAS
movptr(tmpReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); // rax, = m->_owner
testptr(tmpReg, tmpReg); // Locked ?
jccb (Assembler::notZero, DONE_LABEL);
}
// Appears unlocked - try to swing _owner from null to non-null.
// Ideally, I'd manifest "Self" with get_thread and then attempt
// to CAS the register containing Self into m->Owner.
// But we don't have enough registers, so instead we can either try to CAS
// rsp or the address of the box (in scr) into &m->owner. If the CAS succeeds
// we later store "Self" into m->Owner. Transiently storing a stack address
// (rsp or the address of the box) into m->owner is harmless.
// Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
if (os::is_MP()) {
lock();
}
cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
movptr(Address(scrReg, 0), 3); // box->_displaced_header = 3
// If we weren't able to swing _owner from NULL to the BasicLock
// then take the slow path.
jccb (Assembler::notZero, DONE_LABEL);
// update _owner from BasicLock to thread
get_thread (scrReg); // beware: clobbers ICCs
movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg);
xorptr(boxReg, boxReg); // set icc.ZFlag = 1 to indicate success
// Appears unlocked - try to swing _owner from null to non-null.
// Ideally, I'd manifest "Self" with get_thread and then attempt
// to CAS the register containing Self into m->Owner.
// But we don't have enough registers, so instead we can either try to CAS
// rsp or the address of the box (in scr) into &m->owner. If the CAS succeeds
// we later store "Self" into m->Owner. Transiently storing a stack address
// (rsp or the address of the box) into m->owner is harmless.
// Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
if (os::is_MP()) {
lock();
}
cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
movptr(Address(scrReg, 0), 3); // box->_displaced_header = 3
// If we weren't able to swing _owner from NULL to the BasicLock
// then take the slow path.
jccb (Assembler::notZero, DONE_LABEL);
// update _owner from BasicLock to thread
get_thread (scrReg); // beware: clobbers ICCs
movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg);
xorptr(boxReg, boxReg); // set icc.ZFlag = 1 to indicate success
// If the CAS fails we can either retry or pass control to the slow-path.
// We use the latter tactic.
// Pass the CAS result in the icc.ZFlag into DONE_LABEL
// If the CAS was successful ...
// Self has acquired the lock
// Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
// Intentional fall-through into DONE_LABEL ...
} else {
movptr(Address(boxReg, 0), intptr_t(markOopDesc::unused_mark())); // results in ST-before-CAS penalty
movptr(boxReg, tmpReg);
// Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
// prefetchw [eax + Offset(_owner)-2]
prefetchw(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
}
if ((EmitSync & 64) == 0) {
// Optimistic form
xorptr (tmpReg, tmpReg);
} else {
// Can suffer RTS->RTO upgrades on shared or cold $ lines
movptr(tmpReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); // rax, = m->_owner
testptr(tmpReg, tmpReg); // Locked ?
jccb (Assembler::notZero, DONE_LABEL);
}
// Appears unlocked - try to swing _owner from null to non-null.
// Use either "Self" (in scr) or rsp as thread identity in _owner.
// Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
get_thread (scrReg);
if (os::is_MP()) {
lock();
}
cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
// If the CAS fails we can either retry or pass control to the slow-path.
// We use the latter tactic.
// Pass the CAS result in the icc.ZFlag into DONE_LABEL
// If the CAS was successful ...
// Self has acquired the lock
// Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
// Intentional fall-through into DONE_LABEL ...
}
// If the CAS fails we can either retry or pass control to the slow-path.
// We use the latter tactic.
// Pass the CAS result in the icc.ZFlag into DONE_LABEL
// If the CAS was successful ...
// Self has acquired the lock
// Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
// Intentional fall-through into DONE_LABEL ...
#else // _LP64
// It's inflated
movq(scrReg, tmpReg);
xorq(tmpReg, tmpReg);
// It's inflated
movq(scrReg, tmpReg);
xorq(tmpReg, tmpReg);
if (os::is_MP()) {
lock();
}
cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
// Unconditionally set box->_displaced_header = markOopDesc::unused_mark().
// Without cast to int32_t movptr will destroy r10 which is typically obj.
movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
// Intentional fall-through into DONE_LABEL ...
// Propagate ICC.ZF from CAS above into DONE_LABEL.
if (os::is_MP()) {
lock();
}
cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
// Unconditionally set box->_displaced_header = markOopDesc::unused_mark().
// Without cast to int32_t movptr will destroy r10 which is typically obj.
movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
// Intentional fall-through into DONE_LABEL ...
// Propagate ICC.ZF from CAS above into DONE_LABEL.
#endif // _LP64
#if INCLUDE_RTM_OPT
} // use_rtm()
} // use_rtm()
#endif
// DONE_LABEL is a hot target - we'd really like to place it at the
// start of cache line by padding with NOPs.
// See the AMD and Intel software optimization manuals for the
// most efficient "long" NOP encodings.
// Unfortunately none of our alignment mechanisms suffice.
bind(DONE_LABEL);
// DONE_LABEL is a hot target - we'd really like to place it at the
// start of cache line by padding with NOPs.
// See the AMD and Intel software optimization manuals for the
// most efficient "long" NOP encodings.
// Unfortunately none of our alignment mechanisms suffice.
bind(DONE_LABEL);
// At DONE_LABEL the icc ZFlag is set as follows ...
// Fast_Unlock uses the same protocol.
// ZFlag == 1 -> Success
// ZFlag == 0 -> Failure - force control through the slow-path
}
// At DONE_LABEL the icc ZFlag is set as follows ...
// Fast_Unlock uses the same protocol.
// ZFlag == 1 -> Success
// ZFlag == 0 -> Failure - force control through the slow-path
}
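The stack-lock attempt above stores the expected mark into the box before the CAS ("Anticipate successful CAS"), so on success no fixup store is needed. A sketch of that shape, with illustrative names:

#include <atomic>
#include <cassert>
#include <cstdint>

struct BasicLock { uintptr_t dhw; }; // on-stack box

static const uintptr_t unlocked_value = 1;

// Attempt the stack lock: save the (expected unlocked) mark in the box
// first, then CAS the box address into the object's mark word.
static bool try_stack_lock(std::atomic<uintptr_t>* mark, BasicLock* box) {
  uintptr_t expected = mark->load() | unlocked_value; // anticipate success
  box->dhw = expected;
  return mark->compare_exchange_strong(expected,
                                       reinterpret_cast<uintptr_t>(box));
}

int main() {
  std::atomic<uintptr_t> mark{unlocked_value}; // neutral, unlocked object
  BasicLock box;
  assert(try_stack_lock(&mark, &box));
  assert(mark.load() == reinterpret_cast<uintptr_t>(&box));
  return 0;
}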
// obj: object to unlock
@@ -1980,293 +1913,179 @@ void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpR
assert(boxReg == rax, "");
assert_different_registers(objReg, boxReg, tmpReg);
if (EmitSync & 4) {
// Disable - inhibit all inlining. Force control through the slow-path
cmpptr (rsp, 0);
} else {
Label DONE_LABEL, Stacked, CheckSucc;
Label DONE_LABEL, Stacked, CheckSucc;
// Critically, the biased locking test must have precedence over
// and appear before the (box->dhw == 0) recursive stack-lock test.
if (UseBiasedLocking && !UseOptoBiasInlining) {
biased_locking_exit(objReg, tmpReg, DONE_LABEL);
}
#if INCLUDE_RTM_OPT
if (UseRTMForStackLocks && use_rtm) {
assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
Label L_regular_unlock;
movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // fetch markword
andptr(tmpReg, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
cmpptr(tmpReg, markOopDesc::unlocked_value); // bits = 001 unlocked
jccb(Assembler::notEqual, L_regular_unlock); // if !HLE RegularLock
xend(); // otherwise end...
jmp(DONE_LABEL); // ... and we're done
bind(L_regular_unlock);
}
#endif
cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header
jcc (Assembler::zero, DONE_LABEL); // 0 indicates recursive stack-lock
movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Examine the object's markword
testptr(tmpReg, markOopDesc::monitor_value); // Inflated?
jccb (Assembler::zero, Stacked);
// It's inflated.
#if INCLUDE_RTM_OPT
if (use_rtm) {
Label L_regular_inflated_unlock;
int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
movptr(boxReg, Address(tmpReg, owner_offset));
testptr(boxReg, boxReg);
jccb(Assembler::notZero, L_regular_inflated_unlock);
xend();
jmpb(DONE_LABEL);
bind(L_regular_inflated_unlock);
}
#endif
// Despite our balanced locking property we still check that m->_owner == Self
// as java routines or native JNI code called by this thread might
// have released the lock.
// Refer to the comments in synchronizer.cpp for how we might encode extra
// state in _succ so we can avoid fetching EntryList|cxq.
//
// I'd like to add more cases in fast_lock() and fast_unlock() --
// such as recursive enter and exit -- but we have to be wary of
// I$ bloat, T$ effects and BP$ effects.
//
// If there's no contention try a 1-0 exit. That is, exit without
// a costly MEMBAR or CAS. See synchronizer.cpp for details on how
// we detect and recover from the race that the 1-0 exit admits.
//
// Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
// before it STs null into _owner, releasing the lock. Updates
// to data protected by the critical section must be visible before
// we drop the lock (and thus before any other thread could acquire
// the lock and observe the fields protected by the lock).
// IA32's memory-model is SPO, so STs are ordered with respect to
// each other and there's no need for an explicit barrier (fence).
// See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
#ifndef _LP64
get_thread (boxReg);
if ((EmitSync & 4096) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
// prefetchw [ebx + Offset(_owner)-2]
prefetchw(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
}
// Note that we could employ various encoding schemes to reduce
// the number of loads below (currently 4) to just 2 or 3.
// Refer to the comments in synchronizer.cpp.
// In practice the chain of fetches doesn't seem to impact performance, however.
xorptr(boxReg, boxReg);
if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
// Attempt to reduce branch density - AMD's branch predictor.
orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
jccb (Assembler::notZero, DONE_LABEL);
movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
jmpb (DONE_LABEL);
} else {
orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
jccb (Assembler::notZero, DONE_LABEL);
movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
jccb (Assembler::notZero, CheckSucc);
movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
jmpb (DONE_LABEL);
}
// The following code fragment (EmitSync & 65536) improves the performance of
// contended applications and contended synchronization microbenchmarks.
// Unfortunately the emission of the code - even though not executed - causes regressions
// in scimark and jetstream, evidently because of $ effects. Replacing the code
// with an equal number of never-executed NOPs results in the same regression.
// We leave it off by default.
if ((EmitSync & 65536) != 0) {
Label LSuccess, LGoSlowPath ;
bind (CheckSucc);
// Optional pre-test ... it's safe to elide this
cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
jccb(Assembler::zero, LGoSlowPath);
// We have a classic Dekker-style idiom:
// ST m->_owner = 0 ; MEMBAR; LD m->_succ
// There are a number of ways to implement the barrier:
// (1) lock:andl &m->_owner, 0
// is fast, but masm doesn't currently support the "ANDL M,IMM32" form.
// LOCK: ANDL [ebx+Offset(_Owner)-2], 0
// Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8
// (2) If supported, an explicit MFENCE is appealing.
// In older IA32 processors MFENCE is slower than lock:add or xchg
// particularly if the write-buffer is full as might be the case if
// stores closely precede the fence or a fence-equivalent instruction.
// See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences
// as the situation has changed with Nehalem and Shanghai.
// (3) In lieu of an explicit fence, use lock:addl to the top-of-stack
// The $lines underlying the top-of-stack should be in M-state.
// The locked add instruction is serializing, of course.
// (4) Use xchg, which is serializing
// mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works
// (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0.
// The integer condition codes will tell us if succ was 0.
// Since _succ and _owner should reside in the same $line and
// we just stored into _owner, it's likely that the $line
// remains in M-state for the lock:orl.
//
// We currently use (3), although it's likely that switching to (2)
// is the right choice going forward.
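// For illustration, option (2) would look roughly as follows, assuming an
// mfence() emitter is available (sketch only, not the code emitted here):
//   movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
//   mfence();
//   cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), 0);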
movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
if (os::is_MP()) {
lock(); addptr(Address(rsp, 0), 0);
}
// Ratify _succ remains non-null
cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), 0);
jccb (Assembler::notZero, LSuccess);
xorptr(boxReg, boxReg); // box is really EAX
if (os::is_MP()) { lock(); }
cmpxchgptr(rsp, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
// There's no successor so we tried to regrab the lock with the
// placeholder value. If that didn't work, then another thread
// grabbed the lock so we're done (and exit was a success).
jccb (Assembler::notEqual, LSuccess);
// Since we're low on registers we installed rsp as a placeholder in _owner.
// Now install Self over rsp. This is safe as we're transitioning from
// non-null to non-null
get_thread (boxReg);
movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), boxReg);
// Intentional fall-through into LGoSlowPath ...
bind (LGoSlowPath);
orptr(boxReg, 1); // set ICC.ZF=0 to indicate failure
jmpb (DONE_LABEL);
bind (LSuccess);
xorptr(boxReg, boxReg); // set ICC.ZF=1 to indicate success
jmpb (DONE_LABEL);
}
bind (Stacked);
// It's not inflated and it's not recursively stack-locked and it's not biased.
// It must be stack-locked.
// Try to reset the header to displaced header.
// The "box" value on the stack is stable, so we can reload
// and be assured we observe the same value as above.
movptr(tmpReg, Address(boxReg, 0));
if (os::is_MP()) {
lock();
}
cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
// Intentional fall-through into DONE_LABEL
// DONE_LABEL is a hot target - we'd really like to place it at the
// start of a cache line by padding with NOPs.
// See the AMD and Intel software optimization manuals for the
// most efficient "long" NOP encodings.
// Unfortunately none of our alignment mechanisms suffice.
if ((EmitSync & 65536) == 0) {
bind (CheckSucc);
}
#else // _LP64
// It's inflated
if (EmitSync & 1024) {
// Emit code to check that _owner == Self
// We could fold the _owner test into subsequent code more efficiently
// than using a stand-alone check, but since _owner checking is off by
// default we don't bother. We also might consider predicating the
// _owner==Self check on Xcheck:jni or running on a debug build.
movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
xorptr(boxReg, r15_thread);
} else {
xorptr(boxReg, boxReg);
}
orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
jccb (Assembler::notZero, DONE_LABEL);
movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
jccb (Assembler::notZero, CheckSucc);
movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
jmpb (DONE_LABEL);
if ((EmitSync & 65536) == 0) {
// Try to avoid passing control into the slow_path ...
Label LSuccess, LGoSlowPath ;
bind (CheckSucc);
// The following optional optimization can be elided if necessary.
// Effectively: if (succ == null) goto SlowPath
// Keeping the test reduces the window for a race, however,
// and thus benefits performance.
cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
jccb (Assembler::zero, LGoSlowPath);
xorptr(boxReg, boxReg);
if ((EmitSync & 16) && os::is_MP()) {
xchgptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
} else {
movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
if (os::is_MP()) {
// Memory barrier/fence
// Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ
// Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack.
// This is faster on Nehalem and AMD Shanghai/Barcelona.
// See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences
// We might also restructure (ST Owner=0;barrier;LD _Succ) to
// (mov box,0; xchgq box, &m->Owner; LD _succ) .
lock(); addl(Address(rsp, 0), 0);
}
}
cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
jccb (Assembler::notZero, LSuccess);
// Rare inopportune interleaving - race.
// The successor vanished in the small window above.
// The lock is contended -- (cxq|EntryList) != null -- and there's no apparent successor.
// We need to ensure progress and succession.
// Try to reacquire the lock.
// If that fails then the new owner is responsible for succession and this
// thread needs to take no further action and can exit via the fast path (success).
// If the re-acquire succeeds then pass control into the slow path.
// As implemented, this latter mode is horrible because we generate more
// coherence traffic on the lock *and* artificially extend the critical-section
// length by virtue of passing control into the slow path.
// box is really RAX -- the following CMPXCHG depends on that binding
// cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
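// Illustrative C-like semantics of lock; cmpxchg R,[M] (sketch only):
//   temp = *M;
//   if (temp == rax) { *M = R; ZF = 1; } else { ZF = 0; }
//   rax = temp;
// so the conditional jump below keys off whether the CAS succeeded.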
if (os::is_MP()) { lock(); }
cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
// There's no successor so we tried to regrab the lock.
// If that didn't work, then another thread grabbed the
// lock so we're done (and exit was a success).
jccb (Assembler::notEqual, LSuccess);
// Intentional fall-through into slow-path
bind (LGoSlowPath);
orl (boxReg, 1); // set ICC.ZF=0 to indicate failure
jmpb (DONE_LABEL);
bind (LSuccess);
testl (boxReg, 0); // set ICC.ZF=1 to indicate success
jmpb (DONE_LABEL);
}
bind (Stacked);
movptr(tmpReg, Address (boxReg, 0)); // re-fetch
if (os::is_MP()) { lock(); }
cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
if (EmitSync & 65536) {
bind (CheckSucc);
}
#endif
bind(DONE_LABEL);
// Critically, the biased locking test must have precedence over
// and appear before the (box->dhw == 0) recursive stack-lock test.
if (UseBiasedLocking && !UseOptoBiasInlining) {
biased_locking_exit(objReg, tmpReg, DONE_LABEL);
}
#if INCLUDE_RTM_OPT
if (UseRTMForStackLocks && use_rtm) {
assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
Label L_regular_unlock;
movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // fetch markword
andptr(tmpReg, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
cmpptr(tmpReg, markOopDesc::unlocked_value); // bits = 001 unlocked
jccb(Assembler::notEqual, L_regular_unlock); // if !HLE RegularLock
xend(); // otherwise end...
jmp(DONE_LABEL); // ... and we're done
bind(L_regular_unlock);
}
#endif
cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header
jcc (Assembler::zero, DONE_LABEL); // 0 indicates recursive stack-lock
movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Examine the object's markword
testptr(tmpReg, markOopDesc::monitor_value); // Inflated?
jccb (Assembler::zero, Stacked);
// It's inflated.
#if INCLUDE_RTM_OPT
if (use_rtm) {
Label L_regular_inflated_unlock;
int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
movptr(boxReg, Address(tmpReg, owner_offset));
testptr(boxReg, boxReg);
jccb(Assembler::notZero, L_regular_inflated_unlock);
xend();
jmpb(DONE_LABEL);
bind(L_regular_inflated_unlock);
}
#endif
// Despite our balanced locking property we still check that m->_owner == Self
// as java routines or native JNI code called by this thread might
// have released the lock.
// Refer to the comments in synchronizer.cpp for how we might encode extra
// state in _succ so we can avoid fetching EntryList|cxq.
//
// I'd like to add more cases in fast_lock() and fast_unlock() --
// such as recursive enter and exit -- but we have to be wary of
// I$ bloat, T$ effects and BP$ effects.
//
// If there's no contention try a 1-0 exit. That is, exit without
// a costly MEMBAR or CAS. See synchronizer.cpp for details on how
// we detect and recover from the race that the 1-0 exit admits.
//
// Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
// before it STs null into _owner, releasing the lock. Updates
// to data protected by the critical section must be visible before
// we drop the lock (and thus before any other thread could acquire
// the lock and observe the fields protected by the lock).
// IA32's memory model is TSO, so STs are ordered with respect to
// each other and there's no need for an explicit barrier (fence).
// See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
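// As a rough, illustrative outline (not emitted code), the exit protocol
// below amounts to:
//   owner = NULL; fence();                        // the 1-0 exit
//   if (succ != NULL) return;                     // a successor exists; done
//   if (CAS(&owner, NULL, Self) != NULL) return;  // someone else took the lock
//   goto slow_path;                               // we must hand off succession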
#ifndef _LP64
get_thread (boxReg);
// Note that we could employ various encoding schemes to reduce
// the number of loads below (currently 4) to just 2 or 3.
// Refer to the comments in synchronizer.cpp.
// In practice the chain of fetches doesn't seem to impact performance, however.
xorptr(boxReg, boxReg);
orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
jccb (Assembler::notZero, DONE_LABEL);
movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
jccb (Assembler::notZero, CheckSucc);
movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
jmpb (DONE_LABEL);
bind (Stacked);
// It's not inflated and it's not recursively stack-locked and it's not biased.
// It must be stack-locked.
// Try to reset the header to displaced header.
// The "box" value on the stack is stable, so we can reload
// and be assured we observe the same value as above.
movptr(tmpReg, Address(boxReg, 0));
if (os::is_MP()) {
lock();
}
cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
// Intentional fall-through into DONE_LABEL
// DONE_LABEL is a hot target - we'd really like to place it at the
// start of a cache line by padding with NOPs.
// See the AMD and Intel software optimization manuals for the
// most efficient "long" NOP encodings.
// Unfortunately none of our alignment mechanisms suffice.
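// (For reference, the usual long-NOP encodings are the 0F 1F /0 family,
// e.g. the three-byte 0F 1F 00; see the vendor optimization manuals.)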
bind (CheckSucc);
#else // _LP64
// It's inflated
xorptr(boxReg, boxReg);
orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
jccb (Assembler::notZero, DONE_LABEL);
movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
jccb (Assembler::notZero, CheckSucc);
movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
jmpb (DONE_LABEL);
// Try to avoid passing control into the slow_path ...
Label LSuccess, LGoSlowPath ;
bind (CheckSucc);
// The following optional optimization can be elided if necessary.
// Effectively: if (succ == null) goto SlowPath
// Keeping the test reduces the window for a race, however,
// and thus benefits performance.
cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
jccb (Assembler::zero, LGoSlowPath);
xorptr(boxReg, boxReg);
movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
if (os::is_MP()) {
// Memory barrier/fence
// Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ
// Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack.
// This is faster on Nehalem and AMD Shanghai/Barcelona.
// See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences
// We might also restructure (ST Owner=0;barrier;LD _Succ) to
// (mov box,0; xchgq box, &m->Owner; LD _succ) .
lock(); addl(Address(rsp, 0), 0);
}
cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
jccb (Assembler::notZero, LSuccess);
// Rare inopportune interleaving - race.
// The successor vanished in the small window above.
// The lock is contended -- (cxq|EntryList) != null -- and there's no apparent successor.
// We need to ensure progress and succession.
// Try to reacquire the lock.
// If that fails then the new owner is responsible for succession and this
// thread needs to take no further action and can exit via the fast path (success).
// If the re-acquire succeeds then pass control into the slow path.
// As implemented, this latter mode is horrible because we generate more
// coherence traffic on the lock *and* artificially extend the critical-section
// length by virtue of passing control into the slow path.
// box is really RAX -- the following CMPXCHG depends on that binding
// cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
if (os::is_MP()) { lock(); }
cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
// There's no successor so we tried to regrab the lock.
// If that didn't work, then another thread grabbed the
// lock so we're done (and exit was a success).
jccb (Assembler::notEqual, LSuccess);
// Intentional fall-through into slow-path
bind (LGoSlowPath);
orl (boxReg, 1); // set ICC.ZF=0 to indicate failure
jmpb (DONE_LABEL);
bind (LSuccess);
testl (boxReg, 0); // set ICC.ZF=1 to indicate success
jmpb (DONE_LABEL);
bind (Stacked);
movptr(tmpReg, Address (boxReg, 0)); // re-fetch
if (os::is_MP()) { lock(); }
cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
#endif
bind(DONE_LABEL);
}
#endif // COMPILER2

@ -574,6 +574,7 @@ static SpecialFlag const special_jvm_flags[] = {
{ "PrintSafepointStatisticsCount", JDK_Version::jdk(11), JDK_Version::jdk(12), JDK_Version::jdk(13) },
{ "TransmitErrorReport", JDK_Version::undefined(), JDK_Version::jdk(12), JDK_Version::jdk(13) },
{ "ErrorReportServer", JDK_Version::undefined(), JDK_Version::jdk(12), JDK_Version::jdk(13) },
{ "EmitSync", JDK_Version::undefined(), JDK_Version::jdk(12), JDK_Version::jdk(13) },
#ifdef TEST_VERIFY_SPECIAL_JVM_FLAGS
{ "dep > obs", JDK_Version::jdk(9), JDK_Version::jdk(8), JDK_Version::undefined() },
@ -3977,7 +3978,7 @@ jint Arguments::apply_ergo() {
}
}
#ifdef COMPILER2
if (!UseBiasedLocking || EmitSync != 0) {
if (!UseBiasedLocking) {
UseOptoBiasInlining = false;
}
#endif

@ -830,10 +830,6 @@ define_pd_global(uint64_t,MaxRAM, 1ULL*G);
experimental(ccstr, SyncKnobs, NULL, \
"(Unstable) Various monitor synchronization tunables") \
\
experimental(intx, EmitSync, 0, \
"(Unsafe, Unstable) " \
"Control emission of inline sync fast-path code") \
\
product(intx, MonitorBound, 0, "Bound Monitor population") \
range(0, max_jint) \
\