diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp index b4c12ecd4a8..62831ee72ba 100644 --- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp @@ -150,10 +150,12 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, Registe Register oop = objectReg; Register box = boxReg; Register disp_hdr = tmpReg; + Register owner_addr = tmpReg; Register tmp = tmp2Reg; Label cont; Label object_has_monitor; Label count, no_count; + Label unlocked; assert(LockingMode != LM_LIGHTWEIGHT, "lightweight locking should use fast_unlock_lightweight"); assert_different_registers(oop, box, tmp, disp_hdr); @@ -204,14 +206,40 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, Registe b(cont); bind(notRecursive); + + // Compute owner address. + lea(owner_addr, Address(tmp, ObjectMonitor::owner_offset())); + + // Set owner to null. + // Release to satisfy the JMM + stlr(zr, owner_addr); + // We need a full fence after clearing owner to avoid stranding. + // StoreLoad achieves this. + membar(StoreLoad); + + // Check if the entry lists are empty. ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset())); - ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset())); - orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0. - cmp(rscratch1, zr); // Sets flags for result - cbnz(rscratch1, cont); - // need a release store here - lea(tmp, Address(tmp, ObjectMonitor::owner_offset())); - stlr(zr, tmp); // set unowned + ldr(tmpReg, Address(tmp, ObjectMonitor::cxq_offset())); + orr(rscratch1, rscratch1, tmpReg); + cmp(rscratch1, zr); + br(Assembler::EQ, cont); // If so we are done. + + // Check if there is a successor. + ldr(rscratch1, Address(tmp, ObjectMonitor::succ_offset())); + cmp(rscratch1, zr); + br(Assembler::NE, unlocked); // If so we are done. + + // Save the monitor pointer in the current thread, so we can try to + // reacquire the lock in SharedRuntime::monitor_exit_helper(). + str(tmp, Address(rthread, JavaThread::unlocked_inflated_monitor_offset())); + + cmp(zr, rthread); // Set Flag to NE => slow path + b(cont); + + bind(unlocked); + cmp(zr, zr); // Set Flag to EQ => fast path + + // Intentional fall-through bind(cont); // flag == EQ indicates success @@ -498,33 +526,41 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register box, Regi bind(not_recursive); - Label release; const Register t2_owner_addr = t2; // Compute owner address. lea(t2_owner_addr, Address(t1_monitor, ObjectMonitor::owner_offset())); + // Set owner to null. + // Release to satisfy the JMM + stlr(zr, t2_owner_addr); + // We need a full fence after clearing owner to avoid stranding. + // StoreLoad achieves this. + membar(StoreLoad); + // Check if the entry lists are empty. ldr(rscratch1, Address(t1_monitor, ObjectMonitor::EntryList_offset())); ldr(t3_t, Address(t1_monitor, ObjectMonitor::cxq_offset())); orr(rscratch1, rscratch1, t3_t); cmp(rscratch1, zr); - br(Assembler::EQ, release); + br(Assembler::EQ, unlocked); // If so we are done. - // The owner may be anonymous and we removed the last obj entry in - // the lock-stack. This loses the information about the owner. - // Write the thread to the owner field so the runtime knows the owner. - str(rthread, Address(t2_owner_addr)); + // Check if there is a successor. + ldr(rscratch1, Address(t1_monitor, ObjectMonitor::succ_offset())); + cmp(rscratch1, zr); + br(Assembler::NE, unlocked); // If so we are done. + + // Save the monitor pointer in the current thread, so we can try to + // reacquire the lock in SharedRuntime::monitor_exit_helper(). + str(t1_monitor, Address(rthread, JavaThread::unlocked_inflated_monitor_offset())); + + cmp(zr, rthread); // Set Flag to NE => slow path b(slow_path); - - bind(release); - // Set owner to null. - // Release to satisfy the JMM - stlr(zr, t2_owner_addr); } bind(unlocked); decrement(Address(rthread, JavaThread::held_monitor_count_offset())); + cmp(zr, zr); // Set Flags to EQ => fast path #ifdef ASSERT // Check that unlocked label is reached with Flags == EQ. diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp index a8635af9582..8d8e39b8bbc 100644 --- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp @@ -2715,13 +2715,34 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe b(success); bind(notRecursive); + + // Set owner to null. + // Release to satisfy the JMM + release(); + li(temp, 0); + std(temp, in_bytes(ObjectMonitor::owner_offset()), current_header); + // We need a full fence after clearing owner to avoid stranding. + // StoreLoad achieves this. + membar(StoreLoad); + + // Check if the entry lists are empty. ld(temp, in_bytes(ObjectMonitor::EntryList_offset()), current_header); ld(displaced_header, in_bytes(ObjectMonitor::cxq_offset()), current_header); orr(temp, temp, displaced_header); // Will be 0 if both are 0. cmpdi(flag, temp, 0); - bne(flag, failure); - release(); - std(temp, in_bytes(ObjectMonitor::owner_offset()), current_header); + beq(flag, success); // If so we are done. + + // Check if there is a successor. + ld(temp, in_bytes(ObjectMonitor::succ_offset()), current_header); + cmpdi(flag, temp, 0); + bne(flag, success); // If so we are done. + + // Save the monitor pointer in the current thread, so we can try + // to reacquire the lock in SharedRuntime::monitor_exit_helper(). + std(current_header, in_bytes(JavaThread::unlocked_inflated_monitor_offset()), R16_thread); + + crxor(flag, Assembler::equal, flag, Assembler::equal); // Set flag = NE => slow path + b(failure); // flag == EQ indicates success, decrement held monitor count // flag == NE indicates failure @@ -3028,27 +3049,39 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(ConditionRegister f bind(not_recursive); - Label release_; + Label set_eq_unlocked; const Register t2 = tmp2; + // Set owner to null. + // Release to satisfy the JMM + release(); + li(t, 0); + std(t, in_bytes(ObjectMonitor::owner_offset()), monitor); + // We need a full fence after clearing owner to avoid stranding. + // StoreLoad achieves this. + membar(StoreLoad); + // Check if the entry lists are empty. ld(t, in_bytes(ObjectMonitor::EntryList_offset()), monitor); ld(t2, in_bytes(ObjectMonitor::cxq_offset()), monitor); orr(t, t, t2); cmpdi(CCR0, t, 0); - beq(CCR0, release_); + beq(CCR0, unlocked); // If so we are done. - // The owner may be anonymous and we removed the last obj entry in - // the lock-stack. This loses the information about the owner. - // Write the thread to the owner field so the runtime knows the owner. - std(R16_thread, in_bytes(ObjectMonitor::owner_offset()), monitor); + // Check if there is a successor. + ld(t, in_bytes(ObjectMonitor::succ_offset()), monitor); + cmpdi(CCR0, t, 0); + bne(CCR0, set_eq_unlocked); // If so we are done. + + // Save the monitor pointer in the current thread, so we can try + // to reacquire the lock in SharedRuntime::monitor_exit_helper(). + std(monitor, in_bytes(JavaThread::unlocked_inflated_monitor_offset()), R16_thread); + + crxor(CCR0, Assembler::equal, CCR0, Assembler::equal); // Set flag = NE => slow path b(slow_path); - bind(release_); - // Set owner to null. - release(); - // t contains 0 - std(t, in_bytes(ObjectMonitor::owner_offset()), monitor); + bind(set_eq_unlocked); + crorc(CCR0, Assembler::equal, CCR0, Assembler::equal); // Set flag = EQ => fast path } bind(unlocked); diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp index e2c9b9dd609..75f87e35adf 100644 --- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp @@ -165,6 +165,7 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, Register oop = objectReg; Register box = boxReg; Register disp_hdr = tmp1Reg; + Register owner_addr = tmp1Reg; Register tmp = tmp2Reg; Label object_has_monitor; // Finish fast lock successfully. MUST branch to with flag == 0 @@ -222,15 +223,33 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, j(unlocked); bind(notRecursive); - ld(t0, Address(tmp, ObjectMonitor::EntryList_offset())); - ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset())); - orr(t0, t0, disp_hdr); // Will be 0 if both are 0. - bnez(t0, slow_path); + // Compute owner address. + la(owner_addr, Address(tmp, ObjectMonitor::owner_offset())); - // need a release store here - la(tmp, Address(tmp, ObjectMonitor::owner_offset())); + // Set owner to null. + // Release to satisfy the JMM membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); - sd(zr, Address(tmp)); // set unowned + sd(zr, Address(owner_addr)); + // We need a full fence after clearing owner to avoid stranding. + // StoreLoad achieves this. + membar(StoreLoad); + + // Check if the entry lists are empty. + ld(t0, Address(tmp, ObjectMonitor::EntryList_offset())); + ld(tmp1Reg, Address(tmp, ObjectMonitor::cxq_offset())); + orr(t0, t0, tmp1Reg); + beqz(t0, unlocked); // If so we are done. + + // Check if there is a successor. + ld(t0, Address(tmp, ObjectMonitor::succ_offset())); + bnez(t0, unlocked); // If so we are done. + + // Save the monitor pointer in the current thread, so we can try to + // reacquire the lock in SharedRuntime::monitor_exit_helper(). + sd(tmp, Address(xthread, JavaThread::unlocked_inflated_monitor_offset())); + + mv(flag, 1); + j(slow_path); bind(unlocked); mv(flag, zr); @@ -534,28 +553,35 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register box, bind(not_recursive); - Label release; const Register tmp2_owner_addr = tmp2; // Compute owner address. la(tmp2_owner_addr, Address(tmp1_monitor, ObjectMonitor::owner_offset())); + // Set owner to null. + // Release to satisfy the JMM + membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); + sd(zr, Address(tmp2_owner_addr)); + // We need a full fence after clearing owner to avoid stranding. + // StoreLoad achieves this. + membar(StoreLoad); + // Check if the entry lists are empty. ld(t0, Address(tmp1_monitor, ObjectMonitor::EntryList_offset())); ld(tmp3_t, Address(tmp1_monitor, ObjectMonitor::cxq_offset())); orr(t0, t0, tmp3_t); - beqz(t0, release); + beqz(t0, unlocked); // If so we are done. - // The owner may be anonymous and we removed the last obj entry in - // the lock-stack. This loses the information about the owner. - // Write the thread to the owner field so the runtime knows the owner. - sd(xthread, Address(tmp2_owner_addr)); + // Check if there is a successor. + ld(tmp3_t, Address(tmp1_monitor, ObjectMonitor::succ_offset())); + bnez(tmp3_t, unlocked); // If so we are done. + + // Save the monitor pointer in the current thread, so we can try + // to reacquire the lock in SharedRuntime::monitor_exit_helper(). + sd(tmp1_monitor, Address(xthread, JavaThread::unlocked_inflated_monitor_offset())); + + mv(flag, 1); j(slow_path); - - bind(release); - // Set owner to null. - membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); - sd(zr, Address(tmp2_owner_addr)); } bind(unlocked); diff --git a/src/hotspot/cpu/s390/macroAssembler_s390.cpp b/src/hotspot/cpu/s390/macroAssembler_s390.cpp index 6c26e17d5ce..af281345b14 100644 --- a/src/hotspot/cpu/s390/macroAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/macroAssembler_s390.cpp @@ -3655,12 +3655,38 @@ void MacroAssembler::compiler_fast_unlock_object(Register oop, Register box, Reg bind(not_recursive); - load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); - z_brne(done); - load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); - z_brne(done); + NearLabel check_succ, set_eq_unlocked; + + // Set owner to null. + // Release to satisfy the JMM z_release(); - z_stg(temp/*=0*/, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), currentHeader); + z_lghi(temp, 0); + z_stg(temp, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), currentHeader); + // We need a full fence after clearing owner to avoid stranding. + z_fence(); + + // Check if the entry lists are empty. + load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); + z_brne(check_succ); + load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); + z_bre(done); // If so we are done. + + bind(check_succ); + + // Check if there is a successor. + load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ))); + z_brne(set_eq_unlocked); // If so we are done. + + // Save the monitor pointer in the current thread, so we can try to + // reacquire the lock in SharedRuntime::monitor_exit_helper(). + z_xilf(currentHeader, markWord::monitor_value); + z_stg(currentHeader, Address(Z_thread, JavaThread::unlocked_inflated_monitor_offset())); + + z_ltgr(oop, oop); // Set flag = NE + z_bru(done); + + bind(set_eq_unlocked); + z_cr(temp, temp); // Set flag = EQ bind(done); @@ -6454,6 +6480,7 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Regis const ByteSize monitor_tag = in_ByteSize(UseObjectMonitorTable ? 0 : checked_cast(markWord::monitor_value)); const Address recursions_address{monitor, ObjectMonitor::recursions_offset() - monitor_tag}; const Address cxq_address{monitor, ObjectMonitor::cxq_offset() - monitor_tag}; + const Address succ_address{monitor, ObjectMonitor::succ_offset() - monitor_tag}; const Address EntryList_address{monitor, ObjectMonitor::EntryList_offset() - monitor_tag}; const Address owner_address{monitor, ObjectMonitor::owner_offset() - monitor_tag}; @@ -6471,25 +6498,40 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Regis bind(not_recursive); - NearLabel not_ok; + NearLabel check_succ, set_eq_unlocked; + + // Set owner to null. + // Release to satisfy the JMM + z_release(); + z_lghi(tmp2, 0); + z_stg(tmp2 /*=0*/, owner_address); + // We need a full fence after clearing owner to avoid stranding. + z_fence(); + // Check if the entry lists are empty. load_and_test_long(tmp2, EntryList_address); - z_brne(not_ok); + z_brne(check_succ); load_and_test_long(tmp2, cxq_address); - z_brne(not_ok); + z_bre(unlocked); // If so we are done. - z_release(); - z_stg(tmp2 /*=0*/, owner_address); + bind(check_succ); - z_bru(unlocked); // CC = EQ here + // Check if there is a successor. + load_and_test_long(tmp2, succ_address); + z_brne(set_eq_unlocked); // If so we are done. - bind(not_ok); + // Save the monitor pointer in the current thread, so we can try to + // reacquire the lock in SharedRuntime::monitor_exit_helper(). + if (!UseObjectMonitorTable) { + z_xilf(monitor, markWord::monitor_value); + } + z_stg(monitor, Address(Z_thread, JavaThread::unlocked_inflated_monitor_offset())); - // The owner may be anonymous, and we removed the last obj entry in - // the lock-stack. This loses the information about the owner. - // Write the thread to the owner field so the runtime knows the owner. - z_stg(Z_thread, owner_address); - z_bru(slow_path); // CC = NE here + z_ltgr(obj, obj); // Set flag = NE + z_bru(slow_path); + + bind(set_eq_unlocked); + z_cr(tmp2, tmp2); // Set flag = EQ } bind(unlocked); diff --git a/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp b/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp index 1990488d8a0..44f897529e7 100644 --- a/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp +++ b/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp @@ -80,8 +80,6 @@ int C2FastUnlockLightweightStub::max_size() const { void C2FastUnlockLightweightStub::emit(C2_MacroAssembler& masm) { assert(_t == rax, "must be"); - Label restore_held_monitor_count_and_slow_path; - { // Restore lock-stack and handle the unlock in runtime. __ bind(_push_and_slow_path); @@ -91,61 +89,9 @@ void C2FastUnlockLightweightStub::emit(C2_MacroAssembler& masm) { __ movptr(Address(_thread, _t), _obj); #endif __ addl(Address(_thread, JavaThread::lock_stack_top_offset()), oopSize); - } - - { // Restore held monitor count and slow path. - - __ bind(restore_held_monitor_count_and_slow_path); - __ bind(_slow_path); - // Restore held monitor count. - __ increment(Address(_thread, JavaThread::held_monitor_count_offset())); - // increment will always result in ZF = 0 (no overflows). + // addl will always result in ZF = 0 (no overflows). __ jmp(slow_path_continuation()); } - - { // Handle monitor medium path. - - __ bind(_check_successor); - - Label fix_zf_and_unlocked; - const Register monitor = _mark; - -#ifndef _LP64 - __ jmpb(restore_held_monitor_count_and_slow_path); -#else // _LP64 - const ByteSize monitor_tag = in_ByteSize(UseObjectMonitorTable ? 0 : checked_cast(markWord::monitor_value)); - const Address succ_address(monitor, ObjectMonitor::succ_offset() - monitor_tag); - const Address owner_address(monitor, ObjectMonitor::owner_offset() - monitor_tag); - - // successor null check. - __ cmpptr(succ_address, NULL_WORD); - __ jccb(Assembler::equal, restore_held_monitor_count_and_slow_path); - - // Release lock. - __ movptr(owner_address, NULL_WORD); - - // Fence. - // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack. - __ lock(); __ addl(Address(rsp, 0), 0); - - // Recheck successor. - __ cmpptr(succ_address, NULL_WORD); - // Observed a successor after the release -> fence we have handed off the monitor - __ jccb(Assembler::notEqual, fix_zf_and_unlocked); - - // Try to relock, if it fails the monitor has been handed over - // TODO: Caveat, this may fail due to deflation, which does - // not handle the monitor handoff. Currently only works - // due to the responsible thread. - __ xorptr(rax, rax); - __ lock(); __ cmpxchgptr(_thread, owner_address); - __ jccb (Assembler::equal, restore_held_monitor_count_and_slow_path); -#endif - - __ bind(fix_zf_and_unlocked); - __ xorl(rax, rax); - __ jmp(unlocked_continuation()); - } } #undef __ diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp index c2801a791cb..839745f76ec 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp @@ -459,87 +459,43 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t // IA32's memory-model is SPO, so STs are ordered with respect to // each other and there's no need for an explicit barrier (fence). // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html. -#ifndef _LP64 - // Note that we could employ various encoding schemes to reduce - // the number of loads below (currently 4) to just 2 or 3. - // Refer to the comments in synchronizer.cpp. - // In practice the chain of fetches doesn't seem to impact performance, however. - xorptr(boxReg, boxReg); - orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); - jccb (Assembler::notZero, DONE_LABEL); - movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); - orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); - jccb (Assembler::notZero, DONE_LABEL); - movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD); - jmpb (DONE_LABEL); -#else // _LP64 - // It's inflated - Label CheckSucc, LNotRecursive, LSuccess, LGoSlowPath; + Label LSuccess, LNotRecursive; cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), 0); jccb(Assembler::equal, LNotRecursive); // Recursive inflated unlock - decq(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); + decrement(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); jmpb(LSuccess); bind(LNotRecursive); + + // Set owner to null. + // Release to satisfy the JMM + movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD); + // We need a full fence after clearing owner to avoid stranding. + // StoreLoad achieves this. + membar(StoreLoad); + + // Check if the entry lists are empty. movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); - jccb (Assembler::notZero, CheckSucc); - // Without cast to int32_t this style of movptr will destroy r10 which is typically obj. - movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD); - jmpb (DONE_LABEL); + jccb(Assembler::zero, LSuccess); // If so we are done. - // Try to avoid passing control into the slow_path ... - bind (CheckSucc); - - // The following optional optimization can be elided if necessary - // Effectively: if (succ == null) goto slow path - // The code reduces the window for a race, however, - // and thus benefits performance. + // Check if there is a successor. cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), NULL_WORD); - jccb (Assembler::zero, LGoSlowPath); + jccb(Assembler::notZero, LSuccess); // If so we are done. - xorptr(boxReg, boxReg); - // Without cast to int32_t this style of movptr will destroy r10 which is typically obj. - movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD); + // Save the monitor pointer in the current thread, so we can try to + // reacquire the lock in SharedRuntime::monitor_exit_helper(). + andptr(tmpReg, ~(int32_t)markWord::monitor_value); +#ifndef _LP64 + get_thread(boxReg); + movptr(Address(boxReg, JavaThread::unlocked_inflated_monitor_offset()), tmpReg); +#else // _LP64 + movptr(Address(r15_thread, JavaThread::unlocked_inflated_monitor_offset()), tmpReg); +#endif - // Memory barrier/fence - // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ - // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack. - // This is faster on Nehalem and AMD Shanghai/Barcelona. - // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences - // We might also restructure (ST Owner=0;barrier;LD _Succ) to - // (mov box,0; xchgq box, &m->Owner; LD _succ) . - lock(); addl(Address(rsp, 0), 0); - - cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), NULL_WORD); - jccb (Assembler::notZero, LSuccess); - - // Rare inopportune interleaving - race. - // The successor vanished in the small window above. - // The lock is contended -- (cxq|EntryList) != null -- and there's no apparent successor. - // We need to ensure progress and succession. - // Try to reacquire the lock. - // If that fails then the new owner is responsible for succession and this - // thread needs to take no further action and can exit via the fast path (success). - // If the re-acquire succeeds then pass control into the slow path. - // As implemented, this latter mode is horrible because we generated more - // coherence traffic on the lock *and* artificially extended the critical section - // length while by virtue of passing control into the slow path. - - // box is really RAX -- the following CMPXCHG depends on that binding - // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R) - lock(); - cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); - // There's no successor so we tried to regrab the lock. - // If that didn't work, then another thread grabbed the - // lock so we're done (and exit was a success). - jccb (Assembler::notEqual, LSuccess); - // Intentional fall-through into slow path - - bind (LGoSlowPath); orl (boxReg, 1); // set ICC.ZF=0 to indicate failure jmpb (DONE_LABEL); @@ -547,7 +503,6 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t testl (boxReg, 0); // set ICC.ZF=1 to indicate success jmpb (DONE_LABEL); -#endif if (LockingMode == LM_LEGACY) { bind (Stacked); movptr(tmpReg, Address (boxReg, 0)); // re-fetch @@ -744,10 +699,7 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax, // Handle inflated monitor. Label inflated, inflated_check_lock_stack; // Finish fast unlock successfully. MUST jump with ZF == 1 - Label unlocked; - - // Assume success. - decrement(Address(thread, JavaThread::held_monitor_count_offset())); + Label unlocked, slow_path; const Register mark = t; const Register monitor = t; @@ -763,8 +715,6 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax, } Label& push_and_slow_path = stub == nullptr ? dummy : stub->push_and_slow_path(); - Label& check_successor = stub == nullptr ? dummy : stub->check_successor(); - Label& slow_path = stub == nullptr ? dummy : stub->slow_path(); { // Lightweight Unlock @@ -839,6 +789,7 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax, const ByteSize monitor_tag = in_ByteSize(UseObjectMonitorTable ? 0 : checked_cast(markWord::monitor_value)); const Address recursions_address{monitor, ObjectMonitor::recursions_offset() - monitor_tag}; const Address cxq_address{monitor, ObjectMonitor::cxq_offset() - monitor_tag}; + const Address succ_address{monitor, ObjectMonitor::succ_offset() - monitor_tag}; const Address EntryList_address{monitor, ObjectMonitor::EntryList_offset() - monitor_tag}; const Address owner_address{monitor, ObjectMonitor::owner_offset() - monitor_tag}; @@ -846,27 +797,42 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax, // Check if recursive. cmpptr(recursions_address, 0); - jccb(Assembler::notEqual, recursive); + jccb(Assembler::notZero, recursive); + + // Set owner to null. + // Release to satisfy the JMM + movptr(owner_address, NULL_WORD); + // We need a full fence after clearing owner to avoid stranding. + // StoreLoad achieves this. + membar(StoreLoad); // Check if the entry lists are empty. movptr(reg_rax, cxq_address); orptr(reg_rax, EntryList_address); - jcc(Assembler::notZero, check_successor); + jccb(Assembler::zero, unlocked); // If so we are done. - // Release lock. - movptr(owner_address, NULL_WORD); - jmpb(unlocked); + // Check if there is a successor. + cmpptr(succ_address, NULL_WORD); + jccb(Assembler::notZero, unlocked); // If so we are done. + + // Save the monitor pointer in the current thread, so we can try to + // reacquire the lock in SharedRuntime::monitor_exit_helper(). + if (!UseObjectMonitorTable) { + andptr(monitor, ~(int32_t)markWord::monitor_value); + } + movptr(Address(thread, JavaThread::unlocked_inflated_monitor_offset()), monitor); + + testl(monitor, monitor); // Fast Unlock ZF = 0 + jmpb(slow_path); // Recursive unlock. bind(recursive); decrement(recursions_address); - xorl(t, t); } bind(unlocked); - if (stub != nullptr) { - bind(stub->unlocked_continuation()); - } + decrement(Address(thread, JavaThread::held_monitor_count_offset())); + xorl(t, t); // Fast Unlock ZF = 1 #ifdef ASSERT // Check that unlocked label is reached with ZF set. @@ -875,6 +841,7 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax, stop("Fast Unlock ZF != 1"); #endif + bind(slow_path); if (stub != nullptr) { bind(stub->slow_path_continuation()); } diff --git a/src/hotspot/share/opto/c2_CodeStubs.hpp b/src/hotspot/share/opto/c2_CodeStubs.hpp index 5db7596e072..318bc2f45fc 100644 --- a/src/hotspot/share/opto/c2_CodeStubs.hpp +++ b/src/hotspot/share/opto/c2_CodeStubs.hpp @@ -105,7 +105,6 @@ private: Register _thread; Label _slow_path; Label _push_and_slow_path; - Label _check_successor; Label _unlocked_continuation; public: C2FastUnlockLightweightStub(Register obj, Register mark, Register t, Register thread) : C2CodeStub(), @@ -114,7 +113,6 @@ public: void emit(C2_MacroAssembler& masm); Label& slow_path() { return _slow_path; } Label& push_and_slow_path() { return _push_and_slow_path; } - Label& check_successor() { return _check_successor; } Label& unlocked_continuation() { return _unlocked_continuation; } Label& slow_path_continuation() { return continuation(); } }; diff --git a/src/hotspot/share/runtime/javaThread.cpp b/src/hotspot/share/runtime/javaThread.cpp index 3528fc5b1bc..14528f6d908 100644 --- a/src/hotspot/share/runtime/javaThread.cpp +++ b/src/hotspot/share/runtime/javaThread.cpp @@ -487,6 +487,7 @@ JavaThread::JavaThread(MemTag mem_tag) : _cont_fastpath_thread_state(1), _held_monitor_count(0), _jni_monitor_count(0), + _unlocked_inflated_monitor(nullptr), _handshake(this), diff --git a/src/hotspot/share/runtime/javaThread.hpp b/src/hotspot/share/runtime/javaThread.hpp index e36b7dfe888..20bb08a4acb 100644 --- a/src/hotspot/share/runtime/javaThread.hpp +++ b/src/hotspot/share/runtime/javaThread.hpp @@ -464,6 +464,7 @@ class JavaThread: public Thread { // It's signed for error detection. intx _held_monitor_count; // used by continuations for fast lock detection intx _jni_monitor_count; + ObjectMonitor* _unlocked_inflated_monitor; private: @@ -615,6 +616,12 @@ private: intx jni_monitor_count() { return _jni_monitor_count; } void clear_jni_monitor_count() { _jni_monitor_count = 0; } + // Support for SharedRuntime::monitor_exit_helper() + ObjectMonitor* unlocked_inflated_monitor() const { return _unlocked_inflated_monitor; } + void clear_unlocked_inflated_monitor() { + _unlocked_inflated_monitor = nullptr; + } + inline bool is_vthread_mounted() const; inline const ContinuationEntry* vthread_continuation() const; @@ -828,6 +835,7 @@ private: static ByteSize cont_fastpath_offset() { return byte_offset_of(JavaThread, _cont_fastpath); } static ByteSize held_monitor_count_offset() { return byte_offset_of(JavaThread, _held_monitor_count); } static ByteSize jni_monitor_count_offset() { return byte_offset_of(JavaThread, _jni_monitor_count); } + static ByteSize unlocked_inflated_monitor_offset() { return byte_offset_of(JavaThread, _unlocked_inflated_monitor); } #if INCLUDE_JVMTI static ByteSize is_in_VTMS_transition_offset() { return byte_offset_of(JavaThread, _is_in_VTMS_transition); } diff --git a/src/hotspot/share/runtime/objectMonitor.cpp b/src/hotspot/share/runtime/objectMonitor.cpp index 367d79a5283..755d49d2c6c 100644 --- a/src/hotspot/share/runtime/objectMonitor.cpp +++ b/src/hotspot/share/runtime/objectMonitor.cpp @@ -178,7 +178,7 @@ OopStorage* ObjectMonitor::_oop_storage = nullptr; // // Cxq points to the set of Recently Arrived Threads attempting entry. // Because we push threads onto _cxq with CAS, the RATs must take the form of -// a singly-linked LIFO. We drain _cxq into EntryList at unlock-time when +// a singly-linked LIFO. We drain _cxq into EntryList at unlock-time when // the unlocking thread notices that EntryList is null but _cxq is != null. // // The EntryList is ordered by the prevailing queue discipline and @@ -210,19 +210,6 @@ OopStorage* ObjectMonitor::_oop_storage = nullptr; // unpark the notifyee. Unparking a notifee in notify() is inefficient - // it's likely the notifyee would simply impale itself on the lock held // by the notifier. -// -// * An interesting alternative is to encode cxq as (List,LockByte) where -// the LockByte is 0 iff the monitor is owned. _owner is simply an auxiliary -// variable, like _recursions, in the scheme. The threads or Events that form -// the list would have to be aligned in 256-byte addresses. A thread would -// try to acquire the lock or enqueue itself with CAS, but exiting threads -// could use a 1-0 protocol and simply STB to set the LockByte to 0. -// Note that is is *not* word-tearing, but it does presume that full-word -// CAS operations are coherent with intermix with STB operations. That's true -// on most common processors. -// -// * See also http://blogs.sun.com/dave - // Check that object() and set_object() are called from the right context: static void check_object_context() { @@ -257,7 +244,6 @@ ObjectMonitor::ObjectMonitor(oop object) : _EntryList(nullptr), _cxq(nullptr), _succ(nullptr), - _Responsible(nullptr), _SpinDuration(ObjectMonitor::Knob_SpinLimit), _contentions(0), _WaitSet(nullptr), @@ -320,17 +306,11 @@ bool ObjectMonitor::enter_is_async_deflating() { return false; } -void ObjectMonitor::enter_for_with_contention_mark(JavaThread* locking_thread, ObjectMonitorContentionMark& contention_mark) { - // Used by ObjectSynchronizer::enter_for to enter for another thread. - // The monitor is private to or already owned by locking_thread which must be suspended. - // So this code may only contend with deflation. - assert(locking_thread == Thread::current() || locking_thread->is_obj_deopt_suspend(), "must be"); +bool ObjectMonitor::TryLockWithContentionMark(JavaThread* locking_thread, ObjectMonitorContentionMark& contention_mark) { assert(contention_mark._monitor == this, "must be"); assert(!is_being_async_deflated(), "must be"); - void* prev_owner = try_set_owner_from(nullptr, locking_thread); - bool success = false; if (prev_owner == nullptr) { @@ -343,8 +323,16 @@ void ObjectMonitor::enter_for_with_contention_mark(JavaThread* locking_thread, O // Racing with deflation. prev_owner = try_set_owner_from(DEFLATER_MARKER, locking_thread); if (prev_owner == DEFLATER_MARKER) { - // Cancelled deflation. Increment contentions as part of the deflation protocol. - add_to_contentions(1); + // We successfully cancelled the in-progress async deflation by + // changing owner from DEFLATER_MARKER to current. We now extend + // the lifetime of the contention_mark (e.g. contentions++) here + // to prevent the deflater thread from winning the last part of + // the 2-part async deflation protocol after the regular + // decrement occurs when the contention_mark goes out of + // scope. ObjectMonitor::deflate_monitor() which is called by + // the deflater thread will decrement contentions after it + // recognizes that the async deflation was cancelled. + contention_mark.extend(); success = true; } else if (prev_owner == nullptr) { // At this point we cannot race with deflation as we have both incremented @@ -360,12 +348,28 @@ void ObjectMonitor::enter_for_with_contention_mark(JavaThread* locking_thread, O set_owner_from_BasicLock(prev_owner, locking_thread); success = true; } + assert(!success || owner_raw() == locking_thread, "must be"); + + return success; +} + +void ObjectMonitor::enter_for_with_contention_mark(JavaThread* locking_thread, ObjectMonitorContentionMark& contention_mark) { + // Used by LightweightSynchronizer::inflate_and_enter in deoptimization path to enter for another thread. + // The monitor is private to or already owned by locking_thread which must be suspended. + // So this code may only contend with deflation. + assert(locking_thread == Thread::current() || locking_thread->is_obj_deopt_suspend(), "must be"); + bool success = TryLockWithContentionMark(locking_thread, contention_mark); + assert(success, "Failed to enter_for: locking_thread=" INTPTR_FORMAT - ", this=" INTPTR_FORMAT "{owner=" INTPTR_FORMAT "}, observed owner: " INTPTR_FORMAT, - p2i(locking_thread), p2i(this), p2i(owner_raw()), p2i(prev_owner)); + ", this=" INTPTR_FORMAT "{owner=" INTPTR_FORMAT "}", + p2i(locking_thread), p2i(this), p2i(owner_raw())); } bool ObjectMonitor::enter_for(JavaThread* locking_thread) { + // Used by ObjectSynchronizer::enter_for() to enter for another thread. + // The monitor is private to or already owned by locking_thread which must be suspended. + // So this code may only contend with deflation. + assert(locking_thread == Thread::current() || locking_thread->is_obj_deopt_suspend(), "must be"); // Block out deflation as soon as possible. ObjectMonitorContentionMark contention_mark(this); @@ -375,19 +379,29 @@ bool ObjectMonitor::enter_for(JavaThread* locking_thread) { return false; } - enter_for_with_contention_mark(locking_thread, contention_mark); + bool success = TryLockWithContentionMark(locking_thread, contention_mark); + + assert(success, "Failed to enter_for: locking_thread=" INTPTR_FORMAT + ", this=" INTPTR_FORMAT "{owner=" INTPTR_FORMAT "}", + p2i(locking_thread), p2i(this), p2i(owner_raw())); assert(owner_raw() == locking_thread, "must be"); return true; } -bool ObjectMonitor::try_enter(JavaThread* current) { - // TryLock avoids the CAS +bool ObjectMonitor::try_enter(JavaThread* current, bool check_for_recursion) { + // TryLock avoids the CAS and handles deflation. TryLockResult r = TryLock(current); if (r == TryLockResult::Success) { assert(_recursions == 0, "invariant"); return true; } + // If called from SharedRuntime::monitor_exit_helper(), we know that + // this thread doesn't already own the lock. + if (!check_for_recursion) { + return false; + } + if (r == TryLockResult::HasOwner && owner() == current) { _recursions++; return true; @@ -400,7 +414,6 @@ bool ObjectMonitor::try_enter(JavaThread* current) { set_owner_from_BasicLock(cur, current); // Convert from BasicLock* to Thread*. return true; } - return false; } @@ -561,16 +574,40 @@ void ObjectMonitor::enter_with_contention_mark(JavaThread *current, ObjectMonito ObjectMonitor::TryLockResult ObjectMonitor::TryLock(JavaThread* current) { void* own = owner_raw(); - if (own != nullptr) return TryLockResult::HasOwner; - if (try_set_owner_from(nullptr, current) == nullptr) { - assert(_recursions == 0, "invariant"); - return TryLockResult::Success; + void* first_own = own; + + for (;;) { + if (own == DEFLATER_MARKER) { + // Block out deflation as soon as possible. + ObjectMonitorContentionMark contention_mark(this); + + // Check for deflation. + if (enter_is_async_deflating()) { + // Treat deflation as interference. + return TryLockResult::Interference; + } + if (TryLockWithContentionMark(current, contention_mark)) { + assert(_recursions == 0, "invariant"); + return TryLockResult::Success; + } else { + // Deflation won or change of owner; dont spin + break; + } + } else if (own == nullptr) { + void* prev_own = try_set_owner_from(nullptr, current); + if (prev_own == nullptr) { + assert(_recursions == 0, "invariant"); + return TryLockResult::Success; + } else { + // The lock had been free momentarily, but we lost the race to the lock. + own = prev_own; + } + } else { + // Retry doesn't make as much sense because the lock was just acquired. + break; + } } - // The lock had been free momentarily, but we lost the race to the lock. - // Interference -- the CAS failed. - // We can either return -1 or retry. - // Retry doesn't make as much sense because the lock was just acquired. - return TryLockResult::Interference; + return first_own == own ? TryLockResult::HasOwner : TryLockResult::Interference; } // Deflate the specified ObjectMonitor if not in-use. Returns true if it @@ -746,8 +783,6 @@ const char* ObjectMonitor::is_busy_to_string(stringStream* ss) { return ss->base(); } -#define MAX_RECHECK_INTERVAL 1000 - void ObjectMonitor::EnterI(JavaThread* current) { assert(current->thread_state() == _thread_blocked, "invariant"); @@ -755,25 +790,6 @@ void ObjectMonitor::EnterI(JavaThread* current) { if (TryLock(current) == TryLockResult::Success) { assert(_succ != current, "invariant"); assert(owner_raw() == current, "invariant"); - assert(_Responsible != current, "invariant"); - return; - } - - if (try_set_owner_from(DEFLATER_MARKER, current) == DEFLATER_MARKER) { - // Cancelled the in-progress async deflation by changing owner from - // DEFLATER_MARKER to current. As part of the contended enter protocol, - // contentions was incremented to a positive value before EnterI() - // was called and that prevents the deflater thread from winning the - // last part of the 2-part async deflation protocol. After EnterI() - // returns to enter(), contentions is decremented because the caller - // now owns the monitor. We bump contentions an extra time here to - // prevent the deflater thread from winning the last part of the - // 2-part async deflation protocol after the regular decrement - // occurs in enter(). The deflater thread will decrement contentions - // after it recognizes that the async deflation was cancelled. - add_to_contentions(1); - assert(_succ != current, "invariant"); - assert(_Responsible != current, "invariant"); return; } @@ -789,14 +805,12 @@ void ObjectMonitor::EnterI(JavaThread* current) { if (TrySpin(current)) { assert(owner_raw() == current, "invariant"); assert(_succ != current, "invariant"); - assert(_Responsible != current, "invariant"); return; } // The Spin failed -- Enqueue and park the thread ... assert(_succ != current, "invariant"); assert(owner_raw() != current, "invariant"); - assert(_Responsible != current, "invariant"); // Enqueue "current" on ObjectMonitor's _cxq. // @@ -826,40 +840,10 @@ void ObjectMonitor::EnterI(JavaThread* current) { if (TryLock(current) == TryLockResult::Success) { assert(_succ != current, "invariant"); assert(owner_raw() == current, "invariant"); - assert(_Responsible != current, "invariant"); return; } } - // Check for cxq|EntryList edge transition to non-null. This indicates - // the onset of contention. While contention persists exiting threads - // will use a ST:MEMBAR:LD 1-1 exit protocol. When contention abates exit - // operations revert to the faster 1-0 mode. This enter operation may interleave - // (race) a concurrent 1-0 exit operation, resulting in stranding, so we - // arrange for one of the contending thread to use a timed park() operations - // to detect and recover from the race. (Stranding is form of progress failure - // where the monitor is unlocked but all the contending threads remain parked). - // That is, at least one of the contended threads will periodically poll _owner. - // One of the contending threads will become the designated "Responsible" thread. - // The Responsible thread uses a timed park instead of a normal indefinite park - // operation -- it periodically wakes and checks for and recovers from potential - // strandings admitted by 1-0 exit operations. We need at most one Responsible - // thread per-monitor at any given moment. Only threads on cxq|EntryList may - // be responsible for a monitor. - // - // Currently, one of the contended threads takes on the added role of "Responsible". - // A viable alternative would be to use a dedicated "stranding checker" thread - // that periodically iterated over all the threads (or active monitors) and unparked - // successors where there was risk of stranding. This would help eliminate the - // timer scalability issues we see on some platforms as we'd only have one thread - // -- the checker -- parked on a timer. - - if (nxt == nullptr && _EntryList == nullptr) { - // Try to assume the role of responsible thread for the monitor. - // CONSIDER: ST vs CAS vs { if (Responsible==null) Responsible=current } - Atomic::replace_if_null(&_Responsible, current); - } - // The lock might have been released while this thread was occupied queueing // itself onto _cxq. To close the race and avoid "stranding" and // progress-liveness failure we must resample-retry _owner before parking. @@ -871,8 +855,6 @@ void ObjectMonitor::EnterI(JavaThread* current) { // to defer the state transitions until absolutely necessary, // and in doing so avoid some transitions ... - int recheckInterval = 1; - for (;;) { if (TryLock(current) == TryLockResult::Success) { @@ -881,37 +863,12 @@ void ObjectMonitor::EnterI(JavaThread* current) { assert(owner_raw() != current, "invariant"); // park self - if (_Responsible == current) { - current->_ParkEvent->park((jlong) recheckInterval); - // Increase the recheckInterval, but clamp the value. - recheckInterval *= 8; - if (recheckInterval > MAX_RECHECK_INTERVAL) { - recheckInterval = MAX_RECHECK_INTERVAL; - } - } else { - current->_ParkEvent->park(); - } + current->_ParkEvent->park(); if (TryLock(current) == TryLockResult::Success) { break; } - if (try_set_owner_from(DEFLATER_MARKER, current) == DEFLATER_MARKER) { - // Cancelled the in-progress async deflation by changing owner from - // DEFLATER_MARKER to current. As part of the contended enter protocol, - // contentions was incremented to a positive value before EnterI() - // was called and that prevents the deflater thread from winning the - // last part of the 2-part async deflation protocol. After EnterI() - // returns to enter(), contentions is decremented because the caller - // now owns the monitor. We bump contentions an extra time here to - // prevent the deflater thread from winning the last part of the - // 2-part async deflation protocol after the regular decrement - // occurs in enter(). The deflater thread will decrement contentions - // after it recognizes that the async deflation was cancelled. - add_to_contentions(1); - break; - } - // The lock is still contested. // Keep a tally of the # of futile wakeups. @@ -953,44 +910,23 @@ void ObjectMonitor::EnterI(JavaThread* current) { assert(owner_raw() == current, "invariant"); UnlinkAfterAcquire(current, &node); - if (_succ == current) _succ = nullptr; - - assert(_succ != current, "invariant"); - if (_Responsible == current) { - _Responsible = nullptr; - OrderAccess::fence(); // Dekker pivot-point - - // We may leave threads on cxq|EntryList without a designated - // "Responsible" thread. This is benign. When this thread subsequently - // exits the monitor it can "see" such preexisting "old" threads -- - // threads that arrived on the cxq|EntryList before the fence, above -- - // by LDing cxq|EntryList. Newly arrived threads -- that is, threads - // that arrive on cxq after the ST:MEMBAR, above -- will set Responsible - // non-null and elect a new "Responsible" timer thread. - // - // This thread executes: - // ST Responsible=null; MEMBAR (in enter epilogue - here) - // LD cxq|EntryList (in subsequent exit) - // - // Entering threads in the slow/contended path execute: - // ST cxq=nonnull; MEMBAR; LD Responsible (in enter prolog) - // The (ST cxq; MEMBAR) is accomplished with CAS(). - // - // The MEMBAR, above, prevents the LD of cxq|EntryList in the subsequent - // exit operation from floating above the ST Responsible=null. + if (_succ == current) { + _succ = nullptr; + // Note that we don't need to do OrderAccess::fence() after clearing + // _succ here, since we own the lock. } // We've acquired ownership with CAS(). // CAS is serializing -- it has MEMBAR/FENCE-equivalent semantics. // But since the CAS() this thread may have also stored into _succ, - // EntryList, cxq or Responsible. These meta-data updates must be + // EntryList or cxq. These meta-data updates must be // visible __before this thread subsequently drops the lock. // Consider what could occur if we didn't enforce this constraint -- // STs to monitor meta-data and user-data could reorder with (become // visible after) the ST in exit that drops ownership of the lock. // Some other thread could then acquire the lock, but observe inconsistent // or old monitor meta-data and heap data. That violates the JMM. - // To that end, the 1-0 exit() operation must have at least STST|LDST + // To that end, the exit() operation must have at least STST|LDST // "release" barrier semantics. Specifically, there must be at least a // STST|LDST barrier in exit() before the ST of null into _owner that drops // the lock. The barrier ensures that changes to monitor meta-data and data @@ -1000,8 +936,7 @@ void ObjectMonitor::EnterI(JavaThread* current) { // // Critically, any prior STs to _succ or EntryList must be visible before // the ST of null into _owner in the *subsequent* (following) corresponding - // monitorexit. Recall too, that in 1-0 mode monitorexit does not necessarily - // execute a serializing instruction. + // monitorexit. return; } @@ -1174,39 +1109,32 @@ void ObjectMonitor::UnlinkAfterAcquire(JavaThread* current, ObjectWaiter* curren // In that case exit() is called with _thread_state == _thread_blocked, // but the monitor's _contentions field is > 0, which inhibits reclamation. // -// 1-0 exit -// ~~~~~~~~ -// ::exit() uses a canonical 1-1 idiom with a MEMBAR although some of -// the fast-path operators have been optimized so the common ::exit() -// operation is 1-0, e.g., see macroAssembler_x86.cpp: fast_unlock(). -// The code emitted by fast_unlock() elides the usual MEMBAR. This -// greatly improves latency -- MEMBAR and CAS having considerable local -// latency on modern processors -- but at the cost of "stranding". Absent the -// MEMBAR, a thread in fast_unlock() can race a thread in the slow -// ::enter() path, resulting in the entering thread being stranding -// and a progress-liveness failure. Stranding is extremely rare. -// We use timers (timed park operations) & periodic polling to detect -// and recover from stranding. Potentially stranded threads periodically -// wake up and poll the lock. See the usage of the _Responsible variable. +// This is the exit part of the locking protocol, often implemented in +// C2_MacroAssembler::fast_unlock() // -// The CAS() in enter provides for safety and exclusion, while the CAS or -// MEMBAR in exit provides for progress and avoids stranding. 1-0 locking -// eliminates the CAS/MEMBAR from the exit path, but it admits stranding. -// We detect and recover from stranding with timers. +// 1. A release barrier ensures that changes to monitor meta-data +// (_succ, _EntryList, _cxq) and data protected by the lock will be +// visible before we release the lock. +// 2. Release the lock by clearing the owner. +// 3. A storeload MEMBAR is needed between releasing the owner and +// subsequently reading meta-data to safely determine if the lock is +// contended (step 4) without an elected successor (step 5). +// 4. If both _EntryList and _cxq are null, we are done, since there is no +// other thread waiting on the lock to wake up. I.e. there is no +// contention. +// 5. If there is a successor (_succ is non-null), we are done. The +// responsibility for guaranteeing progress-liveness has now implicitly +// been moved from the exiting thread to the successor. +// 6. There are waiters in the entry list (_EntryList and/or cxq are +// non-null), but there is no successor (_succ is null), so we need to +// wake up (unpark) a waiting thread to avoid stranding. // -// If a thread transiently strands it'll park until (a) another -// thread acquires the lock and then drops the lock, at which time the -// exiting thread will notice and unpark the stranded thread, or, (b) -// the timer expires. If the lock is high traffic then the stranding latency -// will be low due to (a). If the lock is low traffic then the odds of -// stranding are lower, although the worst-case stranding latency -// is longer. Critically, we don't want to put excessive load in the -// platform's timer subsystem. We want to minimize both the timer injection -// rate (timers created/sec) as well as the number of timers active at -// any one time. (more precisely, we want to minimize timer-seconds, which is -// the integral of the # of active timers at any instant over time). -// Both impinge on OS scalability. Given that, at most one thread parked on -// a monitor will use a timer. +// Note that since only the current lock owner can manipulate the _EntryList +// or drain _cxq, we need to reacquire the lock before we can wake up +// (unpark) a waiting thread. +// +// The CAS() in enter provides for safety and exclusion, while the +// MEMBAR in exit provides for progress and avoids stranding. // // There is also the risk of a futile wake-up. If we drop the lock // another thread can reacquire the lock immediately, and we can @@ -1248,10 +1176,6 @@ void ObjectMonitor::exit(JavaThread* current, bool not_suspended) { return; } - // Invariant: after setting Responsible=null an thread must execute - // a MEMBAR or other serializing instruction before fetching EntryList|cxq. - _Responsible = nullptr; - #if INCLUDE_JFR // get the owner's thread id for the MonitorEnter event // if it is enabled and the thread isn't suspended @@ -1278,14 +1202,15 @@ void ObjectMonitor::exit(JavaThread* current, bool not_suspended) { // Other threads are blocked trying to acquire the lock. // Normally the exiting thread is responsible for ensuring succession, - // but if other successors are ready or other entering threads are spinning - // then this thread can simply store null into _owner and exit without - // waking a successor. The existence of spinners or ready successors - // guarantees proper succession (liveness). Responsibility passes to the - // ready or running successors. The exiting thread delegates the duty. - // More precisely, if a successor already exists this thread is absolved - // of the responsibility of waking (unparking) one. - // + // but if this thread observes other successors are ready or other + // entering threads are spinning after it has stored null into _owner + // then it can exit without waking a successor. The existence of + // spinners or ready successors guarantees proper succession (liveness). + // Responsibility passes to the ready or running successors. The exiting + // thread delegates the duty. More precisely, if a successor already + // exists this thread is absolved of the responsibility of waking + // (unparking) one. + // The _succ variable is critical to reducing futile wakeup frequency. // _succ identifies the "heir presumptive" thread that has been made // ready (unparked) but that has not yet run. We need only one such @@ -1296,24 +1221,20 @@ void ObjectMonitor::exit(JavaThread* current, bool not_suspended) { // Note that spinners in Enter() also set _succ non-null. // In the current implementation spinners opportunistically set // _succ so that exiting threads might avoid waking a successor. - // Another less appealing alternative would be for the exiting thread - // to drop the lock and then spin briefly to see if a spinner managed - // to acquire the lock. If so, the exiting thread could exit - // immediately without waking a successor, otherwise the exiting - // thread would need to dequeue and wake a successor. - // (Note that we'd need to make the post-drop spin short, but no - // shorter than the worst-case round-trip cache-line migration time. - // The dropped lock needs to become visible to the spinner, and then - // the acquisition of the lock by the spinner must become visible to - // the exiting thread). + // Which means that the exiting thread could exit immediately without + // waking a successor, if it observes a successor after it has dropped + // the lock. Note that the dropped lock needs to become visible to the + // spinner. // It appears that an heir-presumptive (successor) must be made ready. // Only the current lock owner can manipulate the EntryList or // drain _cxq, so we need to reacquire the lock. If we fail // to reacquire the lock the responsibility for ensuring succession // falls to the new owner. - // - if (try_set_owner_from(nullptr, current) != nullptr) { + + if (TryLock(current) != TryLockResult::Success) { + // Some other thread acquired the lock (or the monitor was + // deflated). Either way we are done. return; } @@ -1376,7 +1297,7 @@ void ObjectMonitor::exit(JavaThread* current, bool not_suspended) { q = p; } - // In 1-0 mode we need: ST EntryList; MEMBAR #storestore; ST _owner = nullptr + // We need to: ST EntryList; MEMBAR #storestore; ST _owner = nullptr // The MEMBAR is satisfied by the release_store() operation in ExitEpilog(). // See if we can abdicate to a spinner instead of waking a thread. @@ -1566,8 +1487,6 @@ void ObjectMonitor::wait(jlong millis, bool interruptible, TRAPS) { AddWaiter(&node); Thread::SpinRelease(&_WaitSetLock); - _Responsible = nullptr; - intx save = _recursions; // record the old recursion count _waiters++; // increment the number of waiters _recursions = 0; // set the recursion level to be 1 @@ -2245,7 +2164,6 @@ void ObjectMonitor::print() const { print_on(tty); } // _EntryList = 0x0000000000000000 // _cxq = 0x0000000000000000 // _succ = 0x0000000000000000 -// _Responsible = 0x0000000000000000 // _SpinDuration = 5000 // _contentions = 0 // _WaitSet = 0x0000700009756248 @@ -2274,7 +2192,6 @@ void ObjectMonitor::print_debug_style_on(outputStream* st) const { st->print_cr(" _EntryList = " INTPTR_FORMAT, p2i(_EntryList)); st->print_cr(" _cxq = " INTPTR_FORMAT, p2i(_cxq)); st->print_cr(" _succ = " INTPTR_FORMAT, p2i(_succ)); - st->print_cr(" _Responsible = " INTPTR_FORMAT, p2i(_Responsible)); st->print_cr(" _SpinDuration = %d", _SpinDuration); st->print_cr(" _contentions = %d", contentions()); st->print_cr(" _WaitSet = " INTPTR_FORMAT, p2i(_WaitSet)); diff --git a/src/hotspot/share/runtime/objectMonitor.hpp b/src/hotspot/share/runtime/objectMonitor.hpp index ef85559c2b6..30d2e509416 100644 --- a/src/hotspot/share/runtime/objectMonitor.hpp +++ b/src/hotspot/share/runtime/objectMonitor.hpp @@ -179,7 +179,6 @@ class ObjectMonitor : public CHeapObj { ObjectWaiter* volatile _cxq; // LL of recently-arrived threads blocked on entry. JavaThread* volatile _succ; // Heir presumptive thread - used for futile wakeup throttling - JavaThread* volatile _Responsible; volatile int _SpinDuration; @@ -348,7 +347,7 @@ class ObjectMonitor : public CHeapObj { void enter_for_with_contention_mark(JavaThread* locking_thread, ObjectMonitorContentionMark& contention_mark); bool enter_for(JavaThread* locking_thread); bool enter(JavaThread* current); - bool try_enter(JavaThread* current); + bool try_enter(JavaThread* current, bool check_for_recursion = true); bool spin_enter(JavaThread* current); void enter_with_contention_mark(JavaThread* current, ObjectMonitorContentionMark& contention_mark); void exit(JavaThread* current, bool not_suspended = true); @@ -377,6 +376,7 @@ class ObjectMonitor : public CHeapObj { enum class TryLockResult { Interference = -1, HasOwner = 0, Success = 1 }; + bool TryLockWithContentionMark(JavaThread* locking_thread, ObjectMonitorContentionMark& contention_mark); TryLockResult TryLock(JavaThread* current); bool TrySpin(JavaThread* current); @@ -395,12 +395,17 @@ class ObjectMonitorContentionMark : StackObj { DEBUG_ONLY(friend class ObjectMonitor;) ObjectMonitor* _monitor; + bool _extended; NONCOPYABLE(ObjectMonitorContentionMark); public: explicit ObjectMonitorContentionMark(ObjectMonitor* monitor); ~ObjectMonitorContentionMark(); + + // Extends the contention scope beyond this objects lifetime. + // Requires manual decrement of the contentions counter. + void extend(); }; #endif // SHARE_RUNTIME_OBJECTMONITOR_HPP diff --git a/src/hotspot/share/runtime/objectMonitor.inline.hpp b/src/hotspot/share/runtime/objectMonitor.inline.hpp index d26c459b1b4..6d3c6ff24c3 100644 --- a/src/hotspot/share/runtime/objectMonitor.inline.hpp +++ b/src/hotspot/share/runtime/objectMonitor.inline.hpp @@ -206,15 +206,32 @@ inline void ObjectMonitor::set_next_om(ObjectMonitor* new_value) { Atomic::store(&_next_om, new_value); } +// Block out deflation. inline ObjectMonitorContentionMark::ObjectMonitorContentionMark(ObjectMonitor* monitor) - : _monitor(monitor) { + : _monitor(monitor), _extended(false) { + // Contentions is incremented to a positive value as part of the + // contended enter protocol, which prevents the deflater thread from + // winning the last part of the 2-part async deflation + // protocol. See: ObjectMonitor::deflate_monitor() and + // ObjectMonitor::TryLockWithContentionMark(). _monitor->add_to_contentions(1); } inline ObjectMonitorContentionMark::~ObjectMonitorContentionMark() { + // Decrement contentions when the contention mark goes out of + // scope. This opens up for deflation, if the contention mark + // hasn't been extended. _monitor->add_to_contentions(-1); } +inline void ObjectMonitorContentionMark::extend() { + // Used by ObjectMonitor::TryLockWithContentionMark() to "extend the + // lifetime" of the contention mark. + assert(!_extended, "extending twice is probably a bad design"); + _monitor->add_to_contentions(1); + _extended = true; +} + inline oop ObjectMonitor::object_peek() const { if (_object.is_null()) { return nullptr; diff --git a/src/hotspot/share/runtime/sharedRuntime.cpp b/src/hotspot/share/runtime/sharedRuntime.cpp index 6ca7f42e038..e4d4e6aea0f 100644 --- a/src/hotspot/share/runtime/sharedRuntime.cpp +++ b/src/hotspot/share/runtime/sharedRuntime.cpp @@ -1963,6 +1963,26 @@ void SharedRuntime::monitor_exit_helper(oopDesc* obj, BasicLock* lock, JavaThrea assert(JavaThread::current() == current, "invariant"); // Exit must be non-blocking, and therefore no exceptions can be thrown. ExceptionMark em(current); + + // Check if C2_MacroAssembler::fast_unlock() or + // C2_MacroAssembler::fast_unlock_lightweight() unlocked an inflated + // monitor before going slow path. Since there is no safepoint + // polling when calling into the VM, we can be sure that the monitor + // hasn't been deallocated. + ObjectMonitor* m = current->unlocked_inflated_monitor(); + if (m != nullptr) { + assert(m->owner_raw() != current, "must be"); + current->clear_unlocked_inflated_monitor(); + + // We need to reacquire the lock before we can call ObjectSynchronizer::exit(). + if (!m->try_enter(current, /*check_for_recursion*/ false)) { + // Some other thread acquired the lock (or the monitor was + // deflated). Either way we are done. + current->dec_held_monitor_count(); + return; + } + } + // The object could become unlocked through a JNI call, which we have no other checks for. // Give a fatal message if CheckJNICalls. Otherwise we ignore it. if (obj->is_unlocked()) { diff --git a/test/micro/org/openjdk/bench/vm/lang/LockUnlock.java b/test/micro/org/openjdk/bench/vm/lang/LockUnlock.java index 3ed862e8218..39c8569532e 100644 --- a/test/micro/org/openjdk/bench/vm/lang/LockUnlock.java +++ b/test/micro/org/openjdk/bench/vm/lang/LockUnlock.java @@ -309,10 +309,11 @@ public class LockUnlock { } /** - * With two threads lockObject1 will be contended so should be - * inflated. + * With three threads lockObject1 will be contended so should be + * inflated. Three threads is also needed to ensure a high level + * of code coverage in the locking code. */ - @Threads(2) + @Threads(3) @Benchmark public void testContendedLock() { synchronized (lockObject1) {