From a9e28164ebd55a1d4788e04360f88b1ec2e17ddc Mon Sep 17 00:00:00 2001 From: "Daniel D. Daugherty" Date: Fri, 13 Feb 2015 10:01:42 -0800 Subject: [PATCH] 8061553: Contended Locking fast enter bucket JEP-143/JDK-8061553 Contended Locking fast enter bucket Co-authored-by: Dave Dice Co-authored-by: Karen Kinnear Reviewed-by: dholmes, acorn --- .../src/cpu/sparc/vm/macroAssembler_sparc.cpp | 16 +---- hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp | 45 ++----------- .../src/share/vm/runtime/interfaceSupport.hpp | 8 ++- .../src/share/vm/runtime/sharedRuntime.cpp | 15 ++++- hotspot/src/share/vm/runtime/synchronizer.cpp | 64 +++++++++++++++++++ hotspot/src/share/vm/runtime/synchronizer.hpp | 2 + 6 files changed, 96 insertions(+), 54 deletions(-) diff --git a/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp index 36faeec2ee5..226b80daade 100644 --- a/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -2943,24 +2943,14 @@ void MacroAssembler::compiler_lock_object(Register Roop, Register Rmark, } bind (IsInflated); - if (EmitSync & 64) { - // If m->owner != null goto IsLocked - // Test-and-CAS vs CAS - // Pessimistic form avoids futile (doomed) CAS attempts - // The optimistic form avoids RTS->RTO cache line upgrades. - ld_ptr(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rscratch); - andcc(Rscratch, Rscratch, G0); - brx(Assembler::notZero, false, Assembler::pn, done); - delayed()->nop(); - // m->owner == null : it's unlocked. - } // Try to CAS m->owner from null to Self // Invariant: if we acquire the lock then _recursions should be 0. add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark); mov(G2_thread, Rscratch); cas_ptr(Rmark, G0, Rscratch); - cmp(Rscratch, G0); + andcc(Rscratch, Rscratch, G0); // set ICCs for done: icc.zf iff success + // set icc.zf : 1=success 0=failure // ST box->displaced_header = NonZero. // Any non-zero value suffices: // markOopDesc::unused_mark(), G2_thread, RBox, RScratch, rsp, etc. diff --git a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp index 2cb6bd820d0..85c738ad4ed 100644 --- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp +++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp @@ -1718,27 +1718,6 @@ void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg // Force all sync thru slow-path: slow_enter() and slow_exit() movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())); cmpptr (rsp, (int32_t)NULL_WORD); - } else - if (EmitSync & 2) { - Label DONE_LABEL ; - if (UseBiasedLocking) { - // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument. - biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, counters); - } - - movptr(tmpReg, Address(objReg, 0)); // fetch markword - orptr (tmpReg, 0x1); - movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS - if (os::is_MP()) { - lock(); - } - cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg - jccb(Assembler::equal, DONE_LABEL); - // Recursive locking - subptr(tmpReg, rsp); - andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) ); - movptr(Address(boxReg, 0), tmpReg); - bind(DONE_LABEL); } else { // Possible cases that we'll encounter in fast_lock // ------------------------------------------------ @@ -1923,29 +1902,19 @@ void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg } #else // _LP64 // It's inflated + movq(scrReg, tmpReg); + xorq(tmpReg, tmpReg); - // TODO: someday avoid the ST-before-CAS penalty by - // relocating (deferring) the following ST. - // We should also think about trying a CAS without having - // fetched _owner. If the CAS is successful we may - // avoid an RTO->RTS upgrade on the $line. - - // Without cast to int32_t a movptr will destroy r10 which is typically obj - movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())); - - movptr (boxReg, tmpReg); - movptr(tmpReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); - testptr(tmpReg, tmpReg); - jccb (Assembler::notZero, DONE_LABEL); - - // It's inflated and appears unlocked if (os::is_MP()) { lock(); } - cmpxchgptr(r15_thread, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); + cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); + // Unconditionally set box->_displaced_header = markOopDesc::unused_mark(). + // Without cast to int32_t movptr will destroy r10 which is typically obj. + movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())); // Intentional fall-through into DONE_LABEL ... + // Propagate ICC.ZF from CAS above into DONE_LABEL. #endif // _LP64 - #if INCLUDE_RTM_OPT } // use_rtm() #endif diff --git a/hotspot/src/share/vm/runtime/interfaceSupport.hpp b/hotspot/src/share/vm/runtime/interfaceSupport.hpp index fa9ad5e5185..471be25d751 100644 --- a/hotspot/src/share/vm/runtime/interfaceSupport.hpp +++ b/hotspot/src/share/vm/runtime/interfaceSupport.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -511,6 +511,12 @@ class RuntimeHistogramElement : public HistogramElement { Thread* THREAD = thread; \ debug_only(VMEntryWrapper __vew;) +#define JRT_BLOCK_NO_ASYNC \ + { \ + ThreadInVMfromJavaNoAsyncException __tiv(thread); \ + Thread* THREAD = thread; \ + debug_only(VMEntryWrapper __vew;) + #define JRT_BLOCK_END } #define JRT_END } diff --git a/hotspot/src/share/vm/runtime/sharedRuntime.cpp b/hotspot/src/share/vm/runtime/sharedRuntime.cpp index c2b72937a8f..04a23aa23c1 100644 --- a/hotspot/src/share/vm/runtime/sharedRuntime.cpp +++ b/hotspot/src/share/vm/runtime/sharedRuntime.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -1792,7 +1792,17 @@ JRT_END // Handles the uncommon case in locking, i.e., contention or an inflated lock. -JRT_ENTRY_NO_ASYNC(void, SharedRuntime::complete_monitor_locking_C(oopDesc* _obj, BasicLock* lock, JavaThread* thread)) +JRT_BLOCK_ENTRY(void, SharedRuntime::complete_monitor_locking_C(oopDesc* _obj, BasicLock* lock, JavaThread* thread)) + if (!SafepointSynchronize::is_synchronizing()) { + // Only try quick_enter() if we're not trying to reach a safepoint + // so that the calling thread reaches the safepoint more quickly. + if (ObjectSynchronizer::quick_enter(_obj, thread, lock)) return; + } + // NO_ASYNC required because an async exception on the state transition destructor + // would leave you with the lock held and it would never be released. + // The normal monitorenter NullPointerException is thrown without acquiring a lock + // and the model is that an exception implies the method failed. + JRT_BLOCK_NO_ASYNC oop obj(_obj); if (PrintBiasedLockingStatistics) { Atomic::inc(BiasedLocking::slow_path_entry_count_addr()); @@ -1805,6 +1815,7 @@ JRT_ENTRY_NO_ASYNC(void, SharedRuntime::complete_monitor_locking_C(oopDesc* _obj ObjectSynchronizer::slow_enter(h_obj, lock, CHECK); } assert(!HAS_PENDING_EXCEPTION, "Should have no exception here"); + JRT_BLOCK_END JRT_END // Handles the uncommon cases of monitor unlocking in compiled code diff --git a/hotspot/src/share/vm/runtime/synchronizer.cpp b/hotspot/src/share/vm/runtime/synchronizer.cpp index 7f829ba6991..fa2928f6287 100644 --- a/hotspot/src/share/vm/runtime/synchronizer.cpp +++ b/hotspot/src/share/vm/runtime/synchronizer.cpp @@ -122,6 +122,70 @@ static volatile int MonitorFreeCount = 0; // # on gFreeList static volatile int MonitorPopulation = 0; // # Extant -- in circulation #define CHAINMARKER (cast_to_oop(-1)) + +// =====================> Quick functions + +// The quick_* forms are special fast-path variants used to improve +// performance. In the simplest case, a "quick_*" implementation could +// simply return false, in which case the caller will perform the necessary +// state transitions and call the slow-path form. +// The fast-path is designed to handle frequently arising cases in an efficient +// manner and is just a degenerate "optimistic" variant of the slow-path. +// returns true -- to indicate the call was satisfied. +// returns false -- to indicate the call needs the services of the slow-path. +// A no-loitering ordinance is in effect for code in the quick_* family +// operators: safepoints or indefinite blocking (blocking that might span a +// safepoint) are forbidden. Generally the thread_state() is _in_Java upon +// entry. + +// The LockNode emitted directly at the synchronization site would have +// been too big if it were to have included support for the cases of inflated +// recursive enter and exit, so they go here instead. +// Note that we can't safely call AsyncPrintJavaStack() from within +// quick_enter() as our thread state remains _in_Java. + +bool ObjectSynchronizer::quick_enter(oop obj, Thread * Self, + BasicLock * Lock) { + assert(!SafepointSynchronize::is_at_safepoint(), "invariant"); + assert(Self->is_Java_thread(), "invariant"); + assert(((JavaThread *) Self)->thread_state() == _thread_in_Java, "invariant"); + No_Safepoint_Verifier nsv; + if (obj == NULL) return false; // Need to throw NPE + const markOop mark = obj->mark(); + + if (mark->has_monitor()) { + ObjectMonitor * const m = mark->monitor(); + assert(m->object() == obj, "invariant"); + Thread * const owner = (Thread *) m->_owner; + + // Lock contention and Transactional Lock Elision (TLE) diagnostics + // and observability + // Case: light contention possibly amenable to TLE + // Case: TLE inimical operations such as nested/recursive synchronization + + if (owner == Self) { + m->_recursions++; + return true; + } + + if (owner == NULL && + Atomic::cmpxchg_ptr(Self, &(m->_owner), NULL) == NULL) { + assert(m->_recursions == 0, "invariant"); + assert(m->_owner == Self, "invariant"); + return true; + } + } + + // Note that we could inflate in quick_enter. + // This is likely a useful optimization + // Critically, in quick_enter() we must not: + // -- perform bias revocation, or + // -- block indefinitely, or + // -- reach a safepoint + + return false; // revert to slow-path +} + // ----------------------------------------------------------------------------- // Fast Monitor Enter/Exit // This the fast monitor enter. The interpreter and compiler use diff --git a/hotspot/src/share/vm/runtime/synchronizer.hpp b/hotspot/src/share/vm/runtime/synchronizer.hpp index f2ff4c6f100..320a617cb39 100644 --- a/hotspot/src/share/vm/runtime/synchronizer.hpp +++ b/hotspot/src/share/vm/runtime/synchronizer.hpp @@ -72,6 +72,8 @@ class ObjectSynchronizer : AllStatic { static void notify(Handle obj, TRAPS); static void notifyall(Handle obj, TRAPS); + static bool quick_enter(oop obj, Thread* Self, BasicLock* Lock); + // Special internal-use-only method for use by JVM infrastructure // that needs to wait() on a java-level object but that can't risk // throwing unexpected InterruptedExecutionExceptions.