8225631: Consider replacing muxAcquire/Release with PlatformMonitor
Reviewed-by: coleenp, dcubed, kbarrett
This commit is contained in:
parent
646c20022c
commit
99eac53580
@ -1520,7 +1520,7 @@ void os::PlatformEvent::unpark() {
|
||||
// shake out uses of park() and unpark() without checking state conditions
|
||||
// properly. This spurious return doesn't manifest itself in any user code
|
||||
// but only in the correctly written condition checking loops of ObjectMonitor,
|
||||
// Mutex/Monitor, Thread::muxAcquire and JavaThread::sleep
|
||||
// Mutex/Monitor, and JavaThread::sleep
|
||||
|
||||
if (Atomic::xchg(&_event, 1) >= 0) return;
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -125,7 +125,6 @@ class ParkEvent : public os::PlatformEvent {
|
||||
public:
|
||||
// MCS-CLH list linkage and Native Mutex/Monitor
|
||||
ParkEvent * volatile ListNext ;
|
||||
volatile intptr_t OnList ;
|
||||
volatile int TState ;
|
||||
volatile int Notified ; // for native monitor construct
|
||||
|
||||
@ -146,7 +145,6 @@ class ParkEvent : public os::PlatformEvent {
|
||||
AssociatedWith = NULL ;
|
||||
FreeNext = NULL ;
|
||||
ListNext = NULL ;
|
||||
OnList = 0 ;
|
||||
TState = 0 ;
|
||||
Notified = 0 ;
|
||||
}
|
||||
|
@ -227,8 +227,14 @@ int dtrace_waited_probe(ObjectMonitor* monitor, Handle obj, Thread* thr) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define NINFLATIONLOCKS 256
|
||||
static volatile intptr_t gInflationLocks[NINFLATIONLOCKS];
|
||||
static const int NINFLATIONLOCKS = 256;
|
||||
static os::PlatformMutex* gInflationLocks[NINFLATIONLOCKS];
|
||||
|
||||
void ObjectSynchronizer::initialize() {
|
||||
for (int i = 0; i < NINFLATIONLOCKS; i++) {
|
||||
gInflationLocks[i] = new os::PlatformMutex();
|
||||
}
|
||||
}
|
||||
|
||||
static MonitorList _in_use_list;
|
||||
// The ratio of the current _in_use_list count to the ceiling is used
|
||||
@ -749,13 +755,7 @@ static markWord read_stable_mark(oop obj) {
|
||||
|
||||
// The object is being inflated by some other thread.
|
||||
// The caller of read_stable_mark() must wait for inflation to complete.
|
||||
// Avoid live-lock
|
||||
// TODO: consider calling SafepointSynchronize::do_call_back() while
|
||||
// spinning to see if there's a safepoint pending. If so, immediately
|
||||
// yielding or blocking would be appropriate. Avoid spinning while
|
||||
// there is a safepoint pending.
|
||||
// TODO: add inflation contention performance counters.
|
||||
// TODO: restrict the aggregate number of spinners.
|
||||
// Avoid live-lock.
|
||||
|
||||
++its;
|
||||
if (its > 10000 || !os::is_MP()) {
|
||||
@ -775,15 +775,15 @@ static markWord read_stable_mark(oop obj) {
|
||||
// and calling park(). When inflation was complete the thread that accomplished inflation
|
||||
// would detach the list and set the markword to inflated with a single CAS and
|
||||
// then for each thread on the list, set the flag and unpark() the thread.
|
||||
// This is conceptually similar to muxAcquire-muxRelease, except that muxRelease
|
||||
// wakes at most one thread whereas we need to wake the entire list.
|
||||
|
||||
// Index into the lock array based on the current object address.
|
||||
static_assert(is_power_of_2(NINFLATIONLOCKS), "must be");
|
||||
int ix = (cast_from_oop<intptr_t>(obj) >> 5) & (NINFLATIONLOCKS-1);
|
||||
int YieldThenBlock = 0;
|
||||
assert(ix >= 0 && ix < NINFLATIONLOCKS, "invariant");
|
||||
assert((NINFLATIONLOCKS & (NINFLATIONLOCKS-1)) == 0, "invariant");
|
||||
Thread::muxAcquire(gInflationLocks + ix, "gInflationLock");
|
||||
gInflationLocks[ix]->lock();
|
||||
while (obj->mark() == markWord::INFLATING()) {
|
||||
// Beware: NakedYield() is advisory and has almost no effect on some platforms
|
||||
// Beware: naked_yield() is advisory and has almost no effect on some platforms
|
||||
// so we periodically call self->_ParkEvent->park(1).
|
||||
// We use a mixed spin/yield/block mechanism.
|
||||
if ((YieldThenBlock++) >= 16) {
|
||||
@ -792,7 +792,7 @@ static markWord read_stable_mark(oop obj) {
|
||||
os::naked_yield();
|
||||
}
|
||||
}
|
||||
Thread::muxRelease(gInflationLocks + ix);
|
||||
gInflationLocks[ix]->unlock();
|
||||
}
|
||||
} else {
|
||||
SpinPause(); // SMP-polite spinning
|
||||
|
@ -29,6 +29,7 @@
|
||||
#include "oops/markWord.hpp"
|
||||
#include "runtime/basicLock.hpp"
|
||||
#include "runtime/handles.hpp"
|
||||
#include "runtime/os.hpp"
|
||||
#include "runtime/perfData.hpp"
|
||||
|
||||
class LogStream;
|
||||
@ -114,6 +115,9 @@ class ObjectSynchronizer : AllStatic {
|
||||
static void release_monitors_owned_by_thread(TRAPS);
|
||||
static void monitors_iterate(MonitorClosure* m);
|
||||
|
||||
// Initialize the gInflationLocks
|
||||
static void initialize();
|
||||
|
||||
// GC: we currently use an aggressive monitor deflation policy
|
||||
// Basically we try to deflate all monitors that are not busy.
|
||||
static size_t deflate_idle_monitors();
|
||||
|
@ -291,7 +291,6 @@ Thread::Thread() {
|
||||
// The stack would act as a cache to avoid calls to ParkEvent::Allocate()
|
||||
// and ::Release()
|
||||
_ParkEvent = ParkEvent::Allocate(this);
|
||||
_MuxEvent = ParkEvent::Allocate(this);
|
||||
|
||||
#ifdef CHECK_UNHANDLED_OOPS
|
||||
if (CheckUnhandledOops) {
|
||||
@ -439,7 +438,6 @@ Thread::~Thread() {
|
||||
// It's possible we can encounter a null _ParkEvent, etc., in stillborn threads.
|
||||
// We NULL out the fields for good hygiene.
|
||||
ParkEvent::Release(_ParkEvent); _ParkEvent = NULL;
|
||||
ParkEvent::Release(_MuxEvent); _MuxEvent = NULL;
|
||||
|
||||
delete handle_area();
|
||||
delete metadata_handles();
|
||||
@ -3560,6 +3558,7 @@ jint Threads::create_vm(JavaVMInitArgs* args, bool* canTryAgain) {
|
||||
|
||||
// Initialize Java-Level synchronization subsystem
|
||||
ObjectMonitor::Initialize();
|
||||
ObjectSynchronizer::initialize();
|
||||
|
||||
// Initialize global modules
|
||||
jint status = init_globals();
|
||||
@ -4582,22 +4581,11 @@ void Threads::print_threads_compiling(outputStream* st, char* buf, int buflen, b
|
||||
}
|
||||
|
||||
|
||||
// Internal SpinLock and Mutex
|
||||
// Based on ParkEvent
|
||||
|
||||
// Ad-hoc mutual exclusion primitives: SpinLock and Mux
|
||||
// Ad-hoc mutual exclusion primitives: SpinLock
|
||||
//
|
||||
// We employ SpinLocks _only for low-contention, fixed-length
|
||||
// short-duration critical sections where we're concerned
|
||||
// about native mutex_t or HotSpot Mutex:: latency.
|
||||
// The mux construct provides a spin-then-block mutual exclusion
|
||||
// mechanism.
|
||||
//
|
||||
// Testing has shown that contention on the ListLock guarding gFreeList
|
||||
// is common. If we implement ListLock as a simple SpinLock it's common
|
||||
// for the JVM to devolve to yielding with little progress. This is true
|
||||
// despite the fact that the critical sections protected by ListLock are
|
||||
// extremely short.
|
||||
//
|
||||
// TODO-FIXME: ListLock should be of type SpinLock.
|
||||
// We should make this a 1st-class type, integrated into the lock
|
||||
@ -4650,150 +4638,6 @@ void Thread::SpinRelease(volatile int * adr) {
|
||||
*adr = 0;
|
||||
}
|
||||
|
||||
// muxAcquire and muxRelease:
|
||||
//
|
||||
// * muxAcquire and muxRelease support a single-word lock-word construct.
|
||||
// The LSB of the word is set IFF the lock is held.
|
||||
// The remainder of the word points to the head of a singly-linked list
|
||||
// of threads blocked on the lock.
|
||||
//
|
||||
// * The current implementation of muxAcquire-muxRelease uses its own
|
||||
// dedicated Thread._MuxEvent instance. If we're interested in
|
||||
// minimizing the peak number of extant ParkEvent instances then
|
||||
// we could eliminate _MuxEvent and "borrow" _ParkEvent as long
|
||||
// as certain invariants were satisfied. Specifically, care would need
|
||||
// to be taken with regards to consuming unpark() "permits".
|
||||
// A safe rule of thumb is that a thread would never call muxAcquire()
|
||||
// if it's enqueued (cxq, EntryList, WaitList, etc) and will subsequently
|
||||
// park(). Otherwise the _ParkEvent park() operation in muxAcquire() could
|
||||
// consume an unpark() permit intended for monitorenter, for instance.
|
||||
// One way around this would be to widen the restricted-range semaphore
|
||||
// implemented in park(). Another alternative would be to provide
|
||||
// multiple instances of the PlatformEvent() for each thread. One
|
||||
// instance would be dedicated to muxAcquire-muxRelease, for instance.
|
||||
//
|
||||
// * Usage:
|
||||
// -- Only as leaf locks
|
||||
// -- for short-term locking only as muxAcquire does not perform
|
||||
// thread state transitions.
|
||||
//
|
||||
// Alternatives:
|
||||
// * We could implement muxAcquire and muxRelease with MCS or CLH locks
|
||||
// but with parking or spin-then-park instead of pure spinning.
|
||||
// * Use Taura-Oyama-Yonezawa locks.
|
||||
// * It's possible to construct a 1-0 lock if we encode the lockword as
|
||||
// (List,LockByte). Acquire will CAS the full lockword while Release
|
||||
// will STB 0 into the LockByte. The 1-0 scheme admits stranding, so
|
||||
// acquiring threads use timers (ParkTimed) to detect and recover from
|
||||
// the stranding window. Thread/Node structures must be aligned on 256-byte
|
||||
// boundaries by using placement-new.
|
||||
// * Augment MCS with advisory back-link fields maintained with CAS().
|
||||
// Pictorially: LockWord -> T1 <-> T2 <-> T3 <-> ... <-> Tn <-> Owner.
|
||||
// The validity of the backlinks must be ratified before we trust the value.
|
||||
// If the backlinks are invalid the exiting thread must back-track through the
|
||||
// forward links, which are always trustworthy.
|
||||
// * Add a successor indication. The LockWord is currently encoded as
|
||||
// (List, LOCKBIT:1). We could also add a SUCCBIT or an explicit _succ variable
|
||||
// to provide the usual futile-wakeup optimization.
|
||||
// See RTStt for details.
|
||||
//
|
||||
|
||||
|
||||
// Low-order bit of a mux lock word. muxAcquire() sets it when taking the lock
// and muxRelease() clears it; the remaining bits hold the waiter-list head.
const intptr_t LOCKBIT = 1;
|
||||
|
||||
// Acquire the single-word mux lock at *Lock, blocking the calling thread
// on its _MuxEvent if the lock cannot be taken after a short spin.
//
// Lock     - address of the lock word (LOCKBIT in the low bit, waiter list
//            head in the remaining bits).
// LockName - not referenced anywhere in this function body.
void Thread::muxAcquire(volatile intptr_t * Lock, const char * LockName) {
|
||||
// Fast path: the word was 0 (unlocked, no waiters) and is now LOCKBIT.
intptr_t w = Atomic::cmpxchg(Lock, (intptr_t)0, LOCKBIT);
|
||||
if (w == 0) return;
|
||||
// The word was non-zero but the lock bit was clear: try to set the lock bit
// while leaving the waiter-list bits of the observed value untouched.
if ((w & LOCKBIT) == 0 && Atomic::cmpxchg(Lock, w, w|LOCKBIT) == w) {
|
||||
return;
|
||||
}
|
||||
|
||||
ParkEvent * const Self = Thread::current()->_MuxEvent;
|
||||
// The event's address is stored in the lock word alongside LOCKBIT, so its
// low bit must be clear.
assert((intptr_t(Self) & LOCKBIT) == 0, "invariant");
|
||||
for (;;) {
|
||||
// 101 acquisition attempts when os::is_MP() is true, otherwise a single one.
int its = (os::is_MP() ? 100 : 0) + 1;
|
||||
|
||||
// Optional spin phase: spin-then-park strategy
|
||||
while (--its >= 0) {
|
||||
w = *Lock;
|
||||
if ((w & LOCKBIT) == 0 && Atomic::cmpxchg(Lock, w, w|LOCKBIT) == w) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Spinning failed: prepare to enqueue. OnList records which lock this event
// is waiting on; muxRelease() zeroes it before calling unpark().
Self->reset();
|
||||
Self->OnList = intptr_t(Lock);
|
||||
// The following fence() isn't _strictly necessary as the subsequent
|
||||
// CAS() both serializes execution and ratifies the fetched *Lock value.
|
||||
OrderAccess::fence();
|
||||
// Either grab the lock if it has become free, or push Self onto the head of
// the waiter list with a CAS that keeps LOCKBIT set.
for (;;) {
|
||||
w = *Lock;
|
||||
if ((w & LOCKBIT) == 0) {
|
||||
if (Atomic::cmpxchg(Lock, w, w|LOCKBIT) == w) {
|
||||
Self->OnList = 0; // hygiene - allows stronger asserts
|
||||
return;
|
||||
}
|
||||
continue; // Interference -- *Lock changed -- Just retry
|
||||
}
|
||||
assert(w & LOCKBIT, "invariant");
|
||||
// Link Self in front of the current list head taken from the observed word.
Self->ListNext = (ParkEvent *) (w & ~LOCKBIT);
|
||||
if (Atomic::cmpxchg(Lock, w, intptr_t(Self)|LOCKBIT) == w) break;
|
||||
}
|
||||
|
||||
// Block until OnList has been cleared; the loop re-checks the field after
// every return from park(). Once it is clear, the outer loop competes for
// the lock again from the spin phase.
while (Self->OnList != 0) {
|
||||
Self->park();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Release() must extract a successor from the list and then wake that thread.
|
||||
// It can "pop" the front of the list or use a detach-modify-reattach (DMR) scheme
|
||||
// similar to that used by ParkEvent::Allocate() and ::Release(). DMR-based
|
||||
// Release() would :
|
||||
// (A) CAS() or swap() null to *Lock, releasing the lock and detaching the list.
|
||||
// (B) Extract a successor from the private list "in-hand"
|
||||
// (C) attempt to CAS() the residual back into *Lock over null.
|
||||
// If there were any newly arrived threads, the CAS() would fail.
|
||||
// In that case Release() would detach the RATs, re-merge the list in-hand
|
||||
// with the RATs and repeat as needed. Alternately, Release() might
|
||||
// detach and extract a successor, but then pass the residual list to the wakee.
|
||||
// The wakee would be responsible for reattaching and remerging before it
|
||||
// competed for the lock.
|
||||
//
|
||||
// Both "pop" and DMR are immune from ABA corruption -- there can be
|
||||
// multiple concurrent pushers, but only one popper or detacher.
|
||||
// This implementation pops from the head of the list. This is unfair,
|
||||
// but tends to provide excellent throughput as hot threads remain hot.
|
||||
// (We wake recently run threads first).
|
||||
//
|
||||
// All paths through muxRelease() will execute a CAS.
|
||||
// Release consistency -- We depend on the CAS in muxRelease() to provide full
|
||||
// bidirectional fence/MEMBAR semantics, ensuring that all prior memory operations
|
||||
// executed within the critical section are complete and globally visible before the
|
||||
// store (CAS) to the lock-word that releases the lock becomes globally visible.
|
||||
// Release the mux lock at *Lock. If the waiter list is non-empty, pop its
// head element and unpark it.
//
// Lock - address of the lock word; the asserts below require LOCKBIT to be
//        set on entry.
void Thread::muxRelease(volatile intptr_t * Lock) {
|
||||
for (;;) {
|
||||
// No-waiter case: the word is exactly LOCKBIT, so swing it to 0 and finish.
// Otherwise the CAS fails and w holds the current word, waiter list included.
const intptr_t w = Atomic::cmpxchg(Lock, LOCKBIT, (intptr_t)0);
|
||||
assert(w & LOCKBIT, "invariant");
|
||||
if (w == LOCKBIT) return;
|
||||
// Strip the lock bit to recover the head of the waiter list.
ParkEvent * const List = (ParkEvent *) (w & ~LOCKBIT);
|
||||
assert(List != NULL, "invariant");
|
||||
assert(List->OnList == intptr_t(Lock), "invariant");
|
||||
ParkEvent * const nxt = List->ListNext;
|
||||
// nxt becomes the new lock word, so its low bit must be clear for the lock
// to end up released.
guarantee((intptr_t(nxt) & LOCKBIT) == 0, "invariant");
|
||||
|
||||
// The following CAS() releases the lock and pops the head element.
|
||||
// The CAS() also ratifies the previously fetched lock-word value.
|
||||
if (Atomic::cmpxchg(Lock, w, intptr_t(nxt)) != w) {
|
||||
continue;
|
||||
}
|
||||
// Clear OnList before unpark(): the waiter in muxAcquire() loops on
// OnList != 0 around park().
List->OnList = 0;
|
||||
OrderAccess::fence();
|
||||
List->unpark();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void Threads::verify() {
|
||||
ALL_JAVA_THREADS(p) {
|
||||
|
@ -827,8 +827,8 @@ protected:
|
||||
public:
|
||||
volatile intptr_t _Stalled;
|
||||
volatile int _TypeTag;
|
||||
ParkEvent * _ParkEvent; // for Object monitors and JVMTI raw monitors
|
||||
ParkEvent * _MuxEvent; // for low-level muxAcquire-muxRelease
|
||||
ParkEvent * _ParkEvent; // for Object monitors, JVMTI raw monitors,
|
||||
// and ObjectSynchronizer::read_stable_mark
|
||||
int NativeSyncRecursion; // diagnostic
|
||||
|
||||
volatile int _OnTrap; // Resume-at IP delta
|
||||
@ -837,13 +837,10 @@ protected:
|
||||
jint _hashStateY;
|
||||
jint _hashStateZ;
|
||||
|
||||
// Low-level leaf-lock primitives used to implement synchronization
|
||||
// and native monitor-mutex infrastructure.
|
||||
// Low-level leaf-lock primitives used to implement synchronization.
|
||||
// Not for general synchronization use.
|
||||
static void SpinAcquire(volatile int * Lock, const char * Name);
|
||||
static void SpinRelease(volatile int * Lock);
|
||||
static void muxAcquire(volatile intptr_t * Lock, const char * Name);
|
||||
static void muxRelease(volatile intptr_t * Lock);
|
||||
};
|
||||
|
||||
// Inline implementation of Thread::current()
|
||||
|
Loading…
Reference in New Issue
Block a user