6852873: Reduce safepoint cleanup time
New optional flags to reduce inflated monitor cleanup times Reviewed-by: chrisphi, dice
This commit is contained in:
parent
d2703f3e8d
commit
9934e0af8e
@ -920,6 +920,10 @@ class CommandLineFlags {
|
||||
\
|
||||
product(intx, AlwaysInflate, 0, "(Unstable) Force inflation") \
|
||||
\
|
||||
product(intx, MonitorBound, 0, "Bound Monitor population") \
|
||||
\
|
||||
product(bool, MonitorInUseLists, false, "Track Monitors for Deflation") \
|
||||
\
|
||||
product(intx, Atomics, 0, \
|
||||
"(Unsafe,Unstable) Diagnostic - Controls emission of atomics") \
|
||||
\
|
||||
|
@ -185,6 +185,8 @@ struct SharedGlobals {
|
||||
} ;
|
||||
|
||||
static SharedGlobals GVars ;
|
||||
static int MonitorScavengeThreshold = 1000000 ;
|
||||
static volatile int ForceMonitorScavenge = 0 ; // Scavenge required and pending
|
||||
|
||||
|
||||
// Tunables ...
|
||||
@ -746,8 +748,85 @@ void Thread::muxRelease (volatile intptr_t * Lock) {
|
||||
ObjectMonitor * ObjectSynchronizer::gBlockList = NULL ;
|
||||
ObjectMonitor * volatile ObjectSynchronizer::gFreeList = NULL ;
|
||||
static volatile intptr_t ListLock = 0 ; // protects global monitor free-list cache
|
||||
static volatile int MonitorFreeCount = 0 ; // # on gFreeList
|
||||
static volatile int MonitorPopulation = 0 ; // # Extant -- in circulation
|
||||
#define CHAINMARKER ((oop)-1)
|
||||
|
||||
// Constraining monitor pool growth via MonitorBound ...
|
||||
//
|
||||
// The monitor pool is grow-only. We scavenge at STW safepoint-time, but the
|
||||
// the rate of scavenging is driven primarily by GC. As such, we can find
|
||||
// an inordinate number of monitors in circulation.
|
||||
// To avoid that scenario we can artificially induce a STW safepoint
|
||||
// if the pool appears to be growing past some reasonable bound.
|
||||
// Generally we favor time in space-time tradeoffs, but as there's no
|
||||
// natural back-pressure on the # of extant monitors we need to impose some
|
||||
// type of limit. Beware that if MonitorBound is set to too low a value
|
||||
// we could just loop. In addition, if MonitorBound is set to a low value
|
||||
// we'll incur more safepoints, which are harmful to performance.
|
||||
// See also: GuaranteedSafepointInterval
|
||||
//
|
||||
// As noted elsewhere, the correct long-term solution is to deflate at
|
||||
// monitorexit-time, in which case the number of inflated objects is bounded
|
||||
// by the number of threads. That policy obviates the need for scavenging at
|
||||
// STW safepoint time. As an aside, scavenging can be time-consuming when the
|
||||
// # of extant monitors is large. Unfortunately there's a day-1 assumption baked
|
||||
// into much HotSpot code that the object::monitor relationship, once established
|
||||
// or observed, will remain stable except over potential safepoints.
|
||||
//
|
||||
// We can use either a blocking synchronous VM operation or an async VM operation.
|
||||
// -- If we use a blocking VM operation :
|
||||
// Calls to ScavengeCheck() should be inserted only into 'safe' locations in paths
|
||||
// that lead to ::inflate() or ::omAlloc().
|
||||
// Even though the safepoint will not directly induce GC, a GC might
|
||||
// piggyback on the safepoint operation, so the caller should hold no naked oops.
|
||||
// Furthermore, monitor::object relationships are NOT necessarily stable over this call
|
||||
// unless the caller has made provisions to "pin" the object to the monitor, say
|
||||
// by incrementing the monitor's _count field.
|
||||
// -- If we use a non-blocking asynchronous VM operation :
|
||||
// the constraints above don't apply. The safepoint will fire in the future
|
||||
// at a more convenient time. On the other hand the latency between posting and
|
||||
// running the safepoint introduces or admits "slop" or laxity during which the
|
||||
// monitor population can climb further above the threshold. The monitor population,
|
||||
// however, tends to converge asymptotically over time to a count that's slightly
|
||||
// above the target value specified by MonitorBound. That is, we avoid unbounded
|
||||
// growth, albeit with some imprecision.
|
||||
//
|
||||
// The current implementation uses asynchronous VM operations.
|
||||
//
|
||||
// Ideally we'd check if (MonitorPopulation > MonitorBound) in omAlloc()
|
||||
// immediately before trying to grow the global list via allocation.
|
||||
// If the predicate was true then we'd induce a synchronous safepoint, wait
|
||||
// for the safepoint to complete, and then again to allocate from the global
|
||||
// free list. This approach is much simpler and precise, admitting no "slop".
|
||||
// Unfortunately we can't safely safepoint in the midst of omAlloc(), so
|
||||
// instead we use asynchronous safepoints.
|
||||
|
||||
static void InduceScavenge (Thread * Self, const char * Whence) {
|
||||
// Induce STW safepoint to trim monitors
|
||||
// Ultimately, this results in a call to deflate_idle_monitors() in the near future.
|
||||
// More precisely, trigger an asynchronous STW safepoint as the number
|
||||
// of active monitors passes the specified threshold.
|
||||
// TODO: assert thread state is reasonable
|
||||
|
||||
if (ForceMonitorScavenge == 0 && Atomic::xchg (1, &ForceMonitorScavenge) == 0) {
|
||||
if (Knob_Verbose) {
|
||||
::printf ("Monitor scavenge - Induced STW @%s (%d)\n", Whence, ForceMonitorScavenge) ;
|
||||
::fflush(stdout) ;
|
||||
}
|
||||
// Induce a 'null' safepoint to scavenge monitors
|
||||
// Must VM_Operation instance be heap allocated as the op will be enqueue and posted
|
||||
// to the VMthread and have a lifespan longer than that of this activation record.
|
||||
// The VMThread will delete the op when completed.
|
||||
VMThread::execute (new VM_ForceAsyncSafepoint()) ;
|
||||
|
||||
if (Knob_Verbose) {
|
||||
::printf ("Monitor scavenge - STW posted @%s (%d)\n", Whence, ForceMonitorScavenge) ;
|
||||
::fflush(stdout) ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ObjectMonitor * ATTR ObjectSynchronizer::omAlloc (Thread * Self) {
|
||||
// A large MAXPRIVATE value reduces both list lock contention
|
||||
// and list coherency traffic, but also tends to increase the
|
||||
@ -770,6 +849,11 @@ ObjectMonitor * ATTR ObjectSynchronizer::omAlloc (Thread * Self) {
|
||||
Self->omFreeCount -- ;
|
||||
// CONSIDER: set m->FreeNext = BAD -- diagnostic hygiene
|
||||
guarantee (m->object() == NULL, "invariant") ;
|
||||
if (MonitorInUseLists) {
|
||||
m->FreeNext = Self->omInUseList;
|
||||
Self->omInUseList = m;
|
||||
Self->omInUseCount ++;
|
||||
}
|
||||
return m ;
|
||||
}
|
||||
|
||||
@ -784,6 +868,7 @@ ObjectMonitor * ATTR ObjectSynchronizer::omAlloc (Thread * Self) {
|
||||
// on various locks.
|
||||
Thread::muxAcquire (&ListLock, "omAlloc") ;
|
||||
for (int i = Self->omFreeProvision; --i >= 0 && gFreeList != NULL; ) {
|
||||
MonitorFreeCount --;
|
||||
ObjectMonitor * take = gFreeList ;
|
||||
gFreeList = take->FreeNext ;
|
||||
guarantee (take->object() == NULL, "invariant") ;
|
||||
@ -796,6 +881,15 @@ ObjectMonitor * ATTR ObjectSynchronizer::omAlloc (Thread * Self) {
|
||||
if (Self->omFreeProvision > MAXPRIVATE ) Self->omFreeProvision = MAXPRIVATE ;
|
||||
TEVENT (omFirst - reprovision) ;
|
||||
continue ;
|
||||
|
||||
const int mx = MonitorBound ;
|
||||
if (mx > 0 && (MonitorPopulation-MonitorFreeCount) > mx) {
|
||||
// We can't safely induce a STW safepoint from omAlloc() as our thread
|
||||
// state may not be appropriate for such activities and callers may hold
|
||||
// naked oops, so instead we defer the action.
|
||||
InduceScavenge (Self, "omAlloc") ;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// 3: allocate a block of new ObjectMonitors
|
||||
@ -836,6 +930,8 @@ ObjectMonitor * ATTR ObjectSynchronizer::omAlloc (Thread * Self) {
|
||||
// Acquire the ListLock to manipulate BlockList and FreeList.
|
||||
// An Oyama-Taura-Yonezawa scheme might be more efficient.
|
||||
Thread::muxAcquire (&ListLock, "omAlloc [2]") ;
|
||||
MonitorPopulation += _BLOCKSIZE-1;
|
||||
MonitorFreeCount += _BLOCKSIZE-1;
|
||||
|
||||
// Add the new block to the list of extant blocks (gBlockList).
|
||||
// The very first objectMonitor in a block is reserved and dedicated.
|
||||
@ -894,7 +990,9 @@ void ObjectSynchronizer::omFlush (Thread * Self) {
|
||||
if (List == NULL) return ;
|
||||
ObjectMonitor * Tail = NULL ;
|
||||
ObjectMonitor * s ;
|
||||
int Tally = 0;
|
||||
for (s = List ; s != NULL ; s = s->FreeNext) {
|
||||
Tally ++ ;
|
||||
Tail = s ;
|
||||
guarantee (s->object() == NULL, "invariant") ;
|
||||
guarantee (!s->is_busy(), "invariant") ;
|
||||
@ -906,6 +1004,7 @@ void ObjectSynchronizer::omFlush (Thread * Self) {
|
||||
Thread::muxAcquire (&ListLock, "omFlush") ;
|
||||
Tail->FreeNext = gFreeList ;
|
||||
gFreeList = List ;
|
||||
MonitorFreeCount += Tally;
|
||||
Thread::muxRelease (&ListLock) ;
|
||||
TEVENT (omFlush) ;
|
||||
}
|
||||
@ -1747,16 +1846,15 @@ void ObjectSynchronizer::oops_do(OopClosure* f) {
|
||||
// Having a large number of monitors in-circulation negatively
|
||||
// impacts the performance of some applications (e.g., PointBase).
|
||||
// Broadly, we want to minimize the # of monitors in circulation.
|
||||
// Alternately, we could partition the active monitors into sub-lists
|
||||
// of those that need scanning and those that do not.
|
||||
// Specifically, we would add a new sub-list of objectmonitors
|
||||
// that are in-circulation and potentially active. deflate_idle_monitors()
|
||||
// would scan only that list. Other monitors could reside on a quiescent
|
||||
// list. Such sequestered monitors wouldn't need to be scanned by
|
||||
// deflate_idle_monitors(). omAlloc() would first check the global free list,
|
||||
// then the quiescent list, and, failing those, would allocate a new block.
|
||||
// Deflate_idle_monitors() would scavenge and move monitors to the
|
||||
// quiescent list.
|
||||
//
|
||||
// We have added a flag, MonitorInUseLists, which creates a list
|
||||
// of active monitors for each thread. deflate_idle_monitors()
|
||||
// only scans the per-thread inuse lists. omAlloc() puts all
|
||||
// assigned monitors on the per-thread list. deflate_idle_monitors()
|
||||
// returns the non-busy monitors to the global free list.
|
||||
// An alternative could have used a single global inuse list. The
|
||||
// downside would have been the additional cost of acquiring the global list lock
|
||||
// for every omAlloc().
|
||||
//
|
||||
// Perversely, the heap size -- and thus the STW safepoint rate --
|
||||
// typically drives the scavenge rate. Large heaps can mean infrequent GC,
|
||||
@ -1769,18 +1867,100 @@ void ObjectSynchronizer::oops_do(OopClosure* f) {
|
||||
// An even better solution would be to deflate on-the-fly, aggressively,
|
||||
// at monitorexit-time as is done in EVM's metalock or Relaxed Locks.
|
||||
|
||||
|
||||
// Deflate a single monitor if not in use
|
||||
// Return true if deflated, false if in use
|
||||
bool ObjectSynchronizer::deflate_monitor(ObjectMonitor* mid, oop obj,
|
||||
ObjectMonitor** FreeHeadp, ObjectMonitor** FreeTailp) {
|
||||
bool deflated;
|
||||
// Normal case ... The monitor is associated with obj.
|
||||
guarantee (obj->mark() == markOopDesc::encode(mid), "invariant") ;
|
||||
guarantee (mid == obj->mark()->monitor(), "invariant");
|
||||
guarantee (mid->header()->is_neutral(), "invariant");
|
||||
|
||||
if (mid->is_busy()) {
|
||||
if (ClearResponsibleAtSTW) mid->_Responsible = NULL ;
|
||||
deflated = false;
|
||||
} else {
|
||||
// Deflate the monitor if it is no longer being used
|
||||
// It's idle - scavenge and return to the global free list
|
||||
// plain old deflation ...
|
||||
TEVENT (deflate_idle_monitors - scavenge1) ;
|
||||
if (TraceMonitorInflation) {
|
||||
if (obj->is_instance()) {
|
||||
ResourceMark rm;
|
||||
tty->print_cr("Deflating object " INTPTR_FORMAT " , mark " INTPTR_FORMAT " , type %s",
|
||||
(intptr_t) obj, (intptr_t) obj->mark(), Klass::cast(obj->klass())->external_name());
|
||||
}
|
||||
}
|
||||
|
||||
// Restore the header back to obj
|
||||
obj->release_set_mark(mid->header());
|
||||
mid->clear();
|
||||
|
||||
assert (mid->object() == NULL, "invariant") ;
|
||||
|
||||
// Move the object to the working free list defined by FreeHead,FreeTail.
|
||||
if (*FreeHeadp == NULL) *FreeHeadp = mid;
|
||||
if (*FreeTailp != NULL) {
|
||||
ObjectMonitor * prevtail = *FreeTailp;
|
||||
prevtail->FreeNext = mid;
|
||||
}
|
||||
*FreeTailp = mid;
|
||||
deflated = true;
|
||||
}
|
||||
return deflated;
|
||||
}
|
||||
|
||||
void ObjectSynchronizer::deflate_idle_monitors() {
|
||||
assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
|
||||
int nInuse = 0 ; // currently associated with objects
|
||||
int nInCirculation = 0 ; // extant
|
||||
int nScavenged = 0 ; // reclaimed
|
||||
bool deflated = false;
|
||||
|
||||
ObjectMonitor * FreeHead = NULL ; // Local SLL of scavenged monitors
|
||||
ObjectMonitor * FreeTail = NULL ;
|
||||
|
||||
// Iterate over all extant monitors - Scavenge all idle monitors.
|
||||
TEVENT (deflate_idle_monitors) ;
|
||||
for (ObjectMonitor* block = gBlockList; block != NULL; block = next(block)) {
|
||||
// Prevent omFlush from changing mids in Thread dtor's during deflation
|
||||
// And in case the vm thread is acquiring a lock during a safepoint
|
||||
// See e.g. 6320749
|
||||
Thread::muxAcquire (&ListLock, "scavenge - return") ;
|
||||
|
||||
if (MonitorInUseLists) {
|
||||
ObjectMonitor* mid;
|
||||
ObjectMonitor* next;
|
||||
ObjectMonitor* curmidinuse;
|
||||
for (JavaThread* cur = Threads::first(); cur != NULL; cur = cur->next()) {
|
||||
curmidinuse = NULL;
|
||||
for (mid = cur->omInUseList; mid != NULL; ) {
|
||||
oop obj = (oop) mid->object();
|
||||
deflated = false;
|
||||
if (obj != NULL) {
|
||||
deflated = deflate_monitor(mid, obj, &FreeHead, &FreeTail);
|
||||
}
|
||||
if (deflated) {
|
||||
// extract from per-thread in-use-list
|
||||
if (mid == cur->omInUseList) {
|
||||
cur->omInUseList = mid->FreeNext;
|
||||
} else if (curmidinuse != NULL) {
|
||||
curmidinuse->FreeNext = mid->FreeNext; // maintain the current thread inuselist
|
||||
}
|
||||
next = mid->FreeNext;
|
||||
mid->FreeNext = NULL; // This mid is current tail in the FreeHead list
|
||||
mid = next;
|
||||
cur->omInUseCount--;
|
||||
nScavenged ++ ;
|
||||
} else {
|
||||
curmidinuse = mid;
|
||||
mid = mid->FreeNext;
|
||||
nInuse ++;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else for (ObjectMonitor* block = gBlockList; block != NULL; block = next(block)) {
|
||||
// Iterate over all extant monitors - Scavenge all idle monitors.
|
||||
assert(block->object() == CHAINMARKER, "must be a block header");
|
||||
nInCirculation += _BLOCKSIZE ;
|
||||
for (int i = 1 ; i < _BLOCKSIZE; i++) {
|
||||
@ -1795,61 +1975,39 @@ void ObjectSynchronizer::deflate_idle_monitors() {
|
||||
guarantee (!mid->is_busy(), "invariant") ;
|
||||
continue ;
|
||||
}
|
||||
deflated = deflate_monitor(mid, obj, &FreeHead, &FreeTail);
|
||||
|
||||
// Normal case ... The monitor is associated with obj.
|
||||
guarantee (obj->mark() == markOopDesc::encode(mid), "invariant") ;
|
||||
guarantee (mid == obj->mark()->monitor(), "invariant");
|
||||
guarantee (mid->header()->is_neutral(), "invariant");
|
||||
|
||||
if (mid->is_busy()) {
|
||||
if (ClearResponsibleAtSTW) mid->_Responsible = NULL ;
|
||||
nInuse ++ ;
|
||||
if (deflated) {
|
||||
mid->FreeNext = NULL ;
|
||||
nScavenged ++ ;
|
||||
} else {
|
||||
// Deflate the monitor if it is no longer being used
|
||||
// It's idle - scavenge and return to the global free list
|
||||
// plain old deflation ...
|
||||
TEVENT (deflate_idle_monitors - scavenge1) ;
|
||||
if (TraceMonitorInflation) {
|
||||
if (obj->is_instance()) {
|
||||
ResourceMark rm;
|
||||
tty->print_cr("Deflating object " INTPTR_FORMAT " , mark " INTPTR_FORMAT " , type %s",
|
||||
(intptr_t) obj, (intptr_t) obj->mark(), Klass::cast(obj->klass())->external_name());
|
||||
}
|
||||
}
|
||||
|
||||
// Restore the header back to obj
|
||||
obj->release_set_mark(mid->header());
|
||||
mid->clear();
|
||||
|
||||
assert (mid->object() == NULL, "invariant") ;
|
||||
|
||||
// Move the object to the working free list defined by FreeHead,FreeTail.
|
||||
mid->FreeNext = NULL ;
|
||||
if (FreeHead == NULL) FreeHead = mid ;
|
||||
if (FreeTail != NULL) FreeTail->FreeNext = mid ;
|
||||
FreeTail = mid ;
|
||||
nScavenged ++ ;
|
||||
nInuse ++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
MonitorFreeCount += nScavenged;
|
||||
|
||||
// Consider: audit gFreeList to ensure that MonitorFreeCount and list agree.
|
||||
|
||||
if (Knob_Verbose) {
|
||||
::printf ("Deflate: InCirc=%d InUse=%d Scavenged=%d ForceMonitorScavenge=%d : pop=%d free=%d\n",
|
||||
nInCirculation, nInuse, nScavenged, ForceMonitorScavenge,
|
||||
MonitorPopulation, MonitorFreeCount) ;
|
||||
::fflush(stdout) ;
|
||||
}
|
||||
|
||||
ForceMonitorScavenge = 0; // Reset
|
||||
|
||||
// Move the scavenged monitors back to the global free list.
|
||||
// In theory we don't need the freelist lock as we're at a STW safepoint.
|
||||
// omAlloc() and omFree() can only be called while a thread is _not in safepoint state.
|
||||
// But it's remotely possible that omFlush() or release_monitors_owned_by_thread()
|
||||
// might be called while not at a global STW safepoint. In the interest of
|
||||
// safety we protect the following access with ListLock.
|
||||
// An even more conservative and prudent approach would be to guard
|
||||
// the main loop in scavenge_idle_monitors() with ListLock.
|
||||
if (FreeHead != NULL) {
|
||||
guarantee (FreeTail != NULL && nScavenged > 0, "invariant") ;
|
||||
assert (FreeTail->FreeNext == NULL, "invariant") ;
|
||||
// constant-time list splice - prepend scavenged segment to gFreeList
|
||||
Thread::muxAcquire (&ListLock, "scavenge - return") ;
|
||||
FreeTail->FreeNext = gFreeList ;
|
||||
gFreeList = FreeHead ;
|
||||
Thread::muxRelease (&ListLock) ;
|
||||
}
|
||||
Thread::muxRelease (&ListLock) ;
|
||||
|
||||
if (_sync_Deflations != NULL) _sync_Deflations->inc(nScavenged) ;
|
||||
if (_sync_MonExtant != NULL) _sync_MonExtant ->set_value(nInCirculation);
|
||||
|
@ -150,6 +150,8 @@ class ObjectSynchronizer : AllStatic {
|
||||
// Basically we deflate all monitors that are not busy.
|
||||
// An adaptive profile-based deflation policy could be used if needed
|
||||
static void deflate_idle_monitors();
|
||||
static bool deflate_monitor(ObjectMonitor* mid, oop obj, ObjectMonitor** FreeHeadp,
|
||||
ObjectMonitor** FreeTailp);
|
||||
static void oops_do(OopClosure* f);
|
||||
|
||||
// debugging
|
||||
|
@ -139,6 +139,8 @@ Thread::Thread() {
|
||||
omFreeList = NULL ;
|
||||
omFreeCount = 0 ;
|
||||
omFreeProvision = 32 ;
|
||||
omInUseList = NULL ;
|
||||
omInUseCount = 0 ;
|
||||
|
||||
_SR_lock = new Monitor(Mutex::suspend_resume, "SR_lock", true);
|
||||
_suspend_flags = 0;
|
||||
|
@ -225,6 +225,8 @@ class Thread: public ThreadShadow {
|
||||
ObjectMonitor * omFreeList ;
|
||||
int omFreeCount ; // length of omFreeList
|
||||
int omFreeProvision ; // reload chunk size
|
||||
ObjectMonitor * omInUseList; // SLL to track monitors in circulation
|
||||
int omInUseCount; // length of omInUseList
|
||||
|
||||
public:
|
||||
enum {
|
||||
@ -493,7 +495,6 @@ public:
|
||||
|
||||
static ByteSize stack_base_offset() { return byte_offset_of(Thread, _stack_base ); }
|
||||
static ByteSize stack_size_offset() { return byte_offset_of(Thread, _stack_size ); }
|
||||
static ByteSize omFreeList_offset() { return byte_offset_of(Thread, omFreeList); }
|
||||
|
||||
#define TLAB_FIELD_OFFSET(name) \
|
||||
static ByteSize tlab_##name##_offset() { return byte_offset_of(Thread, _tlab) + ThreadLocalAllocBuffer::name##_offset(); }
|
||||
|
Loading…
Reference in New Issue
Block a user