8047104: cleanup misc issues prior to Contended Locking reorder and cache

Checkpoint misc cleanups for Contended Locking prior to first optimization bucket.

Reviewed-by: dholmes, sspitsyn, dice
Daniel D. Daugherty 2014-07-03 11:07:51 -07:00
parent a145a396fc
commit 2876714328
16 changed files with 173 additions and 200 deletions

View File

@ -4218,22 +4218,12 @@ static struct timespec* compute_abstime(struct timespec* abstime, jlong millis)
return abstime;
}
// Test-and-clear _Event, always leaves _Event set to 0, returns immediately.
// Conceptually TryPark() should be equivalent to park(0).
int os::PlatformEvent::TryPark() {
for (;;) {
const int v = _Event;
guarantee((v == 0) || (v == 1), "invariant");
if (Atomic::cmpxchg(0, &_Event, v) == v) return v;
}
}
void os::PlatformEvent::park() { // AKA "down()"
// Invariant: Only the thread associated with the Event/PlatformEvent
// may call park().
// TODO: assert that _Assoc != NULL or _Assoc == Self
assert(_nParked == 0, "invariant");
int v;
for (;;) {
v = _Event;
@ -4333,8 +4323,7 @@ void os::PlatformEvent::unpark() {
// 1 :=> 1
// -1 :=> either 0 or 1; must signal target thread
// That is, we can safely transition _Event from -1 to either
// 0 or 1. Forcing 1 is slightly more efficient for back-to-back
// unpark() calls.
// 0 or 1.
// See also: "Semaphores in Plan 9" by Mullender & Cox
//
// Note: Forcing a transition from "-1" to "1" on an unpark() means
@ -4541,10 +4530,9 @@ void Parker::park(bool isAbsolute, jlong time) {
}
void Parker::unpark() {
int s, status;
status = pthread_mutex_lock(_mutex);
int status = pthread_mutex_lock(_mutex);
assert(status == 0, "invariant");
s = _counter;
const int s = _counter;
_counter = 1;
if (s < 1) {
if (WorkAroundNPTLTimedWaitHang) {

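The hunks above remove TryPark() and trim the unpark() comment, but the transition table they reference (0 :=> 1, 1 :=> 1, -1 :=> wake the parked thread; park() always leaves _Event at 0) is still the heart of the PlatformEvent contract. Below is a minimal standalone sketch of that protocol using C++11 std::atomic plus a condition variable in place of HotSpot's Atomic and platform primitives; the class name and structure are illustrative, not the actual implementation.

#include <atomic>
#include <condition_variable>
#include <mutex>

// SketchEvent is a hypothetical name; this mirrors the documented protocol,
// not HotSpot's PlatformEvent implementation.
class SketchEvent {
  std::atomic<int> _event{0};   // 1: permit pending, 0: neutral, -1: a thread is parked
  std::mutex _m;
  std::condition_variable _cv;
 public:
  void park() {                 // only the associated thread may call park()
    int expected = 1;
    if (_event.compare_exchange_strong(expected, 0)) return;  // consume a pending permit
    expected = 0;
    if (!_event.compare_exchange_strong(expected, -1)) {      // a permit arrived meanwhile
      _event.store(0);
      return;
    }
    std::unique_lock<std::mutex> lk(_m);
    while (_event.load() == -1) _cv.wait(lk);                 // woken by unpark()
    _event.store(0);                                          // always leave the event at 0
  }
  void unpark() {
    // 0 :=> 1, 1 :=> 1, -1 :=> 1 plus a signal to the parked thread
    if (_event.exchange(1) == -1) {
      std::lock_guard<std::mutex> lk(_m);
      _cv.notify_one();
    }
  }
};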
View File

@ -219,7 +219,6 @@ class PlatformEvent : public CHeapObj<mtInternal> {
int fired() { return _Event; }
void park();
void unpark();
int TryPark();
int park(jlong millis);
void SetAssociation(Thread * a) { _Assoc = a; }
};

View File

@ -5457,22 +5457,12 @@ static struct timespec* compute_abstime(timespec* abstime, jlong millis) {
return abstime;
}
// Test-and-clear _Event, always leaves _Event set to 0, returns immediately.
// Conceptually TryPark() should be equivalent to park(0).
int os::PlatformEvent::TryPark() {
for (;;) {
const int v = _Event;
guarantee((v == 0) || (v == 1), "invariant");
if (Atomic::cmpxchg(0, &_Event, v) == v) return v;
}
}
void os::PlatformEvent::park() { // AKA "down()"
// Invariant: Only the thread associated with the Event/PlatformEvent
// may call park().
// TODO: assert that _Assoc != NULL or _Assoc == Self
assert(_nParked == 0, "invariant");
int v;
for (;;) {
v = _Event;
@ -5572,8 +5562,7 @@ void os::PlatformEvent::unpark() {
// 1 :=> 1
// -1 :=> either 0 or 1; must signal target thread
// That is, we can safely transition _Event from -1 to either
// 0 or 1. Forcing 1 is slightly more efficient for back-to-back
// unpark() calls.
// 0 or 1.
// See also: "Semaphores in Plan 9" by Mullender & Cox
//
// Note: Forcing a transition from "-1" to "1" on an unpark() means
@ -5801,10 +5790,9 @@ void Parker::park(bool isAbsolute, jlong time) {
}
void Parker::unpark() {
int s, status;
status = pthread_mutex_lock(_mutex);
int status = pthread_mutex_lock(_mutex);
assert(status == 0, "invariant");
s = _counter;
const int s = _counter;
_counter = 1;
if (s < 1) {
// thread might be parked

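The Parker::unpark() cleanups above (const-qualifying s and declaring status at its initialization) do not change the underlying pattern: take the mutex, remember the previous counter value, publish a single coalesced permit, and signal only if the parker may be blocked. A standalone sketch of just that unpark() side with raw pthreads follows; the type name is made up, it omits the park() side and the rel/abs condvar pair, and it shows only one of the two unlock/signal orderings the code above chooses between based on WorkAroundNPTLTimedWaitHang.

#include <assert.h>
#include <pthread.h>

// SketchParker is hypothetical; it shows only the unpark() side of the
// counter-plus-condvar pattern used by Parker above.
struct SketchParker {
  volatile int _counter;        // 0: no permit, 1: permit available (permits coalesce)
  pthread_mutex_t _mutex;
  pthread_cond_t _cond;

  SketchParker() : _counter(0) {
    pthread_mutex_init(&_mutex, NULL);
    pthread_cond_init(&_cond, NULL);
  }

  void unpark() {
    int status = pthread_mutex_lock(&_mutex);
    assert(status == 0);
    const int s = _counter;     // remember whether a permit was already pending
    _counter = 1;               // at most one permit is ever stored
    status = pthread_mutex_unlock(&_mutex);
    assert(status == 0);
    if (s < 1) {
      // The parker may be blocked on _cond; wake it after dropping the mutex.
      status = pthread_cond_signal(&_cond);
      assert(status == 0);
    }
  }
};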
View File

@ -315,7 +315,6 @@ class PlatformEvent : public CHeapObj<mtInternal> {
int fired() { return _Event; }
void park();
void unpark();
int TryPark();
int park(jlong millis); // relative timed-wait only
void SetAssociation(Thread * a) { _Assoc = a; }
};

View File

@ -5440,20 +5440,11 @@ static timestruc_t* compute_abstime(timestruc_t* abstime, jlong millis) {
return abstime;
}
// Test-and-clear _Event, always leaves _Event set to 0, returns immediately.
// Conceptually TryPark() should be equivalent to park(0).
int os::PlatformEvent::TryPark() {
for (;;) {
const int v = _Event;
guarantee((v == 0) || (v == 1), "invariant");
if (Atomic::cmpxchg(0, &_Event, v) == v) return v;
}
}
void os::PlatformEvent::park() { // AKA: down()
// Invariant: Only the thread associated with the Event/PlatformEvent
// may call park().
assert(_nParked == 0, "invariant");
int v;
for (;;) {
v = _Event;
@ -5540,8 +5531,7 @@ void os::PlatformEvent::unpark() {
// 1 :=> 1
// -1 :=> either 0 or 1; must signal target thread
// That is, we can safely transition _Event from -1 to either
// 0 or 1. Forcing 1 is slightly more efficient for back-to-back
// unpark() calls.
// 0 or 1.
// See also: "Semaphores in Plan 9" by Mullender & Cox
//
// Note: Forcing a transition from "-1" to "1" on an unpark() means
@ -5745,10 +5735,9 @@ void Parker::park(bool isAbsolute, jlong time) {
}
void Parker::unpark() {
int s, status;
status = os::Solaris::mutex_lock(_mutex);
int status = os::Solaris::mutex_lock(_mutex);
assert(status == 0, "invariant");
s = _counter;
const int s = _counter;
_counter = 1;
status = os::Solaris::mutex_unlock(_mutex);
assert(status == 0, "invariant");

View File

@ -332,7 +332,6 @@ class PlatformEvent : public CHeapObj<mtInternal> {
int fired() { return _Event; }
void park();
int park(jlong millis);
int TryPark();
void unpark();
};

View File

@ -4876,8 +4876,7 @@ void os::PlatformEvent::unpark() {
// 1 :=> 1
// -1 :=> either 0 or 1; must signal target thread
// That is, we can safely transition _Event from -1 to either
// 0 or 1. Forcing 1 is slightly more efficient for back-to-back
// unpark() calls.
// 0 or 1.
// See also: "Semaphores in Plan 9" by Mullender & Cox
//
// Note: Forcing a transition from "-1" to "1" on an unpark() means

View File

@ -1130,29 +1130,30 @@ class CommandLineFlags {
"Use LWP-based instead of libthread-based synchronization " \
"(SPARC only)") \
\
product(ccstr, SyncKnobs, NULL, \
"(Unstable) Various monitor synchronization tunables") \
experimental(ccstr, SyncKnobs, NULL, \
"(Unstable) Various monitor synchronization tunables") \
\
product(intx, EmitSync, 0, \
"(Unsafe, Unstable) " \
"Control emission of inline sync fast-path code") \
experimental(intx, EmitSync, 0, \
"(Unsafe, Unstable) " \
"Control emission of inline sync fast-path code") \
\
product(intx, MonitorBound, 0, "Bound Monitor population") \
\
product(bool, MonitorInUseLists, false, "Track Monitors for Deflation") \
\
product(intx, SyncFlags, 0, "(Unsafe, Unstable) Experimental Sync flags") \
experimental(intx, SyncFlags, 0, "(Unsafe, Unstable) " \
"Experimental Sync flags") \
\
product(intx, SyncVerbose, 0, "(Unstable)") \
experimental(intx, SyncVerbose, 0, "(Unstable)") \
\
product(intx, ClearFPUAtPark, 0, "(Unsafe, Unstable)") \
experimental(intx, ClearFPUAtPark, 0, "(Unsafe, Unstable)") \
\
product(intx, hashCode, 5, \
"(Unstable) select hashCode generation algorithm") \
experimental(intx, hashCode, 5, \
"(Unstable) select hashCode generation algorithm") \
\
product(intx, WorkAroundNPTLTimedWaitHang, 1, \
"(Unstable, Linux-specific) " \
"avoid NPTL-FUTEX hang pthread_cond_timedwait") \
experimental(intx, WorkAroundNPTLTimedWaitHang, 1, \
"(Unstable, Linux-specific) " \
"avoid NPTL-FUTEX hang pthread_cond_timedwait") \
\
product(bool, FilterSpuriousWakeups, true, \
"When true prevents OS-level spurious, or premature, wakeups " \

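With SyncKnobs, EmitSync, SyncFlags, SyncVerbose, ClearFPUAtPark, hashCode and WorkAroundNPTLTimedWaitHang reclassified from product to experimental, the VM now accepts them only after experimental options are unlocked. A hedged example invocation (the specific values are purely illustrative):

java -XX:+UnlockExperimentalVMOptions -XX:hashCode=5 -XX:SyncVerbose=0 -version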
View File

@ -1,4 +1,3 @@
/*
* Copyright (c) 1998, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@ -488,7 +487,6 @@ void Monitor::ILock (Thread * Self) {
for (;;) {
assert(_OnDeck == ESelf, "invariant");
if (TrySpin(Self)) break;
// CONSIDER: if ESelf->TryPark() && TryLock() break ...
// It's probably wise to spin only if we *actually* blocked
// CONSIDER: check the lockbyte, if it remains set then
// preemptively drain the cxq into the EntryList.

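The removed CONSIDER line sat inside Monitor::ILock's acquisition loop, whose overall shape is: try a bounded spin (TrySpin) first, and fall back to parking only if the spin fails. A generic sketch of that spin-then-block shape is below; it is not the Monitor/ParkEvent code itself, the spin budget is arbitrary, and a yield stands in for ParkEvent::park().

#include <atomic>
#include <thread>

// Generic spin-then-block acquire loop; illustrative only.
class SpinThenBlockLock {
  std::atomic<int> _locked{0};

  bool try_lock() {
    int expected = 0;
    return _locked.compare_exchange_strong(expected, 1, std::memory_order_acquire);
  }
  bool try_spin(int iterations) {       // cheap bounded spin, as TrySpin() does
    for (int i = 0; i < iterations; i++) {
      if (_locked.load(std::memory_order_relaxed) == 0 && try_lock()) return true;
    }
    return false;
  }
 public:
  void lock() {
    for (;;) {
      if (try_spin(1000)) return;       // arbitrary spin budget
      std::this_thread::yield();        // stand-in for parking on a ParkEvent
    }
  }
  void unlock() { _locked.store(0, std::memory_order_release); }
};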
View File

@ -46,9 +46,9 @@
#if defined(__GNUC__) && !defined(IA64) && !defined(PPC64)
// Need to inhibit inlining for older versions of GCC to avoid build-time failures
#define ATTR __attribute__((noinline))
#define NOINLINE __attribute__((noinline))
#else
#define ATTR
#define NOINLINE
#endif
@ -103,38 +103,39 @@
// The knob* variables are effectively final. Once set they should
// never be modified hence. Consider using __read_mostly with GCC.
int ObjectMonitor::Knob_Verbose = 0;
int ObjectMonitor::Knob_SpinLimit = 5000; // derived by an external tool -
static int Knob_LogSpins = 0; // enable jvmstat tally for spins
static int Knob_HandOff = 0;
static int Knob_ReportSettings = 0;
int ObjectMonitor::Knob_Verbose = 0;
int ObjectMonitor::Knob_VerifyInUse = 0;
int ObjectMonitor::Knob_SpinLimit = 5000; // derived by an external tool -
static int Knob_LogSpins = 0; // enable jvmstat tally for spins
static int Knob_HandOff = 0;
static int Knob_ReportSettings = 0;
static int Knob_SpinBase = 0; // Floor AKA SpinMin
static int Knob_SpinBackOff = 0; // spin-loop backoff
static int Knob_CASPenalty = -1; // Penalty for failed CAS
static int Knob_OXPenalty = -1; // Penalty for observed _owner change
static int Knob_SpinSetSucc = 1; // spinners set the _succ field
static int Knob_SpinEarly = 1;
static int Knob_SuccEnabled = 1; // futile wake throttling
static int Knob_SuccRestrict = 0; // Limit successors + spinners to at-most-one
static int Knob_MaxSpinners = -1; // Should be a function of # CPUs
static int Knob_Bonus = 100; // spin success bonus
static int Knob_BonusB = 100; // spin success bonus
static int Knob_Penalty = 200; // spin failure penalty
static int Knob_Poverty = 1000;
static int Knob_SpinAfterFutile = 1; // Spin after returning from park()
static int Knob_FixedSpin = 0;
static int Knob_OState = 3; // Spinner checks thread state of _owner
static int Knob_UsePause = 1;
static int Knob_ExitPolicy = 0;
static int Knob_PreSpin = 10; // 20-100 likely better
static int Knob_ResetEvent = 0;
static int BackOffMask = 0;
static int Knob_SpinBase = 0; // Floor AKA SpinMin
static int Knob_SpinBackOff = 0; // spin-loop backoff
static int Knob_CASPenalty = -1; // Penalty for failed CAS
static int Knob_OXPenalty = -1; // Penalty for observed _owner change
static int Knob_SpinSetSucc = 1; // spinners set the _succ field
static int Knob_SpinEarly = 1;
static int Knob_SuccEnabled = 1; // futile wake throttling
static int Knob_SuccRestrict = 0; // Limit successors + spinners to at-most-one
static int Knob_MaxSpinners = -1; // Should be a function of # CPUs
static int Knob_Bonus = 100; // spin success bonus
static int Knob_BonusB = 100; // spin success bonus
static int Knob_Penalty = 200; // spin failure penalty
static int Knob_Poverty = 1000;
static int Knob_SpinAfterFutile = 1; // Spin after returning from park()
static int Knob_FixedSpin = 0;
static int Knob_OState = 3; // Spinner checks thread state of _owner
static int Knob_UsePause = 1;
static int Knob_ExitPolicy = 0;
static int Knob_PreSpin = 10; // 20-100 likely better
static int Knob_ResetEvent = 0;
static int BackOffMask = 0;
static int Knob_FastHSSEC = 0;
static int Knob_MoveNotifyee = 2; // notify() - disposition of notifyee
static int Knob_QMode = 0; // EntryList-cxq policy - queue discipline
static volatile int InitDone = 0;
static int Knob_FastHSSEC = 0;
static int Knob_MoveNotifyee = 2; // notify() - disposition of notifyee
static int Knob_QMode = 0; // EntryList-cxq policy - queue discipline
static volatile int InitDone = 0;
#define TrySpin TrySpin_VaryDuration
@ -199,7 +200,7 @@ static volatile int InitDone = 0;
// on EntryList|cxq. That is, spinning relieves contention on the "inner"
// locks and monitor metadata.
//
// Cxq points to the the set of Recently Arrived Threads attempting entry.
// Cxq points to the set of Recently Arrived Threads attempting entry.
// Because we push threads onto _cxq with CAS, the RATs must take the form of
// a singly-linked LIFO. We drain _cxq into EntryList at unlock-time when
// the unlocking thread notices that EntryList is null but _cxq is != null.
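The comment above describes the _cxq push protocol: arriving threads prepend themselves with a CAS, so the list is necessarily a singly-linked LIFO. Detached from ObjectWaiter and the monitor, the idiom looks like the following sketch; the node type and function name are generic.

#include <atomic>

// Minimal sketch of the CAS-push idiom described above: new arrivals prepend
// themselves to a singly-linked LIFO. Node is a generic stand-in, not
// HotSpot's ObjectWaiter.
struct Node {
  Node* next;
};

void cas_push(std::atomic<Node*>& head, Node* n) {
  Node* old = head.load(std::memory_order_relaxed);
  do {
    n->next = old;                      // link to the current head
  } while (!head.compare_exchange_weak(old, n,
                                       std::memory_order_release,
                                       std::memory_order_relaxed));
}

At unlock time the whole chain can be detached in one step, for example with head.exchange(nullptr), which corresponds to the drain of _cxq into the EntryList described above.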
@ -269,13 +270,12 @@ bool ObjectMonitor::try_enter(Thread* THREAD) {
}
}
void ATTR ObjectMonitor::enter(TRAPS) {
void NOINLINE ObjectMonitor::enter(TRAPS) {
// The following code is ordered to check the most common cases first
// and to reduce RTS->RTO cache line upgrades on SPARC and IA32 processors.
Thread * const Self = THREAD;
void * cur;
cur = Atomic::cmpxchg_ptr(Self, &_owner, NULL);
void * cur = Atomic::cmpxchg_ptr (Self, &_owner, NULL);
if (cur == NULL) {
// Either ASSERT _recursions == 0 or explicitly set _recursions = 0.
assert(_recursions == 0 , "invariant");
@ -435,26 +435,24 @@ void ATTR ObjectMonitor::enter(TRAPS) {
// Callers must compensate as needed.
int ObjectMonitor::TryLock (Thread * Self) {
for (;;) {
void * own = _owner;
if (own != NULL) return 0;
if (Atomic::cmpxchg_ptr (Self, &_owner, NULL) == NULL) {
// Either guarantee _recursions == 0 or set _recursions = 0.
assert(_recursions == 0, "invariant");
assert(_owner == Self, "invariant");
// CONSIDER: set or assert that OwnerIsThread == 1
return 1;
}
// The lock had been free momentarily, but we lost the race to the lock.
// Interference -- the CAS failed.
// We can either return -1 or retry.
// Retry doesn't make as much sense because the lock was just acquired.
if (true) return -1;
}
void * own = _owner;
if (own != NULL) return 0;
if (Atomic::cmpxchg_ptr (Self, &_owner, NULL) == NULL) {
// Either guarantee _recursions == 0 or set _recursions = 0.
assert(_recursions == 0, "invariant");
assert(_owner == Self, "invariant");
// CONSIDER: set or assert that OwnerIsThread == 1
return 1;
}
// The lock had been free momentarily, but we lost the race to the lock.
// Interference -- the CAS failed.
// We can either return -1 or retry.
// Retry doesn't make as much sense because the lock was just acquired.
return -1;
}
void ATTR ObjectMonitor::EnterI (TRAPS) {
Thread * Self = THREAD;
void NOINLINE ObjectMonitor::EnterI (TRAPS) {
Thread * const Self = THREAD;
assert(Self->is_Java_thread(), "invariant");
assert(((JavaThread *) Self)->thread_state() == _thread_blocked , "invariant");
@ -550,7 +548,7 @@ void ATTR ObjectMonitor::EnterI (TRAPS) {
Atomic::cmpxchg_ptr(Self, &_Responsible, NULL);
}
// The lock have been released while this thread was occupied queueing
// The lock might have been released while this thread was occupied queueing
// itself onto _cxq. To close the race and avoid "stranding" and
// progress-liveness failure we must resample-retry _owner before parking.
// Note the Dekker/Lamport duality: ST cxq; MEMBAR; LD Owner.
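The Dekker/Lamport note above is the classic store-then-load handshake: the entering thread stores to _cxq and then reloads _owner, while the exiting thread stores to _owner and then reloads _cxq, each with a full barrier between the store and the load. Rendered with generic std::atomic flags (these are not the monitor fields, just the shape of the argument):

#include <atomic>

std::atomic<int> announced{0};   // stands in for "pushed onto _cxq"
std::atomic<int> released{0};    // stands in for "_owner has been cleared"

// Enter side: publish the enqueue, then re-examine the owner (ST cxq; MEMBAR; LD Owner).
bool enter_side_sees_release() {
  announced.store(1, std::memory_order_seq_cst);
  return released.load(std::memory_order_seq_cst) == 1;
}

// Exit side: release the lock, then re-examine the queue (ST Owner; MEMBAR; LD cxq).
bool exit_side_sees_arrival() {
  released.store(1, std::memory_order_seq_cst);
  return announced.load(std::memory_order_seq_cst) == 1;
}
// With sequentially consistent (fully fenced) accesses, both functions cannot
// return false once both stores have executed: either the enqueued thread
// re-samples a free lock or the exiting thread sees the new arrival. That is
// what closes the stranding race described above.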
@ -702,7 +700,7 @@ void ATTR ObjectMonitor::EnterI (TRAPS) {
// Knob_Reset and Knob_SpinAfterFutile support and restructuring the
// loop accordingly.
void ATTR ObjectMonitor::ReenterI (Thread * Self, ObjectWaiter * SelfNode) {
void NOINLINE ObjectMonitor::ReenterI (Thread * Self, ObjectWaiter * SelfNode) {
assert(Self != NULL , "invariant");
assert(SelfNode != NULL , "invariant");
assert(SelfNode->_thread == Self , "invariant");
@ -790,6 +788,7 @@ void ATTR ObjectMonitor::ReenterI (Thread * Self, ObjectWaiter * SelfNode) {
OrderAccess::fence(); // see comments at the end of EnterI()
}
// By convention we unlink a contending thread from EntryList|cxq immediately
// after the thread acquires the lock in ::enter(). Equally, we could defer
// unlinking the thread until ::exit()-time.
@ -810,7 +809,7 @@ void ObjectMonitor::UnlinkAfterAcquire (Thread * Self, ObjectWaiter * SelfNode)
assert(prv == NULL || prv->TState == ObjectWaiter::TS_ENTER, "invariant");
TEVENT(Unlink from EntryList);
} else {
guarantee(SelfNode->TState == ObjectWaiter::TS_CXQ, "invariant");
assert(SelfNode->TState == ObjectWaiter::TS_CXQ, "invariant");
// Inopportune interleaving -- Self is still on the cxq.
// This usually means the enqueue of self raced an exiting thread.
// Normally we'll find Self near the front of the cxq, so
@ -850,10 +849,12 @@ void ObjectMonitor::UnlinkAfterAcquire (Thread * Self, ObjectWaiter * SelfNode)
TEVENT(Unlink from cxq);
}
#ifdef ASSERT
// Diagnostic hygiene ...
SelfNode->_prev = (ObjectWaiter *) 0xBAD;
SelfNode->_next = (ObjectWaiter *) 0xBAD;
SelfNode->TState = ObjectWaiter::TS_RUN;
#endif
}
// -----------------------------------------------------------------------------
@ -906,9 +907,15 @@ void ObjectMonitor::UnlinkAfterAcquire (Thread * Self, ObjectWaiter * SelfNode)
// the integral of the # of active timers at any instant over time).
// Both impinge on OS scalability. Given that, at most one thread parked on
// a monitor will use a timer.
//
// There is also the risk of a futile wake-up. If we drop the lock
// another thread can reacquire the lock immediately, and we can
// then wake a thread unnecessarily. This is benign, and we've
// structured the code so the windows are short and the frequency
// of such futile wakeups is low.
void ATTR ObjectMonitor::exit(bool not_suspended, TRAPS) {
Thread * Self = THREAD;
void NOINLINE ObjectMonitor::exit(bool not_suspended, TRAPS) {
Thread * const Self = THREAD;
if (THREAD != _owner) {
if (THREAD->is_lock_owned((address) _owner)) {
// Transmute _owner from a BasicLock pointer to a Thread address.
@ -920,14 +927,17 @@ void ATTR ObjectMonitor::exit(bool not_suspended, TRAPS) {
_recursions = 0;
OwnerIsThread = 1;
} else {
// NOTE: we need to handle unbalanced monitor enter/exit
// in native code by throwing an exception.
// TODO: Throw an IllegalMonitorStateException ?
// Apparent unbalanced locking ...
// Naively we'd like to throw IllegalMonitorStateException.
// As a practical matter we can neither allocate nor throw an
// exception as ::exit() can be called from leaf routines.
// see x86_32.ad Fast_Unlock() and the I1 and I2 properties.
// Upon deeper reflection, however, in a properly run JVM the only
// way we should encounter this situation is in the presence of
// unbalanced JNI locking. TODO: CheckJNICalls.
// See also: CR4414101
TEVENT(Exit - Throw IMSX);
assert(false, "Non-balanced monitor enter/exit!");
if (false) {
THROW(vmSymbols::java_lang_IllegalMonitorStateException());
}
assert(false, "Non-balanced monitor enter/exit! Likely JNI locking");
return;
}
}
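The rewritten comment block above names unbalanced JNI locking as the realistic way for exit() to run on a thread that does not own the monitor. One plausible shape of such imbalance, sketched as a hypothetical JNI native, is a MonitorExit with no matching MonitorEnter issued against an object the Java caller is holding via synchronized; the caller's own monitorexit can then find a monitor it no longer owns. The method and class names below are made up, and the exact failure mode depends on build and VM flags.

#include <jni.h>

// Hypothetical native illustrating unbalanced JNI locking: it exits a monitor
// it never entered. If the Java caller currently holds 'lockee' through a
// synchronized block, the caller's later monitorexit can run against a monitor
// this thread no longer owns, which is the imbalance discussed above.
extern "C" JNIEXPORT void JNICALL
Java_Example_stealUnlock(JNIEnv* env, jclass clazz, jobject lockee) {
  env->MonitorExit(lockee);   // deliberately missing the matching MonitorEnter
}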
@ -976,6 +986,7 @@ void ATTR ObjectMonitor::exit(bool not_suspended, TRAPS) {
return;
}
TEVENT(Inflated exit - complex egress);
// Other threads are blocked trying to acquire the lock.
// Normally the exiting thread is responsible for ensuring succession,
// but if other successors are ready or other entering threads are spinning
@ -1142,9 +1153,9 @@ void ATTR ObjectMonitor::exit(bool not_suspended, TRAPS) {
if (w != NULL) {
// I'd like to write: guarantee (w->_thread != Self).
// But in practice an exiting thread may find itself on the EntryList.
// Lets say thread T1 calls O.wait(). Wait() enqueues T1 on O's waitset and
// Let's say thread T1 calls O.wait(). Wait() enqueues T1 on O's waitset and
// then calls exit(). Exit release the lock by setting O._owner to NULL.
// Lets say T1 then stalls. T2 acquires O and calls O.notify(). The
// Let's say T1 then stalls. T2 acquires O and calls O.notify(). The
// notify() operation moves T1 from O's waitset to O's EntryList. T2 then
// release the lock "O". T2 resumes immediately after the ST of null into
// _owner, above. T2 notices that the EntryList is populated, so it
@ -1261,10 +1272,13 @@ void ATTR ObjectMonitor::exit(bool not_suspended, TRAPS) {
// MEMBAR
// LD Self_>_suspend_flags
//
// UPDATE 2007-10-6: since I've replaced the native Mutex/Monitor subsystem
// with a more efficient implementation, the need to use "FastHSSEC" has
// decreased. - Dave
bool ObjectMonitor::ExitSuspendEquivalent (JavaThread * jSelf) {
int Mode = Knob_FastHSSEC;
const int Mode = Knob_FastHSSEC;
if (Mode && !jSelf->is_external_suspend()) {
assert(jSelf->is_suspend_equivalent(), "invariant");
jSelf->clear_suspend_equivalent();
@ -1413,7 +1427,7 @@ void ObjectMonitor::post_monitor_wait_event(EventJavaMonitorWait* event,
// Wait/Notify/NotifyAll
//
// Note: a subset of changes to ObjectMonitor::wait()
// will need to be replicated in complete_exit above
// will need to be replicated in complete_exit
void ObjectMonitor::wait(jlong millis, bool interruptible, TRAPS) {
Thread * const Self = THREAD;
assert(Self->is_Java_thread(), "Must be Java thread!");
@ -2268,12 +2282,12 @@ ObjectWaiter::ObjectWaiter(Thread* thread) {
assert(_event != NULL, "invariant");
}
void ObjectWaiter::wait_reenter_begin(ObjectMonitor *mon) {
void ObjectWaiter::wait_reenter_begin(ObjectMonitor * const mon) {
JavaThread *jt = (JavaThread *)this->_thread;
_active = JavaThreadBlockedOnMonitorEnterState::wait_reenter_begin(jt, mon);
}
void ObjectWaiter::wait_reenter_end(ObjectMonitor *mon) {
void ObjectWaiter::wait_reenter_end(ObjectMonitor * const mon) {
JavaThread *jt = (JavaThread *)this->_thread;
JavaThreadBlockedOnMonitorEnterState::wait_reenter_end(jt, _active);
}
@ -2455,6 +2469,7 @@ void ObjectMonitor::DeferredInitialize() {
#define SETKNOB(x) { Knob_##x = kvGetInt (knobs, #x, Knob_##x); }
SETKNOB(ReportSettings);
SETKNOB(Verbose);
SETKNOB(VerifyInUse);
SETKNOB(FixedSpin);
SETKNOB(SpinLimit);
SETKNOB(SpinBase);

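The SETKNOB lines above (now including the new VerifyInUse knob) pull individual integers out of the SyncKnobs string via kvGetInt. To the best of my understanding the knob string is a ':'-separated list of Key=Value pairs, so something like -XX:SyncKnobs=SpinLimit=2000:VerifyInUse=1 (together with -XX:+UnlockExperimentalVMOptions now that the flag is experimental) would feed these lines. A small standalone parser in that assumed format, as a stand-in for kvGetInt rather than HotSpot's actual parser:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Hypothetical stand-in for kvGetInt(): scan a ':'-separated list of Key=Value
// pairs and return the integer value for 'key', or 'dflt' if the key is absent.
static int sketch_kvGetInt(const char* knobs, const char* key, int dflt) {
  if (knobs == NULL) return dflt;
  const size_t klen = strlen(key);
  const char* p = knobs;
  while (*p != '\0') {
    const char* end = strchr(p, ':');
    if (end == NULL) end = p + strlen(p);
    if ((size_t)(end - p) > klen && strncmp(p, key, klen) == 0 && p[klen] == '=') {
      return atoi(p + klen + 1);        // atoi stops at the next ':' on its own
    }
    p = (*end == ':') ? end + 1 : end;
  }
  return dflt;
}

int main() {
  const char* knobs = "SpinLimit=2000:VerifyInUse=1";   // e.g. from -XX:SyncKnobs=...
  printf("SpinLimit=%d VerifyInUse=%d Verbose=%d\n",
         sketch_kvGetInt(knobs, "SpinLimit", 5000),
         sketch_kvGetInt(knobs, "VerifyInUse", 0),
         sketch_kvGetInt(knobs, "Verbose", 0));
  return 0;
}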
View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1998, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -311,6 +311,7 @@ public:
public:
static int Knob_Verbose;
static int Knob_VerifyInUse;
static int Knob_SpinLimit;
void* operator new (size_t size) throw() {
return AllocateHeap(size, mtInternal);

View File

@ -1810,14 +1810,8 @@ JRT_END
// Handles the uncommon case in locking, i.e., contention or an inflated lock.
#ifndef PRODUCT
int SharedRuntime::_monitor_enter_ctr=0;
#endif
JRT_ENTRY_NO_ASYNC(void, SharedRuntime::complete_monitor_locking_C(oopDesc* _obj, BasicLock* lock, JavaThread* thread))
oop obj(_obj);
#ifndef PRODUCT
_monitor_enter_ctr++; // monitor enter slow
#endif
if (PrintBiasedLockingStatistics) {
Atomic::inc(BiasedLocking::slow_path_entry_count_addr());
}
@ -1831,15 +1825,9 @@ JRT_ENTRY_NO_ASYNC(void, SharedRuntime::complete_monitor_locking_C(oopDesc* _obj
assert(!HAS_PENDING_EXCEPTION, "Should have no exception here");
JRT_END
#ifndef PRODUCT
int SharedRuntime::_monitor_exit_ctr=0;
#endif
// Handles the uncommon cases of monitor unlocking in compiled code
JRT_LEAF(void, SharedRuntime::complete_monitor_unlocking_C(oopDesc* _obj, BasicLock* lock))
oop obj(_obj);
#ifndef PRODUCT
_monitor_exit_ctr++; // monitor exit slow
#endif
Thread* THREAD = JavaThread::current();
// I'm not convinced we need the code contained by MIGHT_HAVE_PENDING anymore
// testing was unable to ever fire the assert that guarded it so I have removed it.
@ -1879,8 +1867,6 @@ void SharedRuntime::print_statistics() {
ttyLocker ttyl;
if (xtty != NULL) xtty->head("statistics type='SharedRuntime'");
if (_monitor_enter_ctr) tty->print_cr("%5d monitor enter slow", _monitor_enter_ctr);
if (_monitor_exit_ctr) tty->print_cr("%5d monitor exit slow", _monitor_exit_ctr);
if (_throw_null_ctr) tty->print_cr("%5d implicit null throw", _throw_null_ctr);
SharedRuntime::print_ic_miss_histogram();
@ -2464,9 +2450,9 @@ AdapterHandlerEntry* AdapterHandlerLibrary::get_adapter(methodHandle method) {
if (PrintAdapterHandlers || PrintStubCode) {
ttyLocker ttyl;
entry->print_adapter_on(tty);
tty->print_cr("i2c argument handler #%d for: %s %s (%d bytes generated)",
tty->print_cr("i2c argument handler #%d for: %s %s %s (%d bytes generated)",
_adapters->number_of_entries(), (method->is_static() ? "static" : "receiver"),
method->signature()->as_C_string(), insts_size);
method->signature()->as_C_string(), fingerprint->as_string(), insts_size);
tty->print_cr("c2i argument handler starts at %p", entry->get_c2i_entry());
if (Verbose || PrintStubCode) {
address first_pc = entry->base_address();

View File

@ -516,8 +516,6 @@ class SharedRuntime: AllStatic {
static void trace_ic_miss(address at);
public:
static int _monitor_enter_ctr; // monitor enter slow
static int _monitor_exit_ctr; // monitor exit slow
static int _throw_null_ctr; // throwing a null-pointer exception
static int _ic_miss_ctr; // total # of IC misses
static int _wrong_method_ctr;

View File

@ -44,9 +44,9 @@
#if defined(__GNUC__) && !defined(PPC64)
// Need to inhibit inlining for older versions of GCC to avoid build-time failures
#define ATTR __attribute__((noinline))
#define NOINLINE __attribute__((noinline))
#else
#define ATTR
#define NOINLINE
#endif
PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
@ -206,14 +206,6 @@ void ObjectSynchronizer::slow_enter(Handle obj, BasicLock* lock, TRAPS) {
return;
}
#if 0
// The following optimization isn't particularly useful.
if (mark->has_monitor() && mark->monitor()->is_entered(THREAD)) {
lock->set_displaced_header(NULL);
return;
}
#endif
// The object header will never be displaced to this lock,
// so it does not matter what the value is, except that it
// must be non-zero to avoid looking like a re-entrant lock,
@ -573,7 +565,7 @@ intptr_t ObjectSynchronizer::FastHashCode (Thread * Self, oop obj) {
// added check of the bias pattern is to avoid useless calls to
// thread-local storage.
if (obj->mark()->has_bias_pattern()) {
// Box and unbox the raw reference just in case we cause a STW safepoint.
// Handle for oop obj in case of STW safepoint
Handle hobj(Self, obj);
// Relaxing assertion for bug 6320749.
assert(Universe::verify_in_progress() ||
@ -890,23 +882,23 @@ static void InduceScavenge (Thread * Self, const char * Whence) {
}
}
}
/* Too slow for general assert or debug
void ObjectSynchronizer::verifyInUse (Thread *Self) {
ObjectMonitor* mid;
int inusetally = 0;
for (mid = Self->omInUseList; mid != NULL; mid = mid->FreeNext) {
inusetally ++;
inusetally++;
}
assert(inusetally == Self->omInUseCount, "inuse count off");
int freetally = 0;
for (mid = Self->omFreeList; mid != NULL; mid = mid->FreeNext) {
freetally ++;
freetally++;
}
assert(freetally == Self->omFreeCount, "free count off");
}
*/
ObjectMonitor * ATTR ObjectSynchronizer::omAlloc (Thread * Self) {
ObjectMonitor * NOINLINE ObjectSynchronizer::omAlloc (Thread * Self) {
// A large MAXPRIVATE value reduces both list lock contention
// and list coherency traffic, but also tends to increase the
// number of objectMonitors in circulation as well as the STW
@ -932,7 +924,9 @@ ObjectMonitor * ATTR ObjectSynchronizer::omAlloc (Thread * Self) {
m->FreeNext = Self->omInUseList;
Self->omInUseList = m;
Self->omInUseCount++;
// verifyInUse(Self);
if (ObjectMonitor::Knob_VerifyInUse) {
verifyInUse(Self);
}
} else {
m->FreeNext = NULL;
}
@ -1052,7 +1046,9 @@ void ObjectSynchronizer::omRelease (Thread * Self, ObjectMonitor * m, bool fromP
curmidinuse->FreeNext = mid->FreeNext; // maintain the current thread inuselist
}
Self->omInUseCount--;
// verifyInUse(Self);
if (ObjectMonitor::Knob_VerifyInUse) {
verifyInUse(Self);
}
break;
} else {
curmidinuse = mid;
@ -1061,7 +1057,7 @@ void ObjectSynchronizer::omRelease (Thread * Self, ObjectMonitor * m, bool fromP
}
}
// FreeNext is used for both onInUseList and omFreeList, so clear old before setting new
// FreeNext is used for both omInUseList and omFreeList, so clear old before setting new
m->FreeNext = Self->omFreeList;
Self->omFreeList = m;
Self->omFreeCount++;
@ -1074,7 +1070,7 @@ void ObjectSynchronizer::omRelease (Thread * Self, ObjectMonitor * m, bool fromP
// consecutive STW safepoints. Relatedly, we might decay
// omFreeProvision at STW safepoints.
//
// Also return the monitors of a moribund thread"s omInUseList to
// Also return the monitors of a moribund thread's omInUseList to
// a global gOmInUseList under the global list lock so these
// will continue to be scanned.
//
@ -1115,7 +1111,6 @@ void ObjectSynchronizer::omFlush (Thread * Self) {
InUseTail = curom;
InUseTally++;
}
// TODO debug
assert(Self->omInUseCount == InUseTally, "inuse count off");
Self->omInUseCount = 0;
guarantee(InUseTail != NULL && InUseList != NULL, "invariant");
@ -1154,7 +1149,7 @@ ObjectMonitor* ObjectSynchronizer::inflate_helper(oop obj) {
// multiple locks occupy the same $ line. Padding might be appropriate.
ObjectMonitor * ATTR ObjectSynchronizer::inflate (Thread * Self, oop object) {
ObjectMonitor * NOINLINE ObjectSynchronizer::inflate (Thread * Self, oop object) {
// Inflate mutates the heap ...
// Relaxing assertion for bug 6320749.
assert(Universe::verify_in_progress() ||
@ -1385,7 +1380,7 @@ enum ManifestConstants {
// Deflate a single monitor if not in use
// Return true if deflated, false if in use
bool ObjectSynchronizer::deflate_monitor(ObjectMonitor* mid, oop obj,
ObjectMonitor** FreeHeadp, ObjectMonitor** FreeTailp) {
ObjectMonitor** freeHeadp, ObjectMonitor** freeTailp) {
bool deflated;
// Normal case ... The monitor is associated with obj.
guarantee(obj->mark() == markOopDesc::encode(mid), "invariant");
@ -1415,13 +1410,13 @@ bool ObjectSynchronizer::deflate_monitor(ObjectMonitor* mid, oop obj,
assert(mid->object() == NULL, "invariant");
// Move the object to the working free list defined by FreeHead,FreeTail.
if (*FreeHeadp == NULL) *FreeHeadp = mid;
if (*FreeTailp != NULL) {
ObjectMonitor * prevtail = *FreeTailp;
if (*freeHeadp == NULL) *freeHeadp = mid;
if (*freeTailp != NULL) {
ObjectMonitor * prevtail = *freeTailp;
assert(prevtail->FreeNext == NULL, "cleaned up deflated?"); // TODO KK
prevtail->FreeNext = mid;
}
*FreeTailp = mid;
*freeTailp = mid;
deflated = true;
}
return deflated;
@ -1429,7 +1424,7 @@ bool ObjectSynchronizer::deflate_monitor(ObjectMonitor* mid, oop obj,
// Caller acquires ListLock
int ObjectSynchronizer::walk_monitor_list(ObjectMonitor** listheadp,
ObjectMonitor** FreeHeadp, ObjectMonitor** FreeTailp) {
ObjectMonitor** freeHeadp, ObjectMonitor** freeTailp) {
ObjectMonitor* mid;
ObjectMonitor* next;
ObjectMonitor* curmidinuse = NULL;
@ -1439,7 +1434,7 @@ int ObjectSynchronizer::walk_monitor_list(ObjectMonitor** listheadp,
oop obj = (oop) mid->object();
bool deflated = false;
if (obj != NULL) {
deflated = deflate_monitor(mid, obj, FreeHeadp, FreeTailp);
deflated = deflate_monitor(mid, obj, freeHeadp, freeTailp);
}
if (deflated) {
// extract from per-thread in-use-list
@ -1482,7 +1477,9 @@ void ObjectSynchronizer::deflate_idle_monitors() {
nInCirculation+= cur->omInUseCount;
int deflatedcount = walk_monitor_list(cur->omInUseList_addr(), &FreeHead, &FreeTail);
cur->omInUseCount-= deflatedcount;
// verifyInUse(cur);
if (ObjectMonitor::Knob_VerifyInUse) {
verifyInUse(cur);
}
nScavenged += deflatedcount;
nInuse += cur->omInUseCount;
}
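deflate_monitor() above threads each deflated monitor onto a working free list through the freeHeadp/freeTailp out-parameters; the step being performed is an ordinary head/tail append on a singly-linked list. A generic version of that append, with an illustrative node type rather than ObjectMonitor, looks like this:

#include <cstddef>

// Generic tail-append used to build a working list, mirroring the
// freeHeadp/freeTailp handling above; MonNode is a stand-in for
// ObjectMonitor with its FreeNext link.
struct MonNode {
  MonNode* FreeNext;
};

static void append_to_working_free_list(MonNode** freeHeadp, MonNode** freeTailp,
                                        MonNode* mid) {
  mid->FreeNext = NULL;                     // mid becomes the new tail
  if (*freeHeadp == NULL) *freeHeadp = mid; // first element is also the head
  if (*freeTailp != NULL) {
    MonNode* prevtail = *freeTailp;
    prevtail->FreeNext = mid;               // old tail now points at mid
  }
  *freeTailp = mid;
}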

View File

@ -84,7 +84,7 @@ class ObjectSynchronizer : AllStatic {
static void reenter (Handle obj, intptr_t recursion, TRAPS);
// thread-specific and global objectMonitor free list accessors
// static void verifyInUse (Thread * Self) ; too slow for general assert/debug
static void verifyInUse(Thread * Self);
static ObjectMonitor * omAlloc(Thread * Self);
static void omRelease(Thread * Self, ObjectMonitor * m, bool FromPerThreadAlloc);
static void omFlush(Thread * Self);
@ -114,10 +114,10 @@ class ObjectSynchronizer : AllStatic {
// An adaptive profile-based deflation policy could be used if needed
static void deflate_idle_monitors();
static int walk_monitor_list(ObjectMonitor** listheadp,
ObjectMonitor** FreeHeadp,
ObjectMonitor** FreeTailp);
static bool deflate_monitor(ObjectMonitor* mid, oop obj, ObjectMonitor** FreeHeadp,
ObjectMonitor** FreeTailp);
ObjectMonitor** freeHeadp,
ObjectMonitor** freeTailp);
static bool deflate_monitor(ObjectMonitor* mid, oop obj, ObjectMonitor** freeHeadp,
ObjectMonitor** freeTailp);
static void oops_do(OopClosure* f);
// debugging
@ -130,7 +130,10 @@ class ObjectSynchronizer : AllStatic {
enum { _BLOCKSIZE = 128 };
static ObjectMonitor* gBlockList;
static ObjectMonitor * volatile gFreeList;
static ObjectMonitor * volatile gOmInUseList; // for moribund thread, so monitors they inflated still get scanned
// global monitor in use list, for moribund threads,
// monitors they inflated need to be scanned for deflation
static ObjectMonitor * volatile gOmInUseList;
// count of entries in gOmInUseList
static int gOmInUseCount;
};

View File

@ -4392,6 +4392,12 @@ void Thread::SpinRelease (volatile int * adr) {
// It's safe if subsequent LDs and STs float "up" into the critical section,
// but prior LDs and STs within the critical section can't be allowed
// to reorder or float past the ST that releases the lock.
// Loads and stores in the critical section - which appear in program
// order before the store that releases the lock - must also appear
// before the store that releases the lock in memory visibility order.
// Conceptually we need a #loadstore|#storestore "release" MEMBAR before
// the ST of 0 into the lock-word which releases the lock, so fence
// more than covers this on all platforms.
*adr = 0;
}
@ -4573,18 +4579,25 @@ void Thread::muxAcquireW (volatile intptr_t * Lock, ParkEvent * ev) {
// This implementation pops from the head of the list. This is unfair,
// but tends to provide excellent throughput as hot threads remain hot.
// (We wake recently run threads first).
//
// All paths through muxRelease() will execute a CAS.
// Release consistency -- We depend on the CAS in muxRelease() to provide full
// bidirectional fence/MEMBAR semantics, ensuring that all prior memory operations
// executed within the critical section are complete and globally visible before the
// store (CAS) to the lock-word that releases the lock becomes globally visible.
void Thread::muxRelease (volatile intptr_t * Lock) {
for (;;) {
const intptr_t w = Atomic::cmpxchg_ptr(0, Lock, LOCKBIT);
assert(w & LOCKBIT, "invariant");
if (w == LOCKBIT) return;
ParkEvent * List = (ParkEvent *)(w & ~LOCKBIT);
ParkEvent * const List = (ParkEvent *) (w & ~LOCKBIT);
assert(List != NULL, "invariant");
assert(List->OnList == intptr_t(Lock), "invariant");
ParkEvent * nxt = List->ListNext;
ParkEvent * const nxt = List->ListNext;
guarantee((intptr_t(nxt) & LOCKBIT) == 0, "invariant");
// The following CAS() releases the lock and pops the head element.
// The CAS() also ratifies the previously fetched lock-word value.
if (Atomic::cmpxchg_ptr (intptr_t(nxt), Lock, w) != w) {
continue;
}
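The comments added to SpinRelease() and muxRelease() above distinguish two ways of getting the ordering a lock release needs: an explicit release barrier followed by a plain store of zero, versus relying on a fully fenced CAS that clears the lock word (and, in muxRelease, pops the list head) in one step. In std::atomic terms the two idioms look roughly like the sketch below; the lock word is generic, not Thread's Lock/LOCKBIT machinery.

#include <atomic>
#include <cstdint>

static std::atomic<std::intptr_t> lock_word{0};

// SpinRelease flavor: an explicit release barrier, then a plain store of zero.
// This provides the #loadstore|#storestore ordering the comment above asks for.
void release_with_fence() {
  std::atomic_thread_fence(std::memory_order_release);
  lock_word.store(0, std::memory_order_relaxed);
}

// muxRelease flavor: the CAS that clears the lock word is itself fully
// ordered, so no separate barrier is needed before it.
bool release_with_cas(std::intptr_t expected_locked) {
  return lock_word.compare_exchange_strong(expected_locked, 0,
                                           std::memory_order_seq_cst);
}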