8049737: Contended Locking reorder and cache line bucket

JEP-143/JDK-8046133 - optimization #1 - reorder and cache line bucket.

Co-authored-by: Dave Dice <dave.dice@oracle.com>
Co-authored-by: Karen Kinnear <karen.kinnear@oracle.com>
Reviewed-by: shade, dice, dholmes, dsimms
This commit is contained in:
Daniel D. Daugherty 2014-10-14 10:32:12 -07:00
parent 51866388d1
commit f1ab0fae73
13 changed files with 302 additions and 247 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2001, 2005, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -50,8 +50,8 @@ public class ObjectMonitor extends VMObject {
ownerFieldOffset = f.getOffset();
f = type.getField("FreeNext");
FreeNextFieldOffset = f.getOffset();
countField = type.getCIntegerField("_count");
waitersField = type.getCIntegerField("_waiters");
countField = type.getJIntField("_count");
waitersField = type.getJIntField("_waiters");
recursionsField = type.getCIntegerField("_recursions");
}
@ -81,15 +81,15 @@ public class ObjectMonitor extends VMObject {
// FIXME
// void set_owner(void* owner);
public long waiters() { return waitersField.getValue(addr); }
public int waiters() { return waitersField.getValue(addr); }
public Address freeNext() { return addr.getAddressAt(FreeNextFieldOffset); }
// FIXME
// void set_queue(void* owner);
public long count() { return countField.getValue(addr); }
public int count() { return countField.getValue(addr); }
// FIXME
// void set_count(intptr_t count);
// void set_count(int count);
public long recursions() { return recursionsField.getValue(addr); }
@ -97,18 +97,9 @@ public class ObjectMonitor extends VMObject {
return addr.getOopHandleAt(objectFieldOffset);
}
public long contentions() {
// refer to objectMonitor_xxx.inline.hpp - contentions definition.
// for Solaris and Linux, contentions is same as count. for Windows
// it is different (objectMonitor_win32.inline.hpp)
long count = count();
if (VM.getVM().getOS().equals("win32")) {
// don't count the owner of the monitor
return count > 0? count - 1 : 0;
} else {
// Solaris and Linux
return count;
}
// contentions is always equal to count
public int contentions() {
return count();
}
// FIXME
@ -123,8 +114,8 @@ public class ObjectMonitor extends VMObject {
private static long objectFieldOffset;
private static long ownerFieldOffset;
private static long FreeNextFieldOffset;
private static CIntegerField countField;
private static CIntegerField waitersField;
private static JIntField countField;
private static JIntField waitersField;
private static CIntegerField recursionsField;
// FIXME: expose platform-dependent stuff
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2001, 2007, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -48,9 +48,17 @@ public class ObjectSynchronizer {
blockListField = type.getAddressField("gBlockList");
gBlockListAddr = blockListField.getValue();
blockSize = db.lookupIntConstant("ObjectSynchronizer::_BLOCKSIZE").intValue();
defaultCacheLineSize = db.lookupIntConstant("DEFAULT_CACHE_LINE_SIZE").intValue();
} catch (RuntimeException e) { }
type = db.lookupType("ObjectMonitor");
objectMonitorTypeSize = type.getSize();
// defaultCacheLineSize is only assigned inside the try block above, whose
// RuntimeException is swallowed; it is still 0 if that lookup did not
// complete. Skip the rounding in that case instead of dividing by zero
// (which would throw ArithmeticException out of this initializer).
if (defaultCacheLineSize > 0
    && (objectMonitorTypeSize % defaultCacheLineSize) != 0) {
  // sizeof(ObjectMonitor) is not already a multiple of a cache line.
  // The ObjectMonitor allocation code in ObjectSynchronizer pads each
  // ObjectMonitor in a block to the next cache line boundary.
  // Use long arithmetic throughout so the size is never narrowed to int.
  long needLines = (objectMonitorTypeSize / defaultCacheLineSize) + 1;
  objectMonitorTypeSize = needLines * defaultCacheLineSize;
}
}
public long identityHashValueFor(Oop obj) {
@ -122,6 +130,7 @@ public class ObjectSynchronizer {
private static Address gBlockListAddr;
private static int blockSize;
private static int defaultCacheLineSize;
private static long objectMonitorTypeSize;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -38,4 +38,26 @@ const bool CCallingConventionRequiresIntsAsLongs = false;
#define SUPPORTS_NATIVE_CX8
// The expected size in bytes of a cache line, used to pad data structures.
// The value is picked at compile time from the compiler configuration
// (TIERED, COMPILER1, COMPILER2 or SHARK) and, except in the pure C1 case,
// from whether _LP64 is defined.
// NOTE(review): if none of those configuration macros is defined, this
// block leaves DEFAULT_CACHE_LINE_SIZE undefined; presumably a shared
// header supplies a fallback -- confirm.
#if defined(TIERED)
#ifdef _LP64
// tiered, 64-bit, large machine
#define DEFAULT_CACHE_LINE_SIZE 128
#else
// tiered, 32-bit, medium machine
#define DEFAULT_CACHE_LINE_SIZE 64
#endif
#elif defined(COMPILER1)
// pure C1, 32-bit, small machine
// (this branch does not test _LP64; the same value is used either way)
#define DEFAULT_CACHE_LINE_SIZE 16
#elif defined(COMPILER2) || defined(SHARK)
#ifdef _LP64
// pure C2, 64-bit, large machine
#define DEFAULT_CACHE_LINE_SIZE 128
#else
// pure C2, 32-bit, medium machine
#define DEFAULT_CACHE_LINE_SIZE 64
#endif
#endif
#endif // CPU_SPARC_VM_GLOBALDEFINITIONS_SPARC_HPP

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -35,4 +35,27 @@ const bool CCallingConventionRequiresIntsAsLongs = false;
#define SUPPORTS_NATIVE_CX8
// The expected size in bytes of a cache line, used to pad data structures.
// The value is picked at compile time from the compiler configuration
// (TIERED, COMPILER1, COMPILER2 or SHARK) and, except in the pure C1 case,
// from whether _LP64 is defined.
// NOTE(review): if none of those configuration macros is defined, this
// block leaves DEFAULT_CACHE_LINE_SIZE undefined; presumably a shared
// header supplies a fallback -- confirm.
#if defined(TIERED)
#ifdef _LP64
// tiered, 64-bit, large machine
#define DEFAULT_CACHE_LINE_SIZE 128
#else
// tiered, 32-bit, medium machine
#define DEFAULT_CACHE_LINE_SIZE 64
#endif
#elif defined(COMPILER1)
// pure C1, 32-bit, small machine
// i486 was the last Intel chip with 16-byte cache line size
// (this branch does not test _LP64; the same value is used either way)
#define DEFAULT_CACHE_LINE_SIZE 32
#elif defined(COMPILER2) || defined(SHARK)
#ifdef _LP64
// pure C2, 64-bit, large machine
#define DEFAULT_CACHE_LINE_SIZE 128
#else
// pure C2, 32-bit, medium machine
#define DEFAULT_CACHE_LINE_SIZE 64
#endif
#endif
#endif // CPU_X86_VM_GLOBALDEFINITIONS_X86_HPP

View File

@ -76,10 +76,16 @@ class PaddedEndImpl<T, /*pad_size*/ 0> : public T {
// if the start address is a multiple of alignment.
template <class T, size_t alignment = DEFAULT_CACHE_LINE_SIZE>
class PaddedEnd : public PaddedEndImpl<T, PADDED_END_SIZE(T, alignment)> {
// C++ don't allow zero-length arrays. The padding is put in a
// C++ doesn't allow zero-length arrays. The padding is put in a
// super class that is specialized for the pad_size == 0 case.
};
// Similar to PaddedEnd, this macro defines a _pad_buf<id> field (the id
// argument is token-pasted onto the field name) that is
// (alignment - size) bytes in size. This macro is used
// to add padding in between non-class fields in a class or struct.
//
// id        - suffix for the generated field name; it must be distinct for
//             each use within one class or struct, since it is the only
//             thing that tells the generated fields apart.
// alignment - the size in bytes that the preceding fields plus the pad
//             should add up to.
// size      - the combined size in bytes of the fields being padded; it
//             must be strictly less than alignment, because the pad is
//             declared as a char array of (alignment) - (size) elements
//             and an array length of zero or less does not compile.
#define DEFINE_PAD_MINUS_SIZE(id, alignment, size) \
char _pad_buf##id[(alignment) - (size)]
// Helper class to create an array of PaddedEnd<T> objects. All elements will
// start at a multiple of alignment and the size will be aligned to alignment.
template <class T, MEMFLAGS flags, size_t alignment = DEFAULT_CACHE_LINE_SIZE>

View File

@ -1031,7 +1031,7 @@ JvmtiEnvBase::get_object_monitor_usage(JavaThread* calling_thread, jobject objec
// implied else: entry_count == 0
}
int nWant,nWait;
jint nWant, nWait;
if (mon != NULL) {
// this object has a heavyweight monitor
nWant = mon->contentions(); // # of threads contending for monitor

View File

@ -257,7 +257,6 @@ bool ObjectMonitor::try_enter(Thread* THREAD) {
assert(_recursions == 0, "internal state error");
_owner = THREAD;
_recursions = 1;
OwnerIsThread = 1;
return true;
}
if (Atomic::cmpxchg_ptr (THREAD, &_owner, NULL) != NULL) {
@ -280,7 +279,6 @@ void NOINLINE ObjectMonitor::enter(TRAPS) {
// Either ASSERT _recursions == 0 or explicitly set _recursions = 0.
assert(_recursions == 0, "invariant");
assert(_owner == Self, "invariant");
// CONSIDER: set or assert OwnerIsThread == 1
return;
}
@ -296,7 +294,6 @@ void NOINLINE ObjectMonitor::enter(TRAPS) {
// Commute owner from a thread-specific on-stack BasicLockObject address to
// a full-fledged "Thread *".
_owner = Self;
OwnerIsThread = 1;
return;
}
@ -328,7 +325,7 @@ void NOINLINE ObjectMonitor::enter(TRAPS) {
// Prevent deflation at STW-time. See deflate_idle_monitors() and is_busy().
// Ensure the object-monitor relationship remains stable while there's contention.
Atomic::inc_ptr(&_count);
Atomic::inc(&_count);
EventJavaMonitorEnter event;
@ -384,7 +381,7 @@ void NOINLINE ObjectMonitor::enter(TRAPS) {
// acquire it.
}
Atomic::dec_ptr(&_count);
Atomic::dec(&_count);
assert(_count >= 0, "invariant");
Self->_Stalled = 0;
@ -440,7 +437,6 @@ int ObjectMonitor::TryLock(Thread * Self) {
// Either guarantee _recursions == 0 or set _recursions = 0.
assert(_recursions == 0, "invariant");
assert(_owner == Self, "invariant");
// CONSIDER: set or assert that OwnerIsThread == 1
return 1;
}
// The lock had been free momentarily, but we lost the race to the lock.
@ -922,7 +918,6 @@ void NOINLINE ObjectMonitor::exit(bool not_suspended, TRAPS) {
assert(_recursions == 0, "invariant");
_owner = THREAD;
_recursions = 0;
OwnerIsThread = 1;
} else {
// Apparent unbalanced locking ...
// Naively we'd like to throw IllegalMonitorStateException.
@ -1346,7 +1341,6 @@ intptr_t ObjectMonitor::complete_exit(TRAPS) {
assert(_recursions == 0, "internal state error");
_owner = THREAD; // Convert from basiclock addr to Thread addr
_recursions = 0;
OwnerIsThread = 1;
}
}
@ -1385,7 +1379,6 @@ void ObjectMonitor::reenter(intptr_t recursions, TRAPS) {
if (THREAD->is_lock_owned((address) _owner)) { \
_owner = THREAD; /* Convert from basiclock addr to Thread addr */ \
_recursions = 0; \
OwnerIsThread = 1; \
} else { \
TEVENT(Throw IMSX); \
THROW(vmSymbols::java_lang_IllegalMonitorStateException()); \
@ -1906,8 +1899,8 @@ void ObjectMonitor::notifyAll(TRAPS) {
// a contending thread could enqueue itself on the cxq and then spin locally
// on a thread-specific variable such as its ParkEvent._Event flag.
// That's left as an exercise for the reader. Note that global spinning is
// not problematic on Niagara, as the L2$ serves the interconnect and has both
// low latency and massive bandwidth.
// not problematic on Niagara, as the L2 cache serves the interconnect and
// has both low latency and massive bandwidth.
//
// Broadly, we can fix the spin frequency -- that is, the % of contended lock
// acquisition attempts where we opt to spin -- at 100% and vary the spin count
@ -2208,7 +2201,7 @@ int ObjectMonitor::TrySpin_VaryDuration(Thread * Self) {
// as advisory.
//
// Beware too, that _owner is sometimes a BasicLock address and sometimes
// a thread pointer. We differentiate the two cases with OwnerIsThread.
// a thread pointer.
// Alternately, we might tag the type (thread pointer vs basiclock pointer)
// with the LSB of _owner. Another option would be to probabilistically probe
// the putative _owner->TypeTag value.
@ -2230,9 +2223,7 @@ int ObjectMonitor::TrySpin_VaryDuration(Thread * Self) {
int ObjectMonitor::NotRunnable(Thread * Self, Thread * ox) {
// Check either OwnerIsThread or ox->TypeTag == 2BAD.
if (!OwnerIsThread) return 0;
// Check ox->TypeTag == 2BAD.
if (ox == NULL) return 0;
// Avoid transitive spinning ...
@ -2399,20 +2390,6 @@ void ObjectMonitor::Initialize() {
}
}
// Compile-time asserts
// When possible, it's better to catch errors deterministically at
// compile-time than at runtime. The down-side to using compile-time
// asserts is that error message -- often something about negative array
// indices -- is opaque.
#define CTASSERT(x) { int tag[1-(2*!(x))]; printf ("Tag @" INTPTR_FORMAT "\n", (intptr_t)tag); }
void ObjectMonitor::ctAsserts() {
CTASSERT(offset_of (ObjectMonitor, _header) == 0);
}
static char * kvGet(char * kvList, const char * Key) {
if (kvList == NULL) return NULL;
size_t n = strlen(Key);
@ -2526,6 +2503,8 @@ void ObjectMonitor::sanity_checks() {
if (verbose) {
tty->print_cr("INFO: sizeof(ObjectMonitor)=" SIZE_FORMAT,
sizeof(ObjectMonitor));
tty->print_cr("INFO: sizeof(PaddedEnd<ObjectMonitor>)=" SIZE_FORMAT,
sizeof(PaddedEnd<ObjectMonitor>));
}
uint cache_line_size = VM_Version::L1_data_cache_line_size();
@ -2559,9 +2538,9 @@ void ObjectMonitor::sanity_checks() {
warning_cnt++;
}
if ((sizeof(ObjectMonitor) % cache_line_size) != 0) {
tty->print_cr("WARNING: ObjectMonitor size is not a multiple of "
"a cache line which permits false sharing.");
if ((sizeof(PaddedEnd<ObjectMonitor>) % cache_line_size) != 0) {
tty->print_cr("WARNING: PaddedEnd<ObjectMonitor> size is not a "
"multiple of a cache line which permits false sharing.");
warning_cnt++;
}
}

View File

@ -25,6 +25,7 @@
#ifndef SHARE_VM_RUNTIME_OBJECTMONITOR_HPP
#define SHARE_VM_RUNTIME_OBJECTMONITOR_HPP
#include "memory/padded.hpp"
#include "runtime/os.hpp"
#include "runtime/park.hpp"
#include "runtime/perfData.hpp"
@ -58,21 +59,71 @@ class ObjectWaiter : public StackObj {
// forward declaration to avoid include tracing.hpp
class EventJavaMonitorWait;
// WARNING:
// This is a very sensitive and fragile class. DO NOT make any
// change unless you are fully aware of the underlying semantics.
// This class can not inherit from any other class, because I have
// to let the displaced header be the very first word. Otherwise I
// have to let markOop include this file, which would export the
// monitor data structure to everywhere.
// The ObjectMonitor class implements the heavyweight version of a
// JavaMonitor. The lightweight BasicLock/stack lock version has been
// inflated into an ObjectMonitor. This inflation is typically due to
// contention or use of Object.wait().
//
// The ObjectMonitor class is used to implement JavaMonitors which have
// transformed from the lightweight structure of the thread stack to a
// heavy weight lock due to contention
// It is also used as RawMonitor by the JVMTI
// WARNING: This is a very sensitive and fragile class. DO NOT make any
// changes unless you are fully aware of the underlying semantics.
//
// Class JvmtiRawMonitor currently inherits from ObjectMonitor so
// changes in this class must be careful to not break JvmtiRawMonitor.
// These two subsystems should be separated.
//
// ObjectMonitor Layout Overview/Highlights/Restrictions:
//
// - The _header field must be at offset 0 because the displaced header
// from markOop is stored there. We do not want markOop.hpp to include
// ObjectMonitor.hpp to avoid exposing ObjectMonitor everywhere. This
// means that ObjectMonitor cannot inherit from any other class nor can
// it use any virtual member functions. This restriction is critical to
// the proper functioning of the VM.
// - The _header and _owner fields should be separated by enough space
// to avoid false sharing due to parallel access by different threads.
// This is an advisory recommendation.
// - The general layout of the fields in ObjectMonitor is:
// _header
// <lightly_used_fields>
// <optional padding>
// _owner
// <remaining_fields>
// - The VM assumes write ordering and machine word alignment with
// respect to the _owner field and the <remaining_fields> that can
// be read in parallel by other threads.
// - Generally fields that are accessed closely together in time should
// be placed proximally in space to promote data cache locality. That
// is, temporal locality should condition spatial locality.
// - We have to balance avoiding false sharing with excessive invalidation
// from coherence traffic. As such, we try to cluster fields that tend
// to be _written_ at approximately the same time onto the same data
// cache line.
// - We also have to balance the natural tension between minimizing
// single-threaded capacity misses and avoiding excessive multi-threaded
// coherency misses. There is no single optimal layout for both
// single-threaded and multi-threaded environments.
//
// - See ObjectMonitor::sanity_checks() for how critical restrictions are
// enforced and advisory recommendations are reported.
// - Adjacent ObjectMonitors should be separated by enough space to avoid
// false sharing. This is handled by the ObjectMonitor allocation code
// in synchronizer.cpp. Also see ObjectSynchronizer::sanity_checks().
//
// Future notes:
// - Separating _owner from the <remaining_fields> by enough space to
// avoid false sharing might be profitable. Given
// http://blogs.oracle.com/dave/entry/cas_and_cache_trivia_invalidate
// we know that the CAS in monitorenter will invalidate the line
// underlying _owner. We want to avoid an L1 data cache miss on that
// same line for monitorexit. Putting these <remaining_fields>:
// _recursions, _EntryList, _cxq, and _succ, all of which may be
// fetched in the inflated unlock path, on a different cache line
// would make them immune to CAS-based invalidation from the _owner
// field.
//
// - The _recursions field should be of type int, or int32_t but not
// intptr_t. There's no reason to use a 64-bit type for this field
// in a 64-bit JVM.
class ObjectMonitor {
public:
@ -84,7 +135,84 @@ class ObjectMonitor {
OM_TIMED_OUT // Object.wait() timed out
};
private:
friend class ObjectSynchronizer;
friend class ObjectWaiter;
friend class VMStructs;
volatile markOop _header; // displaced object header word - mark
void* volatile _object; // backward object pointer - strong root
public:
ObjectMonitor * FreeNext; // Free list linkage
private:
DEFINE_PAD_MINUS_SIZE(0, DEFAULT_CACHE_LINE_SIZE,
sizeof(volatile markOop) + sizeof(void * volatile) +
sizeof(ObjectMonitor *));
protected: // protected for JvmtiRawMonitor
void * volatile _owner; // pointer to owning thread OR BasicLock
volatile jlong _previous_owner_tid; // thread id of the previous owner of the monitor
volatile intptr_t _recursions; // recursion count, 0 for first entry
ObjectWaiter * volatile _EntryList; // Threads blocked on entry or reentry.
// The list is actually composed of WaitNodes,
// acting as proxies for Threads.
private:
ObjectWaiter * volatile _cxq; // LL of recently-arrived threads blocked on entry.
Thread * volatile _succ; // Heir presumptive thread - used for futile wakeup throttling
Thread * volatile _Responsible;
volatile int _Spinner; // for exit->spinner handoff optimization
volatile int _SpinFreq; // Spin 1-out-of-N attempts: success rate
volatile int _SpinClock;
volatile intptr_t _SpinState; // MCS/CLH list of spinners
volatile int _SpinDuration;
volatile jint _count; // reference count to prevent reclamation/deflation
// at stop-the-world time. See deflate_idle_monitors().
// _count is approximately |_WaitSet| + |_EntryList|
protected:
ObjectWaiter * volatile _WaitSet; // LL of threads wait()ing on the monitor
volatile jint _waiters; // number of waiting threads
private:
volatile int _WaitSetLock; // protects Wait Queue - simple spinlock
public:
static void Initialize();
static PerfCounter * _sync_ContendedLockAttempts;
static PerfCounter * _sync_FutileWakeups;
static PerfCounter * _sync_Parks;
static PerfCounter * _sync_EmptyNotifications;
static PerfCounter * _sync_Notifications;
static PerfCounter * _sync_SlowEnter;
static PerfCounter * _sync_SlowExit;
static PerfCounter * _sync_SlowNotify;
static PerfCounter * _sync_SlowNotifyAll;
static PerfCounter * _sync_FailedSpins;
static PerfCounter * _sync_SuccessfulSpins;
static PerfCounter * _sync_PrivateA;
static PerfCounter * _sync_PrivateB;
static PerfCounter * _sync_MonInCirculation;
static PerfCounter * _sync_MonScavenged;
static PerfCounter * _sync_Inflations;
static PerfCounter * _sync_Deflations;
static PerfLongVariable * _sync_MonExtant;
static int Knob_Verbose;
static int Knob_VerifyInUse;
static int Knob_SpinLimit;
void* operator new (size_t size) throw() {
return AllocateHeap(size, mtInternal);
}
void* operator new[] (size_t size) throw() {
return operator new (size);
}
void operator delete(void* p) {
FreeHeap(p, mtInternal);
}
void operator delete[] (void *p) {
operator delete(p);
}
// TODO-FIXME: the "offset" routines should return a type of off_t instead of int ...
// ByteSize would also be an appropriate type.
static int header_offset_in_bytes() { return offset_of(ObjectMonitor, _header); }
@ -100,14 +228,11 @@ class ObjectMonitor {
static int Responsible_offset_in_bytes() { return offset_of(ObjectMonitor, _Responsible); }
static int Spinner_offset_in_bytes() { return offset_of(ObjectMonitor, _Spinner); }
public:
// Eventually we'll make provisions for multiple callbacks, but
// now one will suffice.
static int (*SpinCallbackFunction)(intptr_t, int);
static intptr_t SpinCallbackArgument;
public:
markOop header() const;
void set_header(markOop hdr);
@ -123,39 +248,22 @@ class ObjectMonitor {
void* owner() const;
void set_owner(void* owner);
intptr_t waiters() const;
jint waiters() const;
intptr_t count() const;
void set_count(intptr_t count);
intptr_t contentions() const;
jint count() const;
void set_count(jint count);
jint contentions() const;
intptr_t recursions() const { return _recursions; }
// JVM/DI GetMonitorInfo() needs this
// JVM/TI GetObjectMonitorUsage() needs this:
ObjectWaiter* first_waiter() { return _WaitSet; }
ObjectWaiter* next_waiter(ObjectWaiter* o) { return o->_next; }
Thread* thread_of_waiter(ObjectWaiter* o) { return o->_thread; }
// initialize the monitor, exception the semaphore, all other fields
// are simple integers or pointers
ObjectMonitor() {
_header = NULL;
_count = 0;
_waiters = 0;
_recursions = 0;
_object = NULL;
_owner = NULL;
_WaitSet = NULL;
_WaitSetLock = 0;
_Responsible = NULL;
_succ = NULL;
_cxq = NULL;
FreeNext = NULL;
_EntryList = NULL;
_SpinFreq = 0;
_SpinClock = 0;
OwnerIsThread = 0;
_previous_owner_tid = 0;
}
protected:
// We don't typically expect or want the ctors or dtors to run.
// normal ObjectMonitors are type-stable and immortal.
ObjectMonitor() { ::memset((void *)this, 0, sizeof(*this)); }
~ObjectMonitor() {
// TODO: Add asserts ...
@ -169,7 +277,7 @@ class ObjectMonitor {
// _cxq == 0 _succ == NULL _owner == NULL _waiters == 0
// _count == 0 EntryList == NULL
// _recursions == 0 _WaitSet == NULL
// TODO: assert (is_busy()|_recursions) == 0
assert(((is_busy()|_recursions) == 0), "freeing inuse monitor");
_succ = NULL;
_EntryList = NULL;
_cxq = NULL;
@ -177,7 +285,6 @@ class ObjectMonitor {
_recursions = 0;
_SpinFreq = 0;
_SpinClock = 0;
OwnerIsThread = 0;
}
public:
@ -221,7 +328,6 @@ class ObjectMonitor {
int TrySpin_Fixed(Thread * Self);
int TrySpin_VaryFrequency(Thread * Self);
int TrySpin_VaryDuration(Thread * Self);
void ctAsserts();
void ExitEpilog(Thread * Self, ObjectWaiter * Wakee);
bool ExitSuspendEquivalent(JavaThread * Self);
void post_monitor_wait_event(EventJavaMonitorWait * event,
@ -229,102 +335,6 @@ class ObjectMonitor {
jlong timeout,
bool timedout);
private:
friend class ObjectSynchronizer;
friend class ObjectWaiter;
friend class VMStructs;
// WARNING: this must be the very first word of ObjectMonitor
// This means this class can't use any virtual member functions.
volatile markOop _header; // displaced object header word - mark
void* volatile _object; // backward object pointer - strong root
double SharingPad[1]; // temp to reduce false sharing
// All the following fields must be machine word aligned
// The VM assumes write ordering wrt these fields, which can be
// read from other threads.
protected: // protected for jvmtiRawMonitor
void * volatile _owner; // pointer to owning thread OR BasicLock
volatile jlong _previous_owner_tid; // thread id of the previous owner of the monitor
volatile intptr_t _recursions; // recursion count, 0 for first entry
private:
int OwnerIsThread; // _owner is (Thread *) vs SP/BasicLock
ObjectWaiter * volatile _cxq; // LL of recently-arrived threads blocked on entry.
// The list is actually composed of WaitNodes, acting
// as proxies for Threads.
protected:
ObjectWaiter * volatile _EntryList; // Threads blocked on entry or reentry.
private:
Thread * volatile _succ; // Heir presumptive thread - used for futile wakeup throttling
Thread * volatile _Responsible;
int _PromptDrain; // rqst to drain cxq into EntryList ASAP
volatile int _Spinner; // for exit->spinner handoff optimization
volatile int _SpinFreq; // Spin 1-out-of-N attempts: success rate
volatile int _SpinClock;
volatile int _SpinDuration;
volatile intptr_t _SpinState; // MCS/CLH list of spinners
// TODO-FIXME: _count, _waiters and _recursions should be of
// type int, or int32_t but not intptr_t. There's no reason
// to use 64-bit fields for these variables on a 64-bit JVM.
volatile intptr_t _count; // reference count to prevent reclamation/deflation
// at stop-the-world time. See deflate_idle_monitors().
// _count is approximately |_WaitSet| + |_EntryList|
protected:
volatile intptr_t _waiters; // number of waiting threads
private:
protected:
ObjectWaiter * volatile _WaitSet; // LL of threads wait()ing on the monitor
private:
volatile int _WaitSetLock; // protects Wait Queue - simple spinlock
public:
int _QMix; // Mixed prepend queue discipline
ObjectMonitor * FreeNext; // Free list linkage
intptr_t StatA, StatsB;
public:
static void Initialize();
static PerfCounter * _sync_ContendedLockAttempts;
static PerfCounter * _sync_FutileWakeups;
static PerfCounter * _sync_Parks;
static PerfCounter * _sync_EmptyNotifications;
static PerfCounter * _sync_Notifications;
static PerfCounter * _sync_SlowEnter;
static PerfCounter * _sync_SlowExit;
static PerfCounter * _sync_SlowNotify;
static PerfCounter * _sync_SlowNotifyAll;
static PerfCounter * _sync_FailedSpins;
static PerfCounter * _sync_SuccessfulSpins;
static PerfCounter * _sync_PrivateA;
static PerfCounter * _sync_PrivateB;
static PerfCounter * _sync_MonInCirculation;
static PerfCounter * _sync_MonScavenged;
static PerfCounter * _sync_Inflations;
static PerfCounter * _sync_Deflations;
static PerfLongVariable * _sync_MonExtant;
public:
static int Knob_Verbose;
static int Knob_VerifyInUse;
static int Knob_SpinLimit;
void* operator new (size_t size) throw() {
return AllocateHeap(size, mtInternal);
}
void* operator new[] (size_t size) throw() {
return operator new (size);
}
void operator delete(void* p) {
FreeHeap(p, mtInternal);
}
void operator delete[] (void *p) {
operator delete(p);
}
};
#undef TEVENT

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1998, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -40,15 +40,11 @@ inline void ObjectMonitor::set_header(markOop hdr) {
_header = hdr;
}
inline intptr_t ObjectMonitor::count() const {
inline jint ObjectMonitor::count() const {
return _count;
}
inline void ObjectMonitor::set_count(intptr_t count) {
_count= count;
}
inline intptr_t ObjectMonitor::waiters() const {
inline jint ObjectMonitor::waiters() const {
return _waiters;
}
@ -61,7 +57,7 @@ inline void ObjectMonitor::clear() {
assert(_count == 0, "Fatal logic error in ObjectMonitor count!");
assert(_waiters == 0, "Fatal logic error in ObjectMonitor waiters!");
assert(_recursions == 0, "Fatal logic error in ObjectMonitor recursions!");
assert(_object, "Fatal logic error in ObjectMonitor object!");
assert(_object != NULL, "Fatal logic error in ObjectMonitor object!");
assert(_owner == 0, "Fatal logic error in ObjectMonitor owner!");
_header = NULL;
@ -85,7 +81,6 @@ inline bool ObjectMonitor::check(TRAPS) {
if (THREAD != _owner) {
if (THREAD->is_lock_owned((address) _owner)) {
_owner = THREAD; // regain ownership of inflated monitor
OwnerIsThread = 1 ;
assert (_recursions == 0, "invariant") ;
} else {
check_slow(THREAD);
@ -97,7 +92,7 @@ inline bool ObjectMonitor::check(TRAPS) {
// return number of threads contending for this monitor
inline intptr_t ObjectMonitor::contentions() const {
inline jint ObjectMonitor::contentions() const {
return _count;
}

View File

@ -24,6 +24,7 @@
#include "precompiled.hpp"
#include "classfile/vmSymbols.hpp"
#include "memory/padded.hpp"
#include "memory/resourceArea.hpp"
#include "oops/markOop.hpp"
#include "oops/oop.inline.hpp"
@ -110,6 +111,8 @@ int dtrace_waited_probe(ObjectMonitor* monitor, Handle obj, Thread* thr) {
#define NINFLATIONLOCKS 256
static volatile intptr_t InflationLocks[NINFLATIONLOCKS];
// gBlockList is really PaddedEnd<ObjectMonitor> *, but we don't
// want to expose the PaddedEnd template more than necessary.
ObjectMonitor * ObjectSynchronizer::gBlockList = NULL;
ObjectMonitor * volatile ObjectSynchronizer::gFreeList = NULL;
ObjectMonitor * volatile ObjectSynchronizer::gOmInUseList = NULL;
@ -410,16 +413,15 @@ void ObjectSynchronizer::notifyall(Handle obj, TRAPS) {
// performed by the CPU(s) or platform.
struct SharedGlobals {
char _pad_prefix[DEFAULT_CACHE_LINE_SIZE];
// These are highly shared mostly-read variables.
// To avoid false-sharing they need to be the sole occupants of a $ line.
double padPrefix[8];
// To avoid false-sharing they need to be the sole occupants of a cache line.
volatile int stwRandom;
volatile int stwCycle;
// Hot RW variables -- Sequester to avoid false-sharing
double padSuffix[16];
DEFINE_PAD_MINUS_SIZE(1, DEFAULT_CACHE_LINE_SIZE, sizeof(volatile int) * 2);
// Hot RW variable -- Sequester to avoid false-sharing
volatile int hcSequence;
double padFinal[8];
DEFINE_PAD_MINUS_SIZE(2, DEFAULT_CACHE_LINE_SIZE, sizeof(volatile int));
};
static SharedGlobals GVars;
@ -780,18 +782,18 @@ JavaThread* ObjectSynchronizer::get_lock_owner(Handle h_obj, bool doLock) {
// Visitors ...
void ObjectSynchronizer::monitors_iterate(MonitorClosure* closure) {
ObjectMonitor* block = gBlockList;
PaddedEnd<ObjectMonitor> * block = (PaddedEnd<ObjectMonitor> *)gBlockList;
ObjectMonitor* mid;
while (block) {
assert(block->object() == CHAINMARKER, "must be a block header");
for (int i = _BLOCKSIZE - 1; i > 0; i--) {
mid = block + i;
mid = (ObjectMonitor *)(block + i);
oop object = (oop) mid->object();
if (object != NULL) {
closure->do_monitor(mid);
}
}
block = (ObjectMonitor*) block->FreeNext;
block = (PaddedEnd<ObjectMonitor> *) block->FreeNext;
}
}
@ -806,10 +808,12 @@ static inline ObjectMonitor* next(ObjectMonitor* block) {
void ObjectSynchronizer::oops_do(OopClosure* f) {
assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
for (ObjectMonitor* block = gBlockList; block != NULL; block = next(block)) {
for (PaddedEnd<ObjectMonitor> * block =
(PaddedEnd<ObjectMonitor> *)gBlockList; block != NULL;
block = (PaddedEnd<ObjectMonitor> *)next(block)) {
assert(block->object() == CHAINMARKER, "must be a block header");
for (int i = 1; i < _BLOCKSIZE; i++) {
ObjectMonitor* mid = &block[i];
ObjectMonitor* mid = (ObjectMonitor *)&block[i];
if (mid->object() != NULL) {
f->do_oop((oop*)mid->object_addr());
}
@ -966,16 +970,29 @@ ObjectMonitor * NOINLINE ObjectSynchronizer::omAlloc(Thread * Self) {
// 3: allocate a block of new ObjectMonitors
// Both the local and global free lists are empty -- resort to malloc().
// In the current implementation objectMonitors are TSM - immortal.
// Ideally, we'd write "new ObjectMonitor[_BLOCKSIZE]", but we want
// each ObjectMonitor to start at the beginning of a cache line,
// so we use align_size_up().
// A better solution would be to use C++ placement-new.
// BEWARE: As it stands currently, we don't run the ctors!
assert(_BLOCKSIZE > 1, "invariant");
ObjectMonitor * temp = new ObjectMonitor[_BLOCKSIZE];
size_t neededsize = sizeof(PaddedEnd<ObjectMonitor>) * _BLOCKSIZE;
PaddedEnd<ObjectMonitor> * temp;
size_t aligned_size = neededsize + (DEFAULT_CACHE_LINE_SIZE - 1);
void* real_malloc_addr = (void *)NEW_C_HEAP_ARRAY(char, aligned_size,
mtInternal);
temp = (PaddedEnd<ObjectMonitor> *)
align_size_up((intptr_t)real_malloc_addr,
DEFAULT_CACHE_LINE_SIZE);
// NOTE: (almost) no way to recover if allocation failed.
// We might be able to induce a STW safepoint and scavenge enough
// objectMonitors to permit progress.
if (temp == NULL) {
vm_exit_out_of_memory(sizeof (ObjectMonitor[_BLOCKSIZE]), OOM_MALLOC_ERROR,
vm_exit_out_of_memory(neededsize, OOM_MALLOC_ERROR,
"Allocate ObjectMonitors");
}
(void)memset((void *) temp, 0, neededsize);
// Format the block.
// initialize the linked list, each monitor points to its next
@ -986,7 +1003,7 @@ ObjectMonitor * NOINLINE ObjectSynchronizer::omAlloc(Thread * Self) {
// look like: class Block { Block * next; int N; ObjectMonitor Body [N] ; }
for (int i = 1; i < _BLOCKSIZE; i++) {
temp[i].FreeNext = &temp[i+1];
temp[i].FreeNext = (ObjectMonitor *)&temp[i+1];
}
// terminate the last monitor as the end of list
@ -1141,10 +1158,6 @@ ObjectMonitor* ObjectSynchronizer::inflate_helper(oop obj) {
}
// Note that we could encounter some performance loss through false-sharing as
// multiple locks occupy the same $ line. Padding might be appropriate.
ObjectMonitor * NOINLINE ObjectSynchronizer::inflate(Thread * Self,
oop object) {
// Inflate mutates the heap ...
@ -1210,7 +1223,6 @@ ObjectMonitor * NOINLINE ObjectSynchronizer::inflate(Thread * Self,
// in which INFLATING appears in the mark.
m->Recycle();
m->_Responsible = NULL;
m->OwnerIsThread = 0;
m->_recursions = 0;
m->_SpinDuration = ObjectMonitor::Knob_SpinLimit; // Consider: maintain by type/class
@ -1257,8 +1269,8 @@ ObjectMonitor * NOINLINE ObjectSynchronizer::inflate(Thread * Self,
m->set_header(dmw);
// Optimization: if the mark->locker stack address is associated
// with this thread we could simply set m->_owner = Self and
// m->OwnerIsThread = 1. Note that a thread can inflate an object
// with this thread we could simply set m->_owner = Self.
// Note that a thread can inflate an object
// that it has stack-locked -- as might happen in wait() -- directly
// with CAS. That is, we can avoid the xchg-NULL .... ST idiom.
m->set_owner(mark->locker());
@ -1302,7 +1314,6 @@ ObjectMonitor * NOINLINE ObjectSynchronizer::inflate(Thread * Self,
m->set_header(mark);
m->set_owner(NULL);
m->set_object(object);
m->OwnerIsThread = 1;
m->_recursions = 0;
m->_Responsible = NULL;
m->_SpinDuration = ObjectMonitor::Knob_SpinLimit; // consider: keep metastats by type/class
@ -1310,7 +1321,6 @@ ObjectMonitor * NOINLINE ObjectSynchronizer::inflate(Thread * Self,
if (Atomic::cmpxchg_ptr (markOopDesc::encode(m), object->mark_addr(), mark) != mark) {
m->set_object(NULL);
m->set_owner(NULL);
m->OwnerIsThread = 0;
m->Recycle();
omRelease(Self, m, true);
m = NULL;
@ -1336,9 +1346,6 @@ ObjectMonitor * NOINLINE ObjectSynchronizer::inflate(Thread * Self,
}
}
// Note that we could encounter some performance loss through false-sharing as
// multiple locks occupy the same $ line. Padding might be appropriate.
// Deflate_idle_monitors() is called at all safepoints, immediately
// after all mutators are stopped, but before any objects have moved.
@ -1491,12 +1498,14 @@ void ObjectSynchronizer::deflate_idle_monitors() {
nInuse += gOmInUseCount;
}
} else for (ObjectMonitor* block = gBlockList; block != NULL; block = next(block)) {
} else for (PaddedEnd<ObjectMonitor> * block =
(PaddedEnd<ObjectMonitor> *)gBlockList; block != NULL;
block = (PaddedEnd<ObjectMonitor> *)next(block)) {
// Iterate over all extant monitors - Scavenge all idle monitors.
assert(block->object() == CHAINMARKER, "must be a block header");
nInCirculation += _BLOCKSIZE;
for (int i = 1; i < _BLOCKSIZE; i++) {
ObjectMonitor* mid = &block[i];
ObjectMonitor* mid = (ObjectMonitor*)&block[i];
oop obj = (oop) mid->object();
if (obj == NULL) {
@ -1648,18 +1657,18 @@ void ObjectSynchronizer::sanity_checks(const bool verbose,
// Verify all monitors in the monitor cache; the verification is weak.
void ObjectSynchronizer::verify() {
ObjectMonitor* block = gBlockList;
PaddedEnd<ObjectMonitor> * block = (PaddedEnd<ObjectMonitor> *)gBlockList;
ObjectMonitor* mid;
while (block) {
assert(block->object() == CHAINMARKER, "must be a block header");
for (int i = 1; i < _BLOCKSIZE; i++) {
mid = block + i;
mid = (ObjectMonitor *)(block + i);
oop object = (oop) mid->object();
if (object != NULL) {
mid->verify();
}
}
block = (ObjectMonitor*) block->FreeNext;
block = (PaddedEnd<ObjectMonitor> *) block->FreeNext;
}
}
@ -1668,18 +1677,19 @@ void ObjectSynchronizer::verify() {
// the list of extant blocks without taking a lock.
int ObjectSynchronizer::verify_objmon_isinpool(ObjectMonitor *monitor) {
ObjectMonitor* block = gBlockList;
PaddedEnd<ObjectMonitor> * block = (PaddedEnd<ObjectMonitor> *)gBlockList;
while (block) {
assert(block->object() == CHAINMARKER, "must be a block header");
if (monitor > &block[0] && monitor < &block[_BLOCKSIZE]) {
if (monitor > (ObjectMonitor *)&block[0] &&
monitor < (ObjectMonitor *)&block[_BLOCKSIZE]) {
address mon = (address) monitor;
address blk = (address) block;
size_t diff = mon - blk;
assert((diff % sizeof(ObjectMonitor)) == 0, "check");
assert((diff % sizeof(PaddedEnd<ObjectMonitor>)) == 0, "check");
return 1;
}
block = (ObjectMonitor*) block->FreeNext;
block = (PaddedEnd<ObjectMonitor> *) block->FreeNext;
}
return 0;
}

View File

@ -134,6 +134,8 @@ class ObjectSynchronizer : AllStatic {
private:
enum { _BLOCKSIZE = 128 };
// gBlockList is really PaddedEnd<ObjectMonitor> *, but we don't
// want to expose the PaddedEnd template more than necessary.
static ObjectMonitor* gBlockList;
static ObjectMonitor * volatile gFreeList;
// global monitor in use list, for moribund threads,

View File

@ -1070,8 +1070,8 @@ typedef TwoOopHashtable<Symbol*, mtClass> SymbolTwoOopHashtable;
volatile_nonstatic_field(ObjectMonitor, _header, markOop) \
unchecked_nonstatic_field(ObjectMonitor, _object, sizeof(void *)) /* NOTE: no type */ \
unchecked_nonstatic_field(ObjectMonitor, _owner, sizeof(void *)) /* NOTE: no type */ \
volatile_nonstatic_field(ObjectMonitor, _count, intptr_t) \
volatile_nonstatic_field(ObjectMonitor, _waiters, intptr_t) \
volatile_nonstatic_field(ObjectMonitor, _count, jint) \
volatile_nonstatic_field(ObjectMonitor, _waiters, jint) \
volatile_nonstatic_field(ObjectMonitor, _recursions, intptr_t) \
nonstatic_field(ObjectMonitor, FreeNext, ObjectMonitor*) \
volatile_nonstatic_field(BasicLock, _displaced_header, markOop) \
@ -2507,6 +2507,12 @@ typedef TwoOopHashtable<Symbol*, mtClass> SymbolTwoOopHashtable;
declare_constant(Deoptimization::Action_make_not_compilable) \
declare_constant(Deoptimization::Action_LIMIT) \
\
/***************************************************/ \
/* DEFAULT_CACHE_LINE_SIZE (globalDefinitions.hpp) */ \
/***************************************************/ \
\
declare_constant(DEFAULT_CACHE_LINE_SIZE) \
\
/*********************/ \
/* Matcher (C2 only) */ \
/*********************/ \

View File

@ -540,7 +540,9 @@ inline address clamp_address_in_page(address addr, address page_address, intptr_
// The expected size in bytes of a cache line, used to pad data structures.
#define DEFAULT_CACHE_LINE_SIZE 64
#ifndef DEFAULT_CACHE_LINE_SIZE
#define DEFAULT_CACHE_LINE_SIZE 64
#endif
//----------------------------------------------------------------------------------------------------