8049737: Contended Locking reorder and cache line bucket

JEP-143/JDK-8046133 - optimization #1 - reorder and cache line bucket. Co-authored-by: Dave Dice <dave.dice@oracle.com> Co-authored-by: Karen Kinnear <karen.kinnear@oracle.com> Reviewed-by: shade, dice, dholmes, dsimms
2014-10-14 10:32:12 -07:00 · 2014-10-14 10:32:12 -07:00 · f1ab0fae73
commit f1ab0fae73
parent 51866388d1
13 changed files with 302 additions and 247 deletions
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/ObjectMonitor.java
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/ObjectMonitor.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2005, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -50,8 +50,8 @@ public class ObjectMonitor extends VMObject {
    ownerFieldOffset = f.getOffset();
    f = type.getField("FreeNext");
    FreeNextFieldOffset = f.getOffset();
-    countField  = type.getCIntegerField("_count");
-    waitersField = type.getCIntegerField("_waiters");
+    countField  = type.getJIntField("_count");
+    waitersField = type.getJIntField("_waiters");
    recursionsField = type.getCIntegerField("_recursions");
  }

@ -81,15 +81,15 @@ public class ObjectMonitor extends VMObject {
  // FIXME
  //  void      set_owner(void* owner);

-  public long    waiters() { return waitersField.getValue(addr); }
+  public int    waiters() { return waitersField.getValue(addr); }

  public Address freeNext() { return addr.getAddressAt(FreeNextFieldOffset); }
  // FIXME
  //  void      set_queue(void* owner);

-  public long count() { return countField.getValue(addr); }
+  public int count() { return countField.getValue(addr); }
  // FIXME
-  //  void      set_count(intptr_t count);
+  //  void      set_count(int count);

  public long recursions() { return recursionsField.getValue(addr); }

@ -97,18 +97,9 @@ public class ObjectMonitor extends VMObject {
    return addr.getOopHandleAt(objectFieldOffset);
  }

-  public long contentions() {
-      // refer to objectMonitor_xxx.inline.hpp - contentions definition.
-      // for Solaris and Linux, contentions is same as count. for Windows
-      // it is different (objectMonitor_win32.inline.hpp)
-      long count = count();
-      if (VM.getVM().getOS().equals("win32")) {
-          // don't count the owner of the monitor
-          return count > 0? count - 1 : 0;
-      } else {
-          // Solaris and Linux
-          return count;
-      }
+  // Number of threads contending for this monitor; always equal to count().
+  public int contentions() {
+      return count();
   }

  // FIXME
@ -123,8 +114,8 @@ public class ObjectMonitor extends VMObject {
  private static long          objectFieldOffset;
  private static long          ownerFieldOffset;
  private static long          FreeNextFieldOffset;
-  private static CIntegerField countField;
-  private static CIntegerField waitersField;
+  private static JIntField     countField;
+  private static JIntField     waitersField;
  private static CIntegerField recursionsField;
  // FIXME: expose platform-dependent stuff
 }
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/ObjectSynchronizer.java
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/ObjectSynchronizer.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2007, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -48,9 +48,17 @@ public class ObjectSynchronizer {
      blockListField = type.getAddressField("gBlockList");
      gBlockListAddr = blockListField.getValue();
      blockSize = db.lookupIntConstant("ObjectSynchronizer::_BLOCKSIZE").intValue();
+      defaultCacheLineSize = db.lookupIntConstant("DEFAULT_CACHE_LINE_SIZE").intValue();
    } catch (RuntimeException e) { }
    type = db.lookupType("ObjectMonitor");
    objectMonitorTypeSize = type.getSize();
+    // defaultCacheLineSize is still 0 if the lookups above threw; skip rounding then.
+    if (defaultCacheLineSize > 0 && (objectMonitorTypeSize % defaultCacheLineSize) != 0) {
+      // The ObjectMonitor allocation code in ObjectSynchronizer pads each
+      // ObjectMonitor in a block to the next cache line boundary.
+      int needLines = ((int)objectMonitorTypeSize / defaultCacheLineSize) + 1;
+      objectMonitorTypeSize = needLines * defaultCacheLineSize;
+    }
  }

  public long identityHashValueFor(Oop obj) {
@ -122,6 +130,7 @@ public class ObjectSynchronizer {

  private static Address gBlockListAddr;
  private static int blockSize;
+  private static int defaultCacheLineSize;
  private static long objectMonitorTypeSize;

 }
--- a/hotspot/src/cpu/sparc/vm/globalDefinitions_sparc.hpp
+++ b/hotspot/src/cpu/sparc/vm/globalDefinitions_sparc.hpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -38,4 +38,26 @@ const bool CCallingConventionRequiresIntsAsLongs = false;

 #define SUPPORTS_NATIVE_CX8

+// The expected size in bytes of a cache line, used to pad data structures.
+#if defined(TIERED)
+  #ifdef _LP64
+    // tiered, 64-bit, large machine
+    #define DEFAULT_CACHE_LINE_SIZE 128
+  #else
+    // tiered, 32-bit, medium machine
+    #define DEFAULT_CACHE_LINE_SIZE 64
+  #endif // _LP64
+#elif defined(COMPILER1)
+  // pure C1, 32-bit, small machine
+  #define DEFAULT_CACHE_LINE_SIZE 16
+#elif defined(COMPILER2) || defined(SHARK)
+  #ifdef _LP64
+    // pure C2, 64-bit, large machine
+    #define DEFAULT_CACHE_LINE_SIZE 128
+  #else
+    // pure C2, 32-bit, medium machine
+    #define DEFAULT_CACHE_LINE_SIZE 64
+  #endif // _LP64
+#endif // TIERED, COMPILER1, COMPILER2 || SHARK (nothing is defined here otherwise)
+
 #endif // CPU_SPARC_VM_GLOBALDEFINITIONS_SPARC_HPP
--- a/hotspot/src/cpu/x86/vm/globalDefinitions_x86.hpp
+++ b/hotspot/src/cpu/x86/vm/globalDefinitions_x86.hpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -35,4 +35,27 @@ const bool CCallingConventionRequiresIntsAsLongs = false;

 #define SUPPORTS_NATIVE_CX8

+// The expected size in bytes of a cache line, used to pad data structures.
+#if defined(TIERED)
+  #ifdef _LP64
+    // tiered, 64-bit, large machine
+    #define DEFAULT_CACHE_LINE_SIZE 128
+  #else
+    // tiered, 32-bit, medium machine
+    #define DEFAULT_CACHE_LINE_SIZE 64
+  #endif // _LP64
+#elif defined(COMPILER1)
+  // pure C1, 32-bit, small machine
+  // i486 was the last Intel chip with a 16-byte cache line, so use 32 here
+  #define DEFAULT_CACHE_LINE_SIZE 32
+#elif defined(COMPILER2) || defined(SHARK)
+  #ifdef _LP64
+    // pure C2, 64-bit, large machine
+    #define DEFAULT_CACHE_LINE_SIZE 128
+  #else
+    // pure C2, 32-bit, medium machine
+    #define DEFAULT_CACHE_LINE_SIZE 64
+  #endif // _LP64
+#endif // TIERED, COMPILER1, COMPILER2 || SHARK (nothing is defined here otherwise)
+
 #endif // CPU_X86_VM_GLOBALDEFINITIONS_X86_HPP
--- a/hotspot/src/share/vm/memory/padded.hpp
+++ b/hotspot/src/share/vm/memory/padded.hpp
@ -76,10 +76,16 @@ class PaddedEndImpl<T, /*pad_size*/ 0> : public T {
 // if the start address is a multiple of alignment.
 template <class T, size_t alignment = DEFAULT_CACHE_LINE_SIZE>
 class PaddedEnd : public PaddedEndImpl<T, PADDED_END_SIZE(T, alignment)> {
-  // C++ don't allow zero-length arrays. The padding is put in a
+  // C++ doesn't allow zero-length arrays. The padding is put in a
  // super class that is specialized for the pad_size == 0 case.
 };

+// Similar to PaddedEnd, this macro defines a _pad_buf##id field that is
+// (alignment - size) bytes in size, so size must be less than alignment.
+// It is used to add padding in between non-class fields in a class or struct.
+#define DEFINE_PAD_MINUS_SIZE(id, alignment, size) \
+          char _pad_buf##id[(alignment) - (size)]
+
 // Helper class to create an array of PaddedEnd<T> objects. All elements will
 // start at a multiple of alignment and the size will be aligned to alignment.
 template <class T, MEMFLAGS flags, size_t alignment = DEFAULT_CACHE_LINE_SIZE>
--- a/hotspot/src/share/vm/prims/jvmtiEnvBase.cpp
+++ b/hotspot/src/share/vm/prims/jvmtiEnvBase.cpp
@ -1031,7 +1031,7 @@ JvmtiEnvBase::get_object_monitor_usage(JavaThread* calling_thread, jobject objec
    // implied else: entry_count == 0
  }

-  int nWant,nWait;
+  jint nWant, nWait;
  if (mon != NULL) {
    // this object has a heavyweight monitor
    nWant = mon->contentions(); // # of threads contending for monitor
--- a/hotspot/src/share/vm/runtime/objectMonitor.cpp
+++ b/hotspot/src/share/vm/runtime/objectMonitor.cpp
@ -257,7 +257,6 @@ bool ObjectMonitor::try_enter(Thread* THREAD) {
      assert(_recursions == 0, "internal state error");
      _owner = THREAD;
      _recursions = 1;
-      OwnerIsThread = 1;
      return true;
    }
    if (Atomic::cmpxchg_ptr (THREAD, &_owner, NULL) != NULL) {
@ -280,7 +279,6 @@ void NOINLINE ObjectMonitor::enter(TRAPS) {
    // Either ASSERT _recursions == 0 or explicitly set _recursions = 0.
    assert(_recursions == 0, "invariant");
    assert(_owner == Self, "invariant");
-    // CONSIDER: set or assert OwnerIsThread == 1
    return;
  }

@ -296,7 +294,6 @@ void NOINLINE ObjectMonitor::enter(TRAPS) {
    // Commute owner from a thread-specific on-stack BasicLockObject address to
    // a full-fledged "Thread *".
    _owner = Self;
-    OwnerIsThread = 1;
    return;
  }

@ -328,7 +325,7 @@ void NOINLINE ObjectMonitor::enter(TRAPS) {

  // Prevent deflation at STW-time.  See deflate_idle_monitors() and is_busy().
  // Ensure the object-monitor relationship remains stable while there's contention.
-  Atomic::inc_ptr(&_count);
+  Atomic::inc(&_count);

  EventJavaMonitorEnter event;

@ -384,7 +381,7 @@ void NOINLINE ObjectMonitor::enter(TRAPS) {
    // acquire it.
  }

-  Atomic::dec_ptr(&_count);
+  Atomic::dec(&_count);
  assert(_count >= 0, "invariant");
  Self->_Stalled = 0;

@ -440,7 +437,6 @@ int ObjectMonitor::TryLock(Thread * Self) {
    // Either guarantee _recursions == 0 or set _recursions = 0.
    assert(_recursions == 0, "invariant");
    assert(_owner == Self, "invariant");
-    // CONSIDER: set or assert that OwnerIsThread == 1
    return 1;
  }
  // The lock had been free momentarily, but we lost the race to the lock.
@ -922,7 +918,6 @@ void NOINLINE ObjectMonitor::exit(bool not_suspended, TRAPS) {
      assert(_recursions == 0, "invariant");
      _owner = THREAD;
      _recursions = 0;
-      OwnerIsThread = 1;
    } else {
      // Apparent unbalanced locking ...
      // Naively we'd like to throw IllegalMonitorStateException.
@ -1346,7 +1341,6 @@ intptr_t ObjectMonitor::complete_exit(TRAPS) {
      assert(_recursions == 0, "internal state error");
      _owner = THREAD;   // Convert from basiclock addr to Thread addr
      _recursions = 0;
-      OwnerIsThread = 1;
    }
  }

@ -1385,7 +1379,6 @@ void ObjectMonitor::reenter(intptr_t recursions, TRAPS) {
      if (THREAD->is_lock_owned((address) _owner)) {                        \
        _owner = THREAD;  /* Convert from basiclock addr to Thread addr */  \
        _recursions = 0;                                                    \
-        OwnerIsThread = 1;                                                  \
      } else {                                                              \
        TEVENT(Throw IMSX);                                                 \
        THROW(vmSymbols::java_lang_IllegalMonitorStateException());         \
@ -1906,8 +1899,8 @@ void ObjectMonitor::notifyAll(TRAPS) {
 // a contending thread could enqueue itself on the cxq and then spin locally
 // on a thread-specific variable such as its ParkEvent._Event flag.
 // That's left as an exercise for the reader.  Note that global spinning is
-// not problematic on Niagara, as the L2$ serves the interconnect and has both
-// low latency and massive bandwidth.
+// not problematic on Niagara, as the L2 cache serves the interconnect and
+// has both low latency and massive bandwidth.
 //
 // Broadly, we can fix the spin frequency -- that is, the % of contended lock
 // acquisition attempts where we opt to spin --  at 100% and vary the spin count
@ -2208,7 +2201,7 @@ int ObjectMonitor::TrySpin_VaryDuration(Thread * Self) {
 // as advisory.
 //
 // Beware too, that _owner is sometimes a BasicLock address and sometimes
-// a thread pointer.  We differentiate the two cases with OwnerIsThread.
+// a thread pointer.
 // Alternately, we might tag the type (thread pointer vs basiclock pointer)
 // with the LSB of _owner.  Another option would be to probablistically probe
 // the putative _owner->TypeTag value.
@ -2230,9 +2223,7 @@ int ObjectMonitor::TrySpin_VaryDuration(Thread * Self) {


 int ObjectMonitor::NotRunnable(Thread * Self, Thread * ox) {
-  // Check either OwnerIsThread or ox->TypeTag == 2BAD.
-  if (!OwnerIsThread) return 0;
-
+  // Check ox->TypeTag == 2BAD.
  if (ox == NULL) return 0;

  // Avoid transitive spinning ...
@ -2399,20 +2390,6 @@ void ObjectMonitor::Initialize() {
  }
 }

-
-// Compile-time asserts
-// When possible, it's better to catch errors deterministically at
-// compile-time than at runtime.  The down-side to using compile-time
-// asserts is that error message -- often something about negative array
-// indices -- is opaque.
-
-#define CTASSERT(x) { int tag[1-(2*!(x))]; printf ("Tag @" INTPTR_FORMAT "\n", (intptr_t)tag); }
-
-void ObjectMonitor::ctAsserts() {
-  CTASSERT(offset_of (ObjectMonitor, _header) == 0);
-}
-
-
 static char * kvGet(char * kvList, const char * Key) {
  if (kvList == NULL) return NULL;
  size_t n = strlen(Key);
@ -2526,6 +2503,8 @@ void ObjectMonitor::sanity_checks() {
  if (verbose) {
    tty->print_cr("INFO: sizeof(ObjectMonitor)=" SIZE_FORMAT,
                  sizeof(ObjectMonitor));
+    tty->print_cr("INFO: sizeof(PaddedEnd<ObjectMonitor>)=" SIZE_FORMAT,
+                  sizeof(PaddedEnd<ObjectMonitor>));
  }

  uint cache_line_size = VM_Version::L1_data_cache_line_size();
@ -2559,9 +2538,9 @@ void ObjectMonitor::sanity_checks() {
      warning_cnt++;
    }

-    if ((sizeof(ObjectMonitor) % cache_line_size) != 0) {
-      tty->print_cr("WARNING: ObjectMonitor size is not a multiple of "
-                    "a cache line which permits false sharing.");
+    if ((sizeof(PaddedEnd<ObjectMonitor>) % cache_line_size) != 0) {
+      tty->print_cr("WARNING: PaddedEnd<ObjectMonitor> size is not a "
+                    "multiple of a cache line which permits false sharing.");
      warning_cnt++;
    }
  }
--- a/hotspot/src/share/vm/runtime/objectMonitor.hpp
+++ b/hotspot/src/share/vm/runtime/objectMonitor.hpp
@ -25,6 +25,7 @@
 #ifndef SHARE_VM_RUNTIME_OBJECTMONITOR_HPP
 #define SHARE_VM_RUNTIME_OBJECTMONITOR_HPP

+#include "memory/padded.hpp"
 #include "runtime/os.hpp"
 #include "runtime/park.hpp"
 #include "runtime/perfData.hpp"
@ -58,21 +59,71 @@ class ObjectWaiter : public StackObj {
 // forward declaration to avoid include tracing.hpp
 class EventJavaMonitorWait;

-// WARNING:
-//   This is a very sensitive and fragile class. DO NOT make any
-// change unless you are fully aware of the underlying semantics.
-
-//   This class can not inherit from any other class, because I have
-// to let the displaced header be the very first word. Otherwise I
-// have to let markOop include this file, which would export the
-// monitor data structure to everywhere.
+// The ObjectMonitor class implements the heavyweight version of a
+// JavaMonitor. The lightweight BasicLock/stack lock version has been
+// inflated into an ObjectMonitor. This inflation is typically due to
+// contention or use of Object.wait().
 //
-// The ObjectMonitor class is used to implement JavaMonitors which have
-// transformed from the lightweight structure of the thread stack to a
-// heavy weight lock due to contention
-
-// It is also used as RawMonitor by the JVMTI
-
+// WARNING: This is a very sensitive and fragile class. DO NOT make any
+// changes unless you are fully aware of the underlying semantics.
+//
+// Class JvmtiRawMonitor currently inherits from ObjectMonitor so
+// changes in this class must be careful to not break JvmtiRawMonitor.
+// These two subsystems should be separated.
+//
+// ObjectMonitor Layout Overview/Highlights/Restrictions:
+//
+// - The _header field must be at offset 0 because the displaced header
+//   from markOop is stored there. We do not want markOop.hpp to include
+//   ObjectMonitor.hpp to avoid exposing ObjectMonitor everywhere. This
+//   means that ObjectMonitor cannot inherit from any other class nor can
+//   it use any virtual member functions. This restriction is critical to
+//   the proper functioning of the VM.
+// - The _header and _owner fields should be separated by enough space
+//   to avoid false sharing due to parallel access by different threads.
+//   This is an advisory recommendation.
+// - The general layout of the fields in ObjectMonitor is:
+//     _header
+//     <lightly_used_fields>
+//     <optional padding>
+//     _owner
+//     <remaining_fields>
+// - The VM assumes write ordering and machine word alignment with
+//   respect to the _owner field and the <remaining_fields> that can
+//   be read in parallel by other threads.
+// - Generally fields that are accessed closely together in time should
+//   be placed proximally in space to promote data cache locality. That
+//   is, temporal locality should condition spatial locality.
+// - We have to balance avoiding false sharing with excessive invalidation
+//   from coherence traffic. As such, we try to cluster fields that tend
+//   to be _written_ at approximately the same time onto the same data
+//   cache line.
+// - We also have to balance the natural tension between minimizing
+//   single-threaded capacity misses and avoiding excessive multi-threaded
+//   coherency misses. There is no single optimal layout for both
+//   single-threaded and multi-threaded environments.
+//
+// - See ObjectMonitor::sanity_checks() for how critical restrictions are
+//   enforced and advisory recommendations are reported.
+// - Adjacent ObjectMonitors should be separated by enough space to avoid
+//   false sharing. This is handled by the ObjectMonitor allocation code
+//   in synchronizer.cpp. Also see ObjectSynchronizer::sanity_checks().
+//
+// Future notes:
+//   - Separating _owner from the <remaining_fields> by enough space to
+//     avoid false sharing might be profitable. Given
+//     http://blogs.oracle.com/dave/entry/cas_and_cache_trivia_invalidate
+//     we know that the CAS in monitorenter will invalidate the line
+//     underlying _owner. We want to avoid an L1 data cache miss on that
+//     same line for monitorexit. Putting these <remaining_fields>:
+//     _recursions, _EntryList, _cxq, and _succ, all of which may be
+//     fetched in the inflated unlock path, on a different cache line
+//     would make them immune to CAS-based invalidation from the _owner
+//     field.
+//
+//   - The _recursions field should be of type int, or int32_t but not
+//     intptr_t. There's no reason to use a 64-bit type for this field
+//     in a 64-bit JVM.

 class ObjectMonitor {
 public:
@ -84,7 +135,84 @@ class ObjectMonitor {
    OM_TIMED_OUT              // Object.wait() timed out
  };

+ private:
+  friend class ObjectSynchronizer;
+  friend class ObjectWaiter;
+  friend class VMStructs;
+
+  volatile markOop   _header;       // displaced object header word - mark
+  void*     volatile _object;       // backward object pointer - strong root
 public:
+  ObjectMonitor *    FreeNext;      // Free list linkage
+ private:
+  DEFINE_PAD_MINUS_SIZE(0, DEFAULT_CACHE_LINE_SIZE,
+                        sizeof(volatile markOop) + sizeof(void * volatile) +
+                        sizeof(ObjectMonitor *));
+ protected:                         // protected for JvmtiRawMonitor
+  void *  volatile _owner;          // pointer to owning thread OR BasicLock
+  volatile jlong _previous_owner_tid;  // thread id of the previous owner of the monitor
+  volatile intptr_t  _recursions;   // recursion count, 0 for first entry
+  ObjectWaiter * volatile _EntryList; // Threads blocked on entry or reentry.
+                                      // The list is actually composed of WaitNodes,
+                                      // acting as proxies for Threads.
+ private:
+  ObjectWaiter * volatile _cxq;     // LL of recently-arrived threads blocked on entry.
+  Thread * volatile _succ;          // Heir presumptive thread - used for futile wakeup throttling
+  Thread * volatile _Responsible;
+
+  volatile int _Spinner;            // for exit->spinner handoff optimization
+  volatile int _SpinFreq;           // Spin 1-out-of-N attempts: success rate
+  volatile int _SpinClock;
+  volatile intptr_t _SpinState;     // MCS/CLH list of spinners
+  volatile int _SpinDuration;
+
+  volatile jint  _count;            // reference count to prevent reclamation/deflation
+                                    // at stop-the-world time.  See deflate_idle_monitors().
+                                    // _count is approximately |_WaitSet| + |_EntryList|
+ protected:
+  ObjectWaiter * volatile _WaitSet; // LL of threads wait()ing on the monitor
+  volatile jint  _waiters;          // number of waiting threads
+ private:
+  volatile int _WaitSetLock;        // protects Wait Queue - simple spinlock
+
+ public:
+  static void Initialize();
+  static PerfCounter * _sync_ContendedLockAttempts;
+  static PerfCounter * _sync_FutileWakeups;
+  static PerfCounter * _sync_Parks;
+  static PerfCounter * _sync_EmptyNotifications;
+  static PerfCounter * _sync_Notifications;
+  static PerfCounter * _sync_SlowEnter;
+  static PerfCounter * _sync_SlowExit;
+  static PerfCounter * _sync_SlowNotify;
+  static PerfCounter * _sync_SlowNotifyAll;
+  static PerfCounter * _sync_FailedSpins;
+  static PerfCounter * _sync_SuccessfulSpins;
+  static PerfCounter * _sync_PrivateA;
+  static PerfCounter * _sync_PrivateB;
+  static PerfCounter * _sync_MonInCirculation;
+  static PerfCounter * _sync_MonScavenged;
+  static PerfCounter * _sync_Inflations;
+  static PerfCounter * _sync_Deflations;
+  static PerfLongVariable * _sync_MonExtant;
+
+  static int Knob_Verbose;
+  static int Knob_VerifyInUse;
+  static int Knob_SpinLimit;
+
+  void* operator new (size_t size) throw() {
+    return AllocateHeap(size, mtInternal);
+  }
+  void* operator new[] (size_t size) throw() {
+    return operator new (size);
+  }
+  void operator delete(void* p) {
+    FreeHeap(p, mtInternal);
+  }
+  void operator delete[] (void *p) {
+    operator delete(p);
+  }
+
  // TODO-FIXME: the "offset" routines should return a type of off_t instead of int ...
  // ByteSize would also be an appropriate type.
  static int header_offset_in_bytes()      { return offset_of(ObjectMonitor, _header); }
@ -100,14 +228,11 @@ class ObjectMonitor {
  static int Responsible_offset_in_bytes() { return offset_of(ObjectMonitor, _Responsible); }
  static int Spinner_offset_in_bytes()     { return offset_of(ObjectMonitor, _Spinner); }

- public:
  // Eventually we'll make provisions for multiple callbacks, but
  // now one will suffice.
  static int (*SpinCallbackFunction)(intptr_t, int);
  static intptr_t SpinCallbackArgument;

-
- public:
  markOop   header() const;
  void      set_header(markOop hdr);

@ -123,39 +248,22 @@ class ObjectMonitor {
  void*     owner() const;
  void      set_owner(void* owner);

-  intptr_t  waiters() const;
+  jint      waiters() const;

-  intptr_t  count() const;
-  void      set_count(intptr_t count);
-  intptr_t  contentions() const;
+  jint      count() const;
+  void      set_count(jint count);
+  jint      contentions() const;
  intptr_t  recursions() const                                         { return _recursions; }

-  // JVM/DI GetMonitorInfo() needs this
+  // JVM/TI GetObjectMonitorUsage() needs this:
  ObjectWaiter* first_waiter()                                         { return _WaitSet; }
  ObjectWaiter* next_waiter(ObjectWaiter* o)                           { return o->_next; }
  Thread* thread_of_waiter(ObjectWaiter* o)                            { return o->_thread; }

-  // initialize the monitor, exception the semaphore, all other fields
-  // are simple integers or pointers
-  ObjectMonitor() {
-    _header       = NULL;
-    _count        = 0;
-    _waiters      = 0;
-    _recursions   = 0;
-    _object       = NULL;
-    _owner        = NULL;
-    _WaitSet      = NULL;
-    _WaitSetLock  = 0;
-    _Responsible  = NULL;
-    _succ         = NULL;
-    _cxq          = NULL;
-    FreeNext      = NULL;
-    _EntryList    = NULL;
-    _SpinFreq     = 0;
-    _SpinClock    = 0;
-    OwnerIsThread = 0;
-    _previous_owner_tid = 0;
-  }
+ protected:
+  // We don't typically expect or want the ctors or dtors to run.
+  // Normal ObjectMonitors are type-stable and immortal.
+  ObjectMonitor() { ::memset((void *)this, 0, sizeof(*this)); }

  ~ObjectMonitor() {
    // TODO: Add asserts ...
@ -169,7 +277,7 @@ class ObjectMonitor {
    // _cxq == 0 _succ == NULL _owner == NULL _waiters == 0
    // _count == 0 EntryList  == NULL
    // _recursions == 0 _WaitSet == NULL
-    // TODO: assert (is_busy()|_recursions) == 0
+    assert(((is_busy()|_recursions) == 0), "freeing inuse monitor");
    _succ          = NULL;
    _EntryList     = NULL;
    _cxq           = NULL;
@ -177,7 +285,6 @@ class ObjectMonitor {
    _recursions    = 0;
    _SpinFreq      = 0;
    _SpinClock     = 0;
-    OwnerIsThread  = 0;
  }

 public:
@ -221,7 +328,6 @@ class ObjectMonitor {
  int       TrySpin_Fixed(Thread * Self);
  int       TrySpin_VaryFrequency(Thread * Self);
  int       TrySpin_VaryDuration(Thread * Self);
-  void      ctAsserts();
  void      ExitEpilog(Thread * Self, ObjectWaiter * Wakee);
  bool      ExitSuspendEquivalent(JavaThread * Self);
  void      post_monitor_wait_event(EventJavaMonitorWait * event,
@ -229,102 +335,6 @@ class ObjectMonitor {
                                    jlong timeout,
                                    bool timedout);

- private:
-  friend class ObjectSynchronizer;
-  friend class ObjectWaiter;
-  friend class VMStructs;
-
-  // WARNING: this must be the very first word of ObjectMonitor
-  // This means this class can't use any virtual member functions.
-
-  volatile markOop   _header;       // displaced object header word - mark
-  void*     volatile _object;       // backward object pointer - strong root
-
-  double SharingPad[1];             // temp to reduce false sharing
-
-  // All the following fields must be machine word aligned
-  // The VM assumes write ordering wrt these fields, which can be
-  // read from other threads.
-
- protected:                         // protected for jvmtiRawMonitor
-  void *  volatile _owner;          // pointer to owning thread OR BasicLock
-  volatile jlong _previous_owner_tid;  // thread id of the previous owner of the monitor
-  volatile intptr_t  _recursions;   // recursion count, 0 for first entry
- private:
-  int OwnerIsThread;                // _owner is (Thread *) vs SP/BasicLock
-  ObjectWaiter * volatile _cxq;     // LL of recently-arrived threads blocked on entry.
-                                    // The list is actually composed of WaitNodes, acting
-                                    // as proxies for Threads.
- protected:
-  ObjectWaiter * volatile _EntryList;  // Threads blocked on entry or reentry.
- private:
-  Thread * volatile _succ;          // Heir presumptive thread - used for futile wakeup throttling
-  Thread * volatile _Responsible;
-  int _PromptDrain;                 // rqst to drain cxq into EntryList ASAP
-
-  volatile int _Spinner;            // for exit->spinner handoff optimization
-  volatile int _SpinFreq;           // Spin 1-out-of-N attempts: success rate
-  volatile int _SpinClock;
-  volatile int _SpinDuration;
-  volatile intptr_t _SpinState;     // MCS/CLH list of spinners
-
-  // TODO-FIXME: _count, _waiters and _recursions should be of
-  // type int, or int32_t but not intptr_t.  There's no reason
-  // to use 64-bit fields for these variables on a 64-bit JVM.
-
-  volatile intptr_t  _count;        // reference count to prevent reclamation/deflation
-                                    // at stop-the-world time.  See deflate_idle_monitors().
-                                    // _count is approximately |_WaitSet| + |_EntryList|
- protected:
-  volatile intptr_t  _waiters;      // number of waiting threads
- private:
- protected:
-  ObjectWaiter * volatile _WaitSet; // LL of threads wait()ing on the monitor
- private:
-  volatile int _WaitSetLock;        // protects Wait Queue - simple spinlock
-
- public:
-  int _QMix;                        // Mixed prepend queue discipline
-  ObjectMonitor * FreeNext;         // Free list linkage
-  intptr_t StatA, StatsB;
-
- public:
-  static void Initialize();
-  static PerfCounter * _sync_ContendedLockAttempts;
-  static PerfCounter * _sync_FutileWakeups;
-  static PerfCounter * _sync_Parks;
-  static PerfCounter * _sync_EmptyNotifications;
-  static PerfCounter * _sync_Notifications;
-  static PerfCounter * _sync_SlowEnter;
-  static PerfCounter * _sync_SlowExit;
-  static PerfCounter * _sync_SlowNotify;
-  static PerfCounter * _sync_SlowNotifyAll;
-  static PerfCounter * _sync_FailedSpins;
-  static PerfCounter * _sync_SuccessfulSpins;
-  static PerfCounter * _sync_PrivateA;
-  static PerfCounter * _sync_PrivateB;
-  static PerfCounter * _sync_MonInCirculation;
-  static PerfCounter * _sync_MonScavenged;
-  static PerfCounter * _sync_Inflations;
-  static PerfCounter * _sync_Deflations;
-  static PerfLongVariable * _sync_MonExtant;
-
- public:
-  static int Knob_Verbose;
-  static int Knob_VerifyInUse;
-  static int Knob_SpinLimit;
-  void* operator new (size_t size) throw() {
-    return AllocateHeap(size, mtInternal);
-  }
-  void* operator new[] (size_t size) throw() {
-    return operator new (size);
-  }
-  void operator delete(void* p) {
-    FreeHeap(p, mtInternal);
-  }
-  void operator delete[] (void *p) {
-    operator delete(p);
-  }
 };

 #undef TEVENT
--- a/hotspot/src/share/vm/runtime/objectMonitor.inline.hpp
+++ b/hotspot/src/share/vm/runtime/objectMonitor.inline.hpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -40,15 +40,11 @@ inline void ObjectMonitor::set_header(markOop hdr) {
  _header = hdr;
 }

-inline intptr_t ObjectMonitor::count() const {
+inline jint ObjectMonitor::count() const {
  return _count;
 }

-inline void ObjectMonitor::set_count(intptr_t count) {
-  _count= count;
-}
-
-inline intptr_t ObjectMonitor::waiters() const {
+inline jint ObjectMonitor::waiters() const {
  return _waiters;
 }

@ -61,7 +57,7 @@ inline void ObjectMonitor::clear() {
  assert(_count == 0, "Fatal logic error in ObjectMonitor count!");
  assert(_waiters == 0, "Fatal logic error in ObjectMonitor waiters!");
  assert(_recursions == 0, "Fatal logic error in ObjectMonitor recursions!");
-  assert(_object, "Fatal logic error in ObjectMonitor object!");
+  assert(_object != NULL, "Fatal logic error in ObjectMonitor object!");
  assert(_owner == 0, "Fatal logic error in ObjectMonitor owner!");

  _header = NULL;
@ -85,7 +81,6 @@ inline bool ObjectMonitor::check(TRAPS) {
  if (THREAD != _owner) {
    if (THREAD->is_lock_owned((address) _owner)) {
      _owner = THREAD;  // regain ownership of inflated monitor
-      OwnerIsThread = 1 ;
      assert (_recursions == 0, "invariant") ;
    } else {
      check_slow(THREAD);
@ -97,7 +92,7 @@ inline bool ObjectMonitor::check(TRAPS) {


 // return number of threads contending for this monitor
-inline intptr_t ObjectMonitor::contentions() const {
+inline jint ObjectMonitor::contentions() const {
  return _count;
 }

--- a/hotspot/src/share/vm/runtime/synchronizer.cpp
+++ b/hotspot/src/share/vm/runtime/synchronizer.cpp
@ -24,6 +24,7 @@

 #include "precompiled.hpp"
 #include "classfile/vmSymbols.hpp"
+#include "memory/padded.hpp"
 #include "memory/resourceArea.hpp"
 #include "oops/markOop.hpp"
 #include "oops/oop.inline.hpp"
@ -110,6 +111,8 @@ int dtrace_waited_probe(ObjectMonitor* monitor, Handle obj, Thread* thr) {
 #define NINFLATIONLOCKS 256
 static volatile intptr_t InflationLocks[NINFLATIONLOCKS];

+// gBlockList is really PaddedEnd<ObjectMonitor> *, but we don't
+// want to expose the PaddedEnd template more than necessary.
 ObjectMonitor * ObjectSynchronizer::gBlockList = NULL;
 ObjectMonitor * volatile ObjectSynchronizer::gFreeList  = NULL;
 ObjectMonitor * volatile ObjectSynchronizer::gOmInUseList  = NULL;
@ -410,16 +413,15 @@ void ObjectSynchronizer::notifyall(Handle obj, TRAPS) {
 // performed by the CPU(s) or platform.

 struct SharedGlobals {
+  char         _pad_prefix[DEFAULT_CACHE_LINE_SIZE];
  // These are highly shared mostly-read variables.
-  // To avoid false-sharing they need to be the sole occupants of a $ line.
-  double padPrefix[8];
+  // To avoid false-sharing they need to be the sole occupants of a cache line.
  volatile int stwRandom;
  volatile int stwCycle;
-
-  // Hot RW variables -- Sequester to avoid false-sharing
-  double padSuffix[16];
+  DEFINE_PAD_MINUS_SIZE(1, DEFAULT_CACHE_LINE_SIZE, sizeof(volatile int) * 2);
+  // Hot RW variable -- Sequester to avoid false-sharing
  volatile int hcSequence;
-  double padFinal[8];
+  DEFINE_PAD_MINUS_SIZE(2, DEFAULT_CACHE_LINE_SIZE, sizeof(volatile int));
 };

 static SharedGlobals GVars;
@ -780,18 +782,18 @@ JavaThread* ObjectSynchronizer::get_lock_owner(Handle h_obj, bool doLock) {
 // Visitors ...

 void ObjectSynchronizer::monitors_iterate(MonitorClosure* closure) {
-  ObjectMonitor* block = gBlockList;
+  PaddedEnd<ObjectMonitor> * block = (PaddedEnd<ObjectMonitor> *)gBlockList;
  ObjectMonitor* mid;
  while (block) {
    assert(block->object() == CHAINMARKER, "must be a block header");
    for (int i = _BLOCKSIZE - 1; i > 0; i--) {
-      mid = block + i;
+      mid = (ObjectMonitor *)(block + i);
      oop object = (oop) mid->object();
      if (object != NULL) {
        closure->do_monitor(mid);
      }
    }
-    block = (ObjectMonitor*) block->FreeNext;
+    block = (PaddedEnd<ObjectMonitor> *) block->FreeNext;
  }
 }

@ -806,10 +808,12 @@ static inline ObjectMonitor* next(ObjectMonitor* block) {

 void ObjectSynchronizer::oops_do(OopClosure* f) {
  assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
-  for (ObjectMonitor* block = gBlockList; block != NULL; block = next(block)) {
+  for (PaddedEnd<ObjectMonitor> * block =
+       (PaddedEnd<ObjectMonitor> *)gBlockList; block != NULL;
+       block = (PaddedEnd<ObjectMonitor> *)next(block)) {
    assert(block->object() == CHAINMARKER, "must be a block header");
    for (int i = 1; i < _BLOCKSIZE; i++) {
-      ObjectMonitor* mid = &block[i];
+      ObjectMonitor* mid = (ObjectMonitor *)&block[i];
      if (mid->object() != NULL) {
        f->do_oop((oop*)mid->object_addr());
      }
@ -966,16 +970,29 @@ ObjectMonitor * NOINLINE ObjectSynchronizer::omAlloc(Thread * Self) {
    // 3: allocate a block of new ObjectMonitors
    // Both the local and global free lists are empty -- resort to malloc().
    // In the current implementation objectMonitors are TSM - immortal.
+    // Ideally, we'd write "new ObjectMonitor[_BLOCKSIZE]", but we want
+    // each ObjectMonitor to start at the beginning of a cache line,
+    // so we use align_size_up().
+    // A better solution would be to use C++ placement-new.
+    // BEWARE: As it stands currently, we don't run the ctors!
    assert(_BLOCKSIZE > 1, "invariant");
-    ObjectMonitor * temp = new ObjectMonitor[_BLOCKSIZE];
+    size_t neededsize = sizeof(PaddedEnd<ObjectMonitor>) * _BLOCKSIZE;
+    PaddedEnd<ObjectMonitor> * temp;
+    size_t aligned_size = neededsize + (DEFAULT_CACHE_LINE_SIZE - 1);
+    void* real_malloc_addr = (void *)NEW_C_HEAP_ARRAY(char, aligned_size,
+                                                      mtInternal);
+    temp = (PaddedEnd<ObjectMonitor> *)
+             align_size_up((intptr_t)real_malloc_addr,
+                           DEFAULT_CACHE_LINE_SIZE);

    // NOTE: (almost) no way to recover if allocation failed.
    // We might be able to induce a STW safepoint and scavenge enough
    // objectMonitors to permit progress.
    if (temp == NULL) {
-      vm_exit_out_of_memory(sizeof (ObjectMonitor[_BLOCKSIZE]), OOM_MALLOC_ERROR,
+      vm_exit_out_of_memory(neededsize, OOM_MALLOC_ERROR,
                            "Allocate ObjectMonitors");
    }
+    (void)memset((void *) temp, 0, neededsize);

    // Format the block.
    // initialize the linked list, each monitor points to its next
@ -986,7 +1003,7 @@ ObjectMonitor * NOINLINE ObjectSynchronizer::omAlloc(Thread * Self) {
    // look like: class Block { Block * next; int N; ObjectMonitor Body [N] ; }

    for (int i = 1; i < _BLOCKSIZE; i++) {
-      temp[i].FreeNext = &temp[i+1];
+      temp[i].FreeNext = (ObjectMonitor *)&temp[i+1];
    }

    // terminate the last monitor as the end of list
@ -1141,10 +1158,6 @@ ObjectMonitor* ObjectSynchronizer::inflate_helper(oop obj) {
 }


-// Note that we could encounter some performance loss through false-sharing as
-// multiple locks occupy the same $ line.  Padding might be appropriate.
-
-
 ObjectMonitor * NOINLINE ObjectSynchronizer::inflate(Thread * Self,
                                                     oop object) {
  // Inflate mutates the heap ...
@ -1210,7 +1223,6 @@ ObjectMonitor * NOINLINE ObjectSynchronizer::inflate(Thread * Self,
      // in which INFLATING appears in the mark.
      m->Recycle();
      m->_Responsible  = NULL;
-      m->OwnerIsThread = 0;
      m->_recursions   = 0;
      m->_SpinDuration = ObjectMonitor::Knob_SpinLimit;   // Consider: maintain by type/class

@ -1257,8 +1269,8 @@ ObjectMonitor * NOINLINE ObjectSynchronizer::inflate(Thread * Self,
      m->set_header(dmw);

      // Optimization: if the mark->locker stack address is associated
-      // with this thread we could simply set m->_owner = Self and
-      // m->OwnerIsThread = 1. Note that a thread can inflate an object
+      // with this thread we could simply set m->_owner = Self.
+      // Note that a thread can inflate an object
      // that it has stack-locked -- as might happen in wait() -- directly
      // with CAS.  That is, we can avoid the xchg-NULL .... ST idiom.
      m->set_owner(mark->locker());
@ -1302,7 +1314,6 @@ ObjectMonitor * NOINLINE ObjectSynchronizer::inflate(Thread * Self,
    m->set_header(mark);
    m->set_owner(NULL);
    m->set_object(object);
-    m->OwnerIsThread = 1;
    m->_recursions   = 0;
    m->_Responsible  = NULL;
    m->_SpinDuration = ObjectMonitor::Knob_SpinLimit;       // consider: keep metastats by type/class
@ -1310,7 +1321,6 @@ ObjectMonitor * NOINLINE ObjectSynchronizer::inflate(Thread * Self,
    if (Atomic::cmpxchg_ptr (markOopDesc::encode(m), object->mark_addr(), mark) != mark) {
      m->set_object(NULL);
      m->set_owner(NULL);
-      m->OwnerIsThread = 0;
      m->Recycle();
      omRelease(Self, m, true);
      m = NULL;
@ -1336,9 +1346,6 @@ ObjectMonitor * NOINLINE ObjectSynchronizer::inflate(Thread * Self,
  }
 }

-// Note that we could encounter some performance loss through false-sharing as
-// multiple locks occupy the same $ line.  Padding might be appropriate.
-

 // Deflate_idle_monitors() is called at all safepoints, immediately
 // after all mutators are stopped, but before any objects have moved.
@ -1491,12 +1498,14 @@ void ObjectSynchronizer::deflate_idle_monitors() {
      nInuse += gOmInUseCount;
    }

-  } else for (ObjectMonitor* block = gBlockList; block != NULL; block = next(block)) {
+  } else for (PaddedEnd<ObjectMonitor> * block =
+              (PaddedEnd<ObjectMonitor> *)gBlockList; block != NULL;
+              block = (PaddedEnd<ObjectMonitor> *)next(block)) {
    // Iterate over all extant monitors - Scavenge all idle monitors.
    assert(block->object() == CHAINMARKER, "must be a block header");
    nInCirculation += _BLOCKSIZE;
    for (int i = 1; i < _BLOCKSIZE; i++) {
-      ObjectMonitor* mid = &block[i];
+      ObjectMonitor* mid = (ObjectMonitor*)&block[i];
      oop obj = (oop) mid->object();

      if (obj == NULL) {
@ -1648,18 +1657,18 @@ void ObjectSynchronizer::sanity_checks(const bool verbose,

 // Verify all monitors in the monitor cache, the verification is weak.
 void ObjectSynchronizer::verify() {
-  ObjectMonitor* block = gBlockList;
+  PaddedEnd<ObjectMonitor> * block = (PaddedEnd<ObjectMonitor> *)gBlockList;
  ObjectMonitor* mid;
  while (block) {
    assert(block->object() == CHAINMARKER, "must be a block header");
    for (int i = 1; i < _BLOCKSIZE; i++) {
-      mid = block + i;
+      mid = (ObjectMonitor *)(block + i);
      oop object = (oop) mid->object();
      if (object != NULL) {
        mid->verify();
      }
    }
-    block = (ObjectMonitor*) block->FreeNext;
+    block = (PaddedEnd<ObjectMonitor> *) block->FreeNext;
  }
 }

@ -1668,18 +1677,19 @@ void ObjectSynchronizer::verify() {
 // the list of extant blocks without taking a lock.

 int ObjectSynchronizer::verify_objmon_isinpool(ObjectMonitor *monitor) {
-  ObjectMonitor* block = gBlockList;
+  PaddedEnd<ObjectMonitor> * block = (PaddedEnd<ObjectMonitor> *)gBlockList;

  while (block) {
    assert(block->object() == CHAINMARKER, "must be a block header");
-    if (monitor > &block[0] && monitor < &block[_BLOCKSIZE]) {
+    if (monitor > (ObjectMonitor *)&block[0] &&
+        monitor < (ObjectMonitor *)&block[_BLOCKSIZE]) {
      address mon = (address) monitor;
      address blk = (address) block;
      size_t diff = mon - blk;
-      assert((diff % sizeof(ObjectMonitor)) == 0, "check");
+      assert((diff % sizeof(PaddedEnd<ObjectMonitor>)) == 0, "check");
      return 1;
    }
-    block = (ObjectMonitor*) block->FreeNext;
+    block = (PaddedEnd<ObjectMonitor> *) block->FreeNext;
  }
  return 0;
 }
--- a/hotspot/src/share/vm/runtime/synchronizer.hpp
+++ b/hotspot/src/share/vm/runtime/synchronizer.hpp
@ -134,6 +134,8 @@ class ObjectSynchronizer : AllStatic {

 private:
  enum { _BLOCKSIZE = 128 };
+  // gBlockList is really PaddedEnd<ObjectMonitor> *, but we don't
+  // want to expose the PaddedEnd template more than necessary.
  static ObjectMonitor* gBlockList;
  static ObjectMonitor * volatile gFreeList;
  // global monitor in use list, for moribund threads,
--- a/hotspot/src/share/vm/runtime/vmStructs.cpp
+++ b/hotspot/src/share/vm/runtime/vmStructs.cpp
@ -1070,8 +1070,8 @@ typedef TwoOopHashtable<Symbol*, mtClass>     SymbolTwoOopHashtable;
  volatile_nonstatic_field(ObjectMonitor,      _header,                                       markOop)                               \
  unchecked_nonstatic_field(ObjectMonitor,     _object,                                       sizeof(void *)) /* NOTE: no type */    \
  unchecked_nonstatic_field(ObjectMonitor,     _owner,                                        sizeof(void *)) /* NOTE: no type */    \
-  volatile_nonstatic_field(ObjectMonitor,      _count,                                        intptr_t)                              \
-  volatile_nonstatic_field(ObjectMonitor,      _waiters,                                      intptr_t)                              \
+  volatile_nonstatic_field(ObjectMonitor,      _count,                                        jint)                                  \
+  volatile_nonstatic_field(ObjectMonitor,      _waiters,                                      jint)                                  \
  volatile_nonstatic_field(ObjectMonitor,      _recursions,                                   intptr_t)                              \
  nonstatic_field(ObjectMonitor,               FreeNext,                                      ObjectMonitor*)                        \
  volatile_nonstatic_field(BasicLock,          _displaced_header,                             markOop)                               \
@ -2507,6 +2507,12 @@ typedef TwoOopHashtable<Symbol*, mtClass>     SymbolTwoOopHashtable;
  declare_constant(Deoptimization::Action_make_not_compilable)            \
  declare_constant(Deoptimization::Action_LIMIT)                          \
                                                                          \
+  /***************************************************/                   \
+  /* DEFAULT_CACHE_LINE_SIZE (globalDefinitions.hpp) */                   \
+  /***************************************************/                   \
+                                                                          \
+  declare_constant(DEFAULT_CACHE_LINE_SIZE)                               \
+                                                                          \
  /*********************/                                                 \
  /* Matcher (C2 only) */                                                 \
  /*********************/                                                 \
--- a/hotspot/src/share/vm/utilities/globalDefinitions.hpp
+++ b/hotspot/src/share/vm/utilities/globalDefinitions.hpp
@ -540,7 +540,9 @@ inline address clamp_address_in_page(address addr, address page_address, intptr_


 // The expected size in bytes of a cache line, used to pad data structures.
-#define DEFAULT_CACHE_LINE_SIZE 64
+#ifndef DEFAULT_CACHE_LINE_SIZE
+  #define DEFAULT_CACHE_LINE_SIZE 64
+#endif


 //----------------------------------------------------------------------------------------------------