8253183: Fragile memory barrier selection for some weak memory model platforms

Reviewed-by: dholmes, eosterlund, dcubed
Author: Martin Doerr
Date:   2020-09-30 09:03:06 +00:00
parent 8331e63fe4
commit dc3a0f5f88
4 changed files with 23 additions and 21 deletions

src/hotspot/share/gc/shared/taskqueue.inline.hpp

@@ -205,7 +205,7 @@ bool OverflowTaskQueue<E, F, N>::pop_overflow(E& t)
 template<class E, MEMFLAGS F, unsigned int N>
 bool GenericTaskQueue<E, F, N>::pop_global(E& t) {
   Age oldAge = age_relaxed();
-#ifndef CPU_MULTI_COPY_ATOMIC
+
   // Architectures with non-multi-copy-atomic memory model require a
   // full fence here to guarantee that bottom is not older than age,
   // which is crucial for the correctness of the algorithm.
@@ -219,12 +219,8 @@ bool GenericTaskQueue<E, F, N>::pop_global(E& t) {
   // The requirement is that Thread3 must never read an older bottom
   // value than Thread2 after Thread3 has seen the age value from
   // Thread2.
-  OrderAccess::fence();
-#else
-  // Everyone else can make do with a LoadLoad barrier to keep reads
-  // from age and bottom in order.
-  OrderAccess::loadload();
-#endif
+  OrderAccess::loadload_for_IRIW();
+
   uint localBot = bottom_acquire();
   uint n_elems = clean_size(localBot, oldAge.top());
   if (n_elems == 0) {

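For readers unfamiliar with the pattern, the Thread1/Thread2/Thread3 scenario in the comment above is an instance of the IRIW (Independent Reads of Independent Writes) litmus test that gives loadload_for_IRIW() its name. A minimal standalone sketch in C++11 atomics, illustrative only and not HotSpot code:

#include <atomic>
#include <cstdio>
#include <thread>

std::atomic<int> x(0), y(0);  // two independent writes by two writers
int r1, r2, r3, r4;           // what the two readers observed

int main() {
  std::thread w1([] { x.store(1, std::memory_order_release); });
  std::thread w2([] { y.store(1, std::memory_order_release); });
  std::thread rd1([] {
    r1 = x.load(std::memory_order_acquire);  // loads ordered within rd1
    r2 = y.load(std::memory_order_acquire);
  });
  std::thread rd2([] {
    r3 = y.load(std::memory_order_acquire);  // loads ordered within rd2
    r4 = x.load(std::memory_order_acquire);
  });
  w1.join(); w2.join(); rd1.join(); rd2.join();
  // Although each reader's loads are ordered among themselves
  // (load-load), a non-multi-copy-atomic CPU such as PPC64 may still
  // produce r1==1 r2==0 r3==1 r4==0: the two readers disagree about
  // the order of the two independent writes. A full fence between each
  // reader's loads, which is what loadload_for_IRIW() issues on such
  // CPUs, rules that outcome out; elsewhere loadload() is enough.
  printf("r1=%d r2=%d r3=%d r4=%d\n", r1, r2, r3, r4);
  return 0;
}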
src/hotspot/share/runtime/objectMonitor.cpp

@@ -489,13 +489,10 @@ void ObjectMonitor::install_displaced_markword_in_object(const oop obj) {
   // Separate loads in is_being_async_deflated(), which is almost always
   // called before this function, from the load of dmw/header below.
-  if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
-    // A non-multiple copy atomic (nMCA) machine needs a bigger
-    // hammer to separate the loads before and the load below.
-    OrderAccess::fence();
-  } else {
-    OrderAccess::loadload();
-  }
+  //
+  // _contentions and dmw/header may get written by different threads.
+  // Make sure to observe them in the same order when having several observers.
+  OrderAccess::loadload_for_IRIW();
   const oop l_object = object_peek();
   if (l_object == NULL) {

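The fields named in the new comment map directly onto the IRIW shape: _contentions (driven negative by the deflater thread) and the object's dmw/header (installed by a locking thread) are independent writes, and every observer must see them in the same order. A hedged mini-model of the observer side, using made-up stand-in globals rather than the real ObjectMonitor, with a seq_cst fence standing in for what OrderAccess::loadload_for_IRIW() does on a non-multi-copy-atomic CPU:

#include <atomic>

std::atomic<int>  contentions(0);  // stand-in for ObjectMonitor::_contentions
std::atomic<long> header(0);       // stand-in for the dmw/header word

// Mirrors the load sequence in the hunk above.
bool observe(long* dmw_out) {
  bool deflating = contentions.load(std::memory_order_relaxed) < 0;  // load #1
  std::atomic_thread_fence(std::memory_order_seq_cst);               // the barrier
  *dmw_out = header.load(std::memory_order_relaxed);                 // load #2
  return deflating;
}

int main() { long dmw; return observe(&dmw) ? 1 : 0; }

With only a load-load barrier between the two loads, two such observers on an nMCA machine could each see one write but not the other, and so disagree about whether deflation happened before the markword was installed.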
src/hotspot/share/runtime/orderAccess.hpp

@@ -243,6 +243,17 @@ class OrderAccess : public AllStatic {
   static void fence();
   static void cross_modify_fence();
+
+  // Processors which are not multi-copy-atomic require a full fence
+  // to enforce a globally consistent order of Independent Reads of
+  // Independent Writes. Please use only for such patterns!
+  static void loadload_for_IRIW() {
+#ifndef CPU_MULTI_COPY_ATOMIC
+    fence();
+#else
+    loadload();
+#endif
+  }
  private:
   // This is a helper that invokes the StubRoutines::fence_entry()
   // routine if it exists, It should only be used by platforms that

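With the helper in place, barrier selection lives in exactly one spot and each call site collapses to a single line. Reassembled from the hunks in this commit for illustration, the per-call-site pattern that was removed and its replacement:

// Before: every call site repeated the dispatch, some keyed on the
// runtime flag support_IRIW_for_not_multiple_copy_atomic_cpu.
if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
  OrderAccess::fence();      // nMCA machine: full fence for IRIW
} else {
  OrderAccess::loadload();   // multi-copy-atomic: load-load suffices
}

// After: one line; the choice is made once, at compile time, inside
// OrderAccess via the CPU_MULTI_COPY_ATOMIC macro.
OrderAccess::loadload_for_IRIW();

Centralizing the selection, and keying it on the compile-time macro instead of a mix of the macro and the runtime flag, is what removes the fragility named in the bug title.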
src/hotspot/share/runtime/synchronizer.cpp

@@ -1105,13 +1105,11 @@ intptr_t ObjectSynchronizer::FastHashCode(Thread* self, oop obj) {
     // Separate load of dmw/header above from the loads in
     // is_being_async_deflated().
-    if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
-      // A non-multiple copy atomic (nMCA) machine needs a bigger
-      // hammer to separate the load above and the loads below.
-      OrderAccess::fence();
-    } else {
-      OrderAccess::loadload();
-    }
+    //
+    // dmw/header and _contentions may get written by different threads.
+    // Make sure to observe them in the same order when having several observers.
+    OrderAccess::loadload_for_IRIW();
     if (monitor->is_being_async_deflated()) {
       // But we can't safely use the hash if we detect that async
       // deflation has occurred. So we attempt to restore the