diff --git a/src/hotspot/share/code/dependencyContext.hpp b/src/hotspot/share/code/dependencyContext.hpp
index 76fa92dd832..771957e32c2 100644
--- a/src/hotspot/share/code/dependencyContext.hpp
+++ b/src/hotspot/share/code/dependencyContext.hpp
@@ -107,7 +107,7 @@ class DependencyContext : public StackObj {
       _safepoint_counter(SafepointSynchronize::safepoint_counter()) {}
   ~DependencyContext() {
-    assert(_safepoint_counter == SafepointSynchronize::safepoint_counter(), "safepoint happened");
+    assert(SafepointSynchronize::is_same_safepoint(_safepoint_counter), "must be the same safepoint");
   DependencyContext(nmethodBucket* volatile* bucket_addr, volatile uint64_t* last_cleanup_addr)
diff --git a/src/hotspot/share/jfr/recorder/repository/jfrEmergencyDump.cpp b/src/hotspot/share/jfr/recorder/repository/jfrEmergencyDump.cpp
index 2b0948a5a23..147437e25e7 100644
--- a/src/hotspot/share/jfr/recorder/repository/jfrEmergencyDump.cpp
+++ b/src/hotspot/share/jfr/recorder/repository/jfrEmergencyDump.cpp
@@ -82,10 +82,6 @@ static void prepare_for_emergency_dump(Thread* thread) {
-  if (Safepoint_lock->owned_by_self()) {
-    Safepoint_lock->unlock();
-  }
   if (VMOperationQueue_lock->owned_by_self()) {
diff --git a/src/hotspot/share/jfr/recorder/stacktrace/jfrStackTraceRepository.cpp b/src/hotspot/share/jfr/recorder/stacktrace/jfrStackTraceRepository.cpp
index 7600e68e604..e9770a2775d 100644
--- a/src/hotspot/share/jfr/recorder/stacktrace/jfrStackTraceRepository.cpp
+++ b/src/hotspot/share/jfr/recorder/stacktrace/jfrStackTraceRepository.cpp
@@ -1,5 +1,5 @@
- * Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
  * This code is free software; you can redistribute it and/or modify it
@@ -383,8 +383,7 @@ void JfrStackTrace::resolve_linenos() {
 bool JfrStackTrace::record_safe(JavaThread* thread, int skip, bool leakp /* false */) {
-  assert(SafepointSynchronize::safepoint_safe(thread, thread->thread_state())
-         || thread == Thread::current(), "Thread stack needs to be walkable");
+  assert(thread == Thread::current(), "Thread stack needs to be walkable");
   vframeStream vfs(thread);
   u4 count = 0;
   _reached_root = true;
diff --git a/src/hotspot/share/runtime/handshake.cpp b/src/hotspot/share/runtime/handshake.cpp
index d22f169a81a..0d925c8c8b5 100644
--- a/src/hotspot/share/runtime/handshake.cpp
+++ b/src/hotspot/share/runtime/handshake.cpp
@@ -1,5 +1,5 @@
- * Copyright (c) 2017, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2019, Oracle and/or its affiliates. All rights reserved.
  * This code is free software; you can redistribute it and/or modify it
@@ -303,13 +303,9 @@ void HandshakeState::process_self_inner(JavaThread* thread) {
 bool HandshakeState::vmthread_can_process_handshake(JavaThread* target) {
-  // SafepointSynchronize::safepoint_safe() does not consider an externally
-  // suspended thread to be safe. However, this function must be called with
-  // the Threads_lock held so an externally suspended thread cannot be
-  // resumed thus it is safe.
-  assert(Threads_lock->owned_by_self(), "Not holding Threads_lock.");
-  return SafepointSynchronize::safepoint_safe(target, target->thread_state()) ||
-         target->is_ext_suspended() || target->is_terminated();
+  // handshake_safe may only be called with polls armed.
+  // VM thread controls this by first claiming the handshake via claim_handshake_for_vmthread.
+  return SafepointSynchronize::handshake_safe(target);
 static bool possibly_vmthread_can_process_handshake(JavaThread* target) {
diff --git a/src/hotspot/share/runtime/interfaceSupport.inline.hpp b/src/hotspot/share/runtime/interfaceSupport.inline.hpp
index ebf0998d595..a0df8244f18 100644
--- a/src/hotspot/share/runtime/interfaceSupport.inline.hpp
+++ b/src/hotspot/share/runtime/interfaceSupport.inline.hpp
@@ -314,10 +314,10 @@ class ThreadBlockInVMWithDeadlockCheck : public ThreadStateTransition {
     // Once we are blocked vm expects stack to be walkable
-    thread->set_thread_state((JavaThreadState)(_thread_in_vm + 1));
-    InterfaceSupport::serialize_thread_state_with_handler(thread);
-    SafepointMechanism::callback_if_safepoint(thread);
+    // All unsafe states are treated the same by the VMThread,
+    // so we can skip the _thread_in_vm_trans state here. Since
+    // we don't read the poll, it's enough to order the stores.
+    OrderAccess::storestore();
@@ -325,23 +325,13 @@ class ThreadBlockInVMWithDeadlockCheck : public ThreadStateTransition {
   ~ThreadBlockInVMWithDeadlockCheck() {
     // Change to transition state
-    _thread->set_thread_state((JavaThreadState)(_thread_blocked + 1));
+    _thread->set_thread_state((JavaThreadState)(_thread_blocked_trans));
     if (SafepointMechanism::should_block(_thread)) {
-      SafepointMechanism::callback_if_safepoint(_thread);
-      // The VMThread might have read that we were in a _thread_blocked state
-      // and proceeded to process a handshake for us. If that's the case then
-      // we need to block.
-      // By doing this we are also making the current thread process its own
-      // handshake if there is one pending and the VMThread didn't try to process
-      // it yet. This is more of a side-effect and not really necessary; the
-      // handshake could be processed later on.
-      if (_thread->has_handshake()) {
-        _thread->handshake_process_by_self();
-      }
+      SafepointMechanism::block_if_requested(_thread);
diff --git a/src/hotspot/share/runtime/mutex.cpp b/src/hotspot/share/runtime/mutex.cpp
index 447288bf012..f910e65a6f7 100644
--- a/src/hotspot/share/runtime/mutex.cpp
+++ b/src/hotspot/share/runtime/mutex.cpp
@@ -401,15 +401,10 @@ void Monitor::set_owner_implementation(Thread *new_owner) {
     // of m2 be less than the rank of m1.
     // The rank Mutex::native  is an exception in that it is not subject
     // to the verification rules.
-    // Here are some further notes relating to mutex acquisition anomalies:
-    // . it is also ok to acquire Safepoint_lock at the very end while we
-    //   already hold Terminator_lock - may happen because of periodic safepoints
     if (this->rank() != Mutex::native &&
         this->rank() != Mutex::suspend_resume &&
         locks != NULL && locks->rank() <= this->rank() &&
-        !SafepointSynchronize::is_at_safepoint() &&
-        !(this == Safepoint_lock && contains(locks, Terminator_lock) &&
-        SafepointSynchronize::is_synchronizing())) {
+        !SafepointSynchronize::is_at_safepoint()) {
       fatal("acquiring lock %s/%d out of order with lock %s/%d -- "
             "possible deadlock", this->name(), this->rank(),
diff --git a/src/hotspot/share/runtime/mutex.hpp b/src/hotspot/share/runtime/mutex.hpp
index 17f9e01879d..932249ec538 100644
--- a/src/hotspot/share/runtime/mutex.hpp
+++ b/src/hotspot/share/runtime/mutex.hpp
@@ -56,10 +56,7 @@ class Monitor : public CHeapObj<mtInternal> {
   // (except for "event" and "access") for the deadlock detection to work correctly.
   // The rank native is only for use in Mutex's created by JVM_RawMonitorCreate,
   // which being external to the VM are not subject to deadlock detection.
-  // The rank safepoint is used only for synchronization in reaching a
-  // safepoint and leaving a safepoint.  It is only used for the Safepoint_lock
-  // currently.  While at a safepoint no mutexes of rank safepoint are held
-  // by any thread.
+  // While at a safepoint no mutexes of rank safepoint are held by any thread.
   // The rank named "leaf" is probably historical (and should
   // be changed) -- mutexes of this rank aren't really leaf mutexes
   // at all.
diff --git a/src/hotspot/share/runtime/mutexLocker.cpp b/src/hotspot/share/runtime/mutexLocker.cpp
index b02c331a7b8..f2a4174c71c 100644
--- a/src/hotspot/share/runtime/mutexLocker.cpp
+++ b/src/hotspot/share/runtime/mutexLocker.cpp
@@ -72,7 +72,6 @@ Mutex*   TouchedMethodLog_lock        = NULL;
 Mutex*   RetData_lock                 = NULL;
 Monitor* VMOperationQueue_lock        = NULL;
 Monitor* VMOperationRequest_lock      = NULL;
-Monitor* Safepoint_lock               = NULL;
 Monitor* SerializePage_lock           = NULL;
 Monitor* Threads_lock                 = NULL;
 Mutex*   NonJavaThreadsList_lock      = NULL;
@@ -275,8 +274,6 @@ void mutex_init() {
   // CMS_bitMap_lock                          leaf 1
   // CMS_freeList_lock                        leaf 2
-  def(Safepoint_lock               , PaddedMonitor, safepoint,   true,  Monitor::_safepoint_check_sometimes);  // locks SnippetCache_lock/Threads_lock
   def(Threads_lock                 , PaddedMonitor, barrier,     true,  Monitor::_safepoint_check_sometimes);
   def(NonJavaThreadsList_lock      , PaddedMutex,   leaf,        true,  Monitor::_safepoint_check_never);
diff --git a/src/hotspot/share/runtime/mutexLocker.hpp b/src/hotspot/share/runtime/mutexLocker.hpp
index 21eaa1c7823..4809f2cb1fc 100644
--- a/src/hotspot/share/runtime/mutexLocker.hpp
+++ b/src/hotspot/share/runtime/mutexLocker.hpp
@@ -68,7 +68,6 @@ extern Mutex*   DerivedPointerTableGC_lock;      // a lock to protect the derive
 extern Monitor* CGCPhaseManager_lock;            // a lock to protect a concurrent GC's phase management
 extern Monitor* VMOperationQueue_lock;           // a lock on queue of vm_operations waiting to execute
 extern Monitor* VMOperationRequest_lock;         // a lock on Threads waiting for a vm_operation to terminate
-extern Monitor* Safepoint_lock;                  // a lock used by the safepoint abstraction
 extern Monitor* Threads_lock;                    // a lock on the Threads table of active Java threads
                                                  // (also used by Safepoints too to block threads creation/destruction)
 extern Mutex*   NonJavaThreadsList_lock;         // a lock on the NonJavaThreads list
diff --git a/src/hotspot/share/runtime/safepoint.cpp b/src/hotspot/share/runtime/safepoint.cpp
index 4ea31dee8af..de5464a24e3 100644
--- a/src/hotspot/share/runtime/safepoint.cpp
+++ b/src/hotspot/share/runtime/safepoint.cpp
@@ -70,70 +70,63 @@
 #include "c1/c1_globals.hpp"
-template <typename E>
-static void set_current_safepoint_id(E* event, int adjustment = 0) {
-  assert(event != NULL, "invariant");
-  event->set_safepointId(SafepointSynchronize::safepoint_counter() + adjustment);
-static void post_safepoint_begin_event(EventSafepointBegin* event,
+static void post_safepoint_begin_event(EventSafepointBegin& event,
+                                       uint64_t safepoint_id,
                                        int thread_count,
                                        int critical_thread_count) {
-  assert(event != NULL, "invariant");
-  assert(event->should_commit(), "invariant");
-  set_current_safepoint_id(event);
-  event->set_totalThreadCount(thread_count);
-  event->set_jniCriticalThreadCount(critical_thread_count);
-  event->commit();
+  if (event.should_commit()) {
+    event.set_safepointId(safepoint_id);
+    event.set_totalThreadCount(thread_count);
+    event.set_jniCriticalThreadCount(critical_thread_count);
+    event.commit();
+  }
-static void post_safepoint_cleanup_event(EventSafepointCleanup* event) {
-  assert(event != NULL, "invariant");
-  assert(event->should_commit(), "invariant");
-  set_current_safepoint_id(event);
-  event->commit();
+static void post_safepoint_cleanup_event(EventSafepointCleanup& event, uint64_t safepoint_id) {
+  if (event.should_commit()) {
+    event.set_safepointId(safepoint_id);
+    event.commit();
+  }
-static void post_safepoint_synchronize_event(EventSafepointStateSynchronization* event,
+static void post_safepoint_synchronize_event(EventSafepointStateSynchronization& event,
+                                             uint64_t safepoint_id,
                                              int initial_number_of_threads,
                                              int threads_waiting_to_block,
-                                             unsigned int iterations) {
-  assert(event != NULL, "invariant");
-  if (event->should_commit()) {
-    // Group this event together with the ones committed after the counter is increased
-    set_current_safepoint_id(event, 1);
-    event->set_initialThreadCount(initial_number_of_threads);
-    event->set_runningThreadCount(threads_waiting_to_block);
-    event->set_iterations(iterations);
-    event->commit();
+                                             uint64_t iterations) {
+  if (event.should_commit()) {
+    event.set_safepointId(safepoint_id);
+    event.set_initialThreadCount(initial_number_of_threads);
+    event.set_runningThreadCount(threads_waiting_to_block);
+    event.set_iterations(iterations);
+    event.commit();
-static void post_safepoint_wait_blocked_event(EventSafepointWaitBlocked* event,
+static void post_safepoint_wait_blocked_event(EventSafepointWaitBlocked& event,
+                                              uint64_t safepoint_id,
                                               int initial_threads_waiting_to_block) {
-  assert(event != NULL, "invariant");
-  assert(event->should_commit(), "invariant");
-  set_current_safepoint_id(event);
-  event->set_runningThreadCount(initial_threads_waiting_to_block);
-  event->commit();
-static void post_safepoint_cleanup_task_event(EventSafepointCleanupTask* event,
-                                              const char* name) {
-  assert(event != NULL, "invariant");
-  if (event->should_commit()) {
-    set_current_safepoint_id(event);
-    event->set_name(name);
-    event->commit();
+  if (event.should_commit()) {
+    event.set_safepointId(safepoint_id);
+    event.set_runningThreadCount(initial_threads_waiting_to_block);
+    event.commit();
-static void post_safepoint_end_event(EventSafepointEnd* event) {
-  assert(event != NULL, "invariant");
-  if (event->should_commit()) {
-    // Group this event together with the ones committed before the counter increased
-    set_current_safepoint_id(event, -1);
-    event->commit();
+static void post_safepoint_cleanup_task_event(EventSafepointCleanupTask& event,
+                                              uint64_t safepoint_id,
+                                              const char* name) {
+  if (event.should_commit()) {
+    event.set_safepointId(safepoint_id);
+    event.set_name(name);
+    event.commit();
+  }
+static void post_safepoint_end_event(EventSafepointEnd& event, uint64_t safepoint_id) {
+  if (event.should_commit()) {
+    event.set_safepointId(safepoint_id);
+    event.commit();
@@ -141,64 +134,170 @@ static void post_safepoint_end_event(EventSafepointEnd* event) {
 // Implementation of Safepoint begin/end
 SafepointSynchronize::SynchronizeState volatile SafepointSynchronize::_state = SafepointSynchronize::_not_synchronized;
-volatile int  SafepointSynchronize::_waiting_to_block = 0;
+int SafepointSynchronize::_waiting_to_block = 0;
 volatile uint64_t SafepointSynchronize::_safepoint_counter = 0;
+const uint64_t SafepointSynchronize::InactiveSafepointCounter = 0;
 int SafepointSynchronize::_current_jni_active_count = 0;
-long  SafepointSynchronize::_end_of_last_safepoint = 0;
-int SafepointSynchronize::_defer_thr_suspend_loop_count = 4000;
-static const int safepoint_spin_before_yield = 2000;
-static volatile int PageArmed = 0 ;        // safepoint polling page is RO|RW vs PROT_NONE
-static volatile int TryingToBlock = 0 ;    // proximate value -- for advisory use only
+long SafepointSynchronize::_end_of_last_safepoint = 0;
+WaitBarrier* SafepointSynchronize::_wait_barrier;
+// We need a place to save the description since it is released before we need it.
+static char stopped_description[64] = "";
+static bool _vm_is_waiting = false;
+static volatile bool PageArmed = false;        // safepoint polling page is RO|RW vs PROT_NONE
 static bool timeout_error_printed = false;
-// Statistic related statics
+// Statistic related
 julong SafepointSynchronize::_coalesced_vmop_count = 0;
 static jlong _safepoint_begin_time = 0;
 static float _ts_of_current_safepoint = 0.0f;
 static volatile int _nof_threads_hit_polling_page = 0;
-// Roll all threads forward to a safepoint and suspend them all
-void SafepointSynchronize::begin() {
-  EventSafepointBegin begin_event;
-  Thread* myThread = Thread::current();
-  assert(myThread->is_VM_thread(), "Only VM thread may execute a safepoint");
+void SafepointSynchronize::init(Thread* vmthread) {
+  // WaitBarrier should never be destroyed since we will have
+  // threads waiting on it while exiting.
+  _wait_barrier = new WaitBarrier(vmthread);
+void SafepointSynchronize::increment_jni_active_count() {
+  assert(Thread::current()->is_VM_thread(), "Only VM thread may increment");
+  ++_current_jni_active_count;
+void SafepointSynchronize::decrement_waiting_to_block() {
+  assert(_waiting_to_block > 0, "sanity check");
+  assert(Thread::current()->is_VM_thread(), "Only VM thread may decrement");
+  --_waiting_to_block;
+static bool thread_not_running(ThreadSafepointState *cur_state) {
+  if (!cur_state->is_running()) {
+    return true;
+  }
+  cur_state->examine_state_of_thread(SafepointSynchronize::safepoint_counter());
+  if (!cur_state->is_running()) {
+    return true;
+  }
+  LogTarget(Trace, safepoint) lt;
+  if (lt.is_enabled()) {
+    ResourceMark rm;
+    LogStream ls(lt);
+    cur_state->print_on(&ls);
+  }
+  return false;
+#ifdef ASSERT
+static void assert_list_is_valid(const ThreadSafepointState* tss_head, int still_running) {
+  int a = 0;
+  const ThreadSafepointState *tmp_tss = tss_head;
+  while (tmp_tss != NULL) {
+    ++a;
+    assert(tmp_tss->is_running(), "Illegal initial state");
+    tmp_tss = tmp_tss->get_next();
+  }
+  assert(a == still_running, "Must be the same");
+#endif // ASSERT
+static void back_off(int iteration) {
+  // iteration will be 1 the first time we enter this spin back-off.
+  // naked_short_nanosleep takes tenths of micros, which means that
+  // the number of nanoseconds is irrelevant if it's below that. We do
+  // 20 sleeps of 1 ns with a total cost of ~1 ms, then we do 1 ms sleeps.
+  jlong sleep_ns = 1;
+  if (iteration > 20) {
+    sleep_ns = NANOUNITS / MILLIUNITS;  // 1 ms
+  }
+  os::naked_short_nanosleep(sleep_ns);
+int SafepointSynchronize::synchronize_threads(jlong safepoint_limit_time, int nof_threads, int* initial_running)
+  JavaThreadIteratorWithHandle jtiwh;
+#ifdef ASSERT
+  for (; JavaThread *cur = jtiwh.next(); ) {
+    assert(cur->safepoint_state()->is_running(), "Illegal initial state");
+  }
+  jtiwh.rewind();
+#endif // ASSERT
+  // Iterate through all threads until it has been determined how to stop them all at a safepoint.
+  int still_running = nof_threads;
+  ThreadSafepointState *tss_head = NULL;
+  ThreadSafepointState **p_prev = &tss_head;
+  for (; JavaThread *cur = jtiwh.next(); ) {
+    ThreadSafepointState *cur_tss = cur->safepoint_state();
+    assert(cur_tss->get_next() == NULL, "Must be NULL");
+    if (thread_not_running(cur_tss)) {
+      --still_running;
+    } else {
+      *p_prev = cur_tss;
+      p_prev = cur_tss->next_ptr();
+    }
+  }
+  *p_prev = NULL;
+  DEBUG_ONLY(assert_list_is_valid(tss_head, still_running);)
+  *initial_running = still_running;
   if (log_is_enabled(Debug, safepoint, stats)) {
-    _safepoint_begin_time = os::javaTimeNanos();
-    _ts_of_current_safepoint = tty->time_stamp().seconds();
-    _nof_threads_hit_polling_page = 0;
+    begin_statistics(nof_threads, still_running);
-  Universe::heap()->safepoint_synchronize_begin();
+  int iterations = 1; // The first iteration is above.
-  // By getting the Threads_lock, we assure that no threads are about to start or
-  // exit. It is released again in SafepointSynchronize::end().
-  Threads_lock->lock();
+  while (still_running > 0) {
+    // Check if this has taken too long:
+    if (SafepointTimeout && safepoint_limit_time < os::javaTimeNanos()) {
+      print_safepoint_timeout(_spinning_timeout);
+    }
+    if (int(iterations) == -1) { // overflow - something is wrong.
+      // We can only overflow here when we are using global
+      // polling pages. We keep this guarantee in its original
+      // form so that searches of the bug database for this
+      // failure mode find the right bugs.
+      guarantee (!PageArmed, "invariant");
+    }
-  assert( _state == _not_synchronized, "trying to safepoint synchronize with wrong state");
+    p_prev = &tss_head;
+    ThreadSafepointState *cur_tss = tss_head;
+    while (cur_tss != NULL) {
+      assert(cur_tss->is_running(), "Illegal initial state");
+      if (thread_not_running(cur_tss)) {
+        --still_running;
+        *p_prev = NULL;
+        ThreadSafepointState *tmp = cur_tss;
+        cur_tss = cur_tss->get_next();
+        tmp->set_next(NULL);
+      } else {
+        *p_prev = cur_tss;
+        p_prev = cur_tss->next_ptr();
+        cur_tss = cur_tss->get_next();
+      }
+    }
-  int nof_threads = Threads::number_of_threads();
+    DEBUG_ONLY(assert_list_is_valid(tss_head, still_running);)
-  log_debug(safepoint)("Safepoint synchronization initiated. (%d threads)", nof_threads);
+    if (still_running > 0) {
+      back_off(iterations);
+    }
-  RuntimeService::record_safepoint_begin();
+    iterations++;
+  }
-  MutexLocker mu(Safepoint_lock);
+  assert(tss_head == NULL, "Must be empty");
-  // Reset the count of active JNI critical threads
-  _current_jni_active_count = 0;
-  // Set number of threads to wait for, before we initiate the callbacks
-  _waiting_to_block = nof_threads;
-  TryingToBlock     = 0 ;
-  int still_running = nof_threads;
-  // Save the starting time, so that it can be compared to see if this has taken
-  // too long to complete.
-  jlong safepoint_limit_time = 0;
-  timeout_error_printed = false;
+  if (log_is_enabled(Debug, safepoint, stats)) {
+    update_statistics_on_spin_end();
+  }
+  return iterations;
+void SafepointSynchronize::arm_safepoint() {
   // Begin the process of bringing the system to a safepoint.
   // Java threads can be in several different states and are
   // stopped by different mechanisms:
@@ -216,7 +315,7 @@ void SafepointSynchronize::begin() {
   //     memory writes are serialized with respect to each other,
   //     the VM thread issues a memory barrier instruction.
   //  3. Running compiled Code
-  //     Compiled code reads a global (Safepoint Polling) page that
+  //     Compiled code reads the local polling page that
   //     is set to fault if we are trying to get to a safepoint.
   //  4. Blocked
   //     A thread which is blocked will not be allowed to return from the
@@ -226,275 +325,154 @@ void SafepointSynchronize::begin() {
   //     between states, the safepointing code will wait for the thread to
   //     block itself when it attempts transitions to a new state.
-  {
-    EventSafepointStateSynchronization sync_event;
-    int initial_running = 0;
-    _state            = _synchronizing;
+  // We must never miss a thread with the correct safepoint id, so we must make sure we arm
+  // the wait barrier for the next safepoint id/counter.
+  // Arming must be done after resetting _current_jni_active_count and _waiting_to_block.
+  _wait_barrier->arm(static_cast<int>(_safepoint_counter + 1));
-    if (SafepointMechanism::uses_thread_local_poll()) {
-      // Arming the per thread poll while having _state != _not_synchronized means safepointing
-      log_trace(safepoint)("Setting thread local yield flag for threads");
-      OrderAccess::storestore(); // storestore, global state -> local state
-      for (JavaThreadIteratorWithHandle jtiwh; JavaThread *cur = jtiwh.next(); ) {
-        // Make sure the threads start polling, it is time to yield.
-        SafepointMechanism::arm_local_poll(cur);
-      }
-    }
-    OrderAccess::fence(); // storestore|storeload, global state -> local state
+  assert((_safepoint_counter & 0x1) == 0, "must be even");
+  // The store to _safepoint_counter must happen after any stores in arming.
+  OrderAccess::release_store(&_safepoint_counter, _safepoint_counter + 1);
-    if (SafepointMechanism::uses_global_page_poll()) {
-      // Make interpreter safepoint aware
-      Interpreter::notice_safepoints();
+  // We are synchronizing
+  OrderAccess::storestore(); // Ordered with _safepoint_counter
+  _state = _synchronizing;
-      // Make polling safepoint aware
-      guarantee (PageArmed == 0, "invariant") ;
-      PageArmed = 1 ;
-      os::make_polling_page_unreadable();
-    }
-    // Consider using active_processor_count() ... but that call is expensive.
-    int ncpus = os::processor_count() ;
-    unsigned int iterations = 0;
-    {
-      JavaThreadIteratorWithHandle jtiwh;
-#ifdef ASSERT
-      for (; JavaThread *cur = jtiwh.next(); ) {
-        assert(cur->safepoint_state()->is_running(), "Illegal initial state");
-        // Clear the visited flag to ensure that the critical counts are collected properly.
-        cur->set_visited_for_critical_count(false);
-      }
-#endif // ASSERT
-      if (SafepointTimeout)
-        safepoint_limit_time = os::javaTimeNanos() + (jlong)SafepointTimeoutDelay * MICROUNITS;
-      // Iterate through all threads until it have been determined how to stop them all at a safepoint
-      int steps = 0 ;
-      while(still_running > 0) {
-        jtiwh.rewind();
-        for (; JavaThread *cur = jtiwh.next(); ) {
-          assert(!cur->is_ConcurrentGC_thread(), "A concurrent GC thread is unexpectly being suspended");
-          ThreadSafepointState *cur_state = cur->safepoint_state();
-          if (cur_state->is_running()) {
-            cur_state->examine_state_of_thread();
-            if (!cur_state->is_running()) {
-              still_running--;
-              // consider adjusting steps downward:
-              //   steps = 0
-              //   steps -= NNN
-              //   steps >>= 1
-              //   steps = MIN(steps, 2000-100)
-              //   if (iterations != 0) steps -= NNN
-            }
-            LogTarget(Trace, safepoint) lt;
-            if (lt.is_enabled()) {
-              ResourceMark rm;
-              LogStream ls(lt);
-              cur_state->print_on(&ls);
-            }
-          }
-        }
-        if (iterations == 0) {
-          initial_running = still_running;
-          if (log_is_enabled(Debug, safepoint, stats)) {
-            begin_statistics(nof_threads, still_running);
-          }
-        }
-        if (still_running > 0) {
-          // Check for if it takes to long
-          if (SafepointTimeout && safepoint_limit_time < os::javaTimeNanos()) {
-            print_safepoint_timeout(_spinning_timeout);
-          }
-          // Spin to avoid context switching.
-          // There's a tension between allowing the mutators to run (and rendezvous)
-          // vs spinning.  As the VM thread spins, wasting cycles, it consumes CPU that
-          // a mutator might otherwise use profitably to reach a safepoint.  Excessive
-          // spinning by the VM thread on a saturated system can increase rendezvous latency.
-          // Blocking or yielding incur their own penalties in the form of context switching
-          // and the resultant loss of $ residency.
-          //
-          // Further complicating matters is that yield() does not work as naively expected
-          // on many platforms -- yield() does not guarantee that any other ready threads
-          // will run.   As such we revert to naked_short_sleep() after some number of iterations.
-          // nakes_short_sleep() is implemented as a short unconditional sleep.
-          // Typical operating systems round a "short" sleep period up to 10 msecs, so sleeping
-          // can actually increase the time it takes the VM thread to detect that a system-wide
-          // stop-the-world safepoint has been reached.  In a pathological scenario such as that
-          // described in CR6415670 the VMthread may sleep just before the mutator(s) become safe.
-          // In that case the mutators will be stalled waiting for the safepoint to complete and the
-          // the VMthread will be sleeping, waiting for the mutators to rendezvous.  The VMthread
-          // will eventually wake up and detect that all mutators are safe, at which point
-          // we'll again make progress.
-          //
-          // Beware too that that the VMThread typically runs at elevated priority.
-          // Its default priority is higher than the default mutator priority.
-          // Obviously, this complicates spinning.
-          //
-          // Note too that on Windows XP SwitchThreadTo() has quite different behavior than Sleep(0).
-          // Sleep(0) will _not yield to lower priority threads, while SwitchThreadTo() will.
-          //
-          // See the comments in synchronizer.cpp for additional remarks on spinning.
-          //
-          // In the future we might:
-          // -- Modify the safepoint scheme to avoid potentially unbounded spinning.
-          //    This is tricky as the path used by a thread exiting the JVM (say on
-          //    on JNI call-out) simply stores into its state field.  The burden
-          //    is placed on the VM thread, which must poll (spin).
-          // -- Find something useful to do while spinning.  If the safepoint is GC-related
-          //    we might aggressively scan the stacks of threads that are already safe.
-          // -- YieldTo() any still-running mutators that are ready but OFFPROC.
-          // -- Check system saturation.  If the system is not fully saturated then
-          //    simply spin and avoid sleep/yield.
-          // -- As still-running mutators rendezvous they could unpark the sleeping
-          //    VMthread.  This works well for still-running mutators that become
-          //    safe.  The VMthread must still poll for mutators that call-out.
-          // -- Drive the policy on time-since-begin instead of iterations.
-          // -- Consider making the spin duration a function of the # of CPUs:
-          //    Spin = (((ncpus-1) * M) + K) + F(still_running)
-          //    Alternately, instead of counting iterations of the outer loop
-          //    we could count the # of threads visited in the inner loop, above.
-          // -- On windows consider using the return value from SwitchThreadTo()
-          //    to drive subsequent spin/SwitchThreadTo()/Sleep(N) decisions.
-          if (int(iterations) == -1) { // overflow - something is wrong.
-            // We can only overflow here when we are using global
-            // polling pages. We keep this guarantee in its original
-            // form so that searches of the bug database for this
-            // failure mode find the right bugs.
-            guarantee (PageArmed == 0, "invariant");
-          }
-          // Instead of (ncpus > 1) consider either (still_running < (ncpus + EPSILON)) or
-          // ((still_running + _waiting_to_block - TryingToBlock)) < ncpus)
-          ++steps ;
-          if (ncpus > 1 && steps < safepoint_spin_before_yield) {
-            SpinPause() ;     // MP-Polite spin
-          } else
-            if (steps < _defer_thr_suspend_loop_count) {
-              os::naked_yield() ;
-            } else {
-              os::naked_short_sleep(1);
-            }
-          iterations ++ ;
-        }
-        assert(iterations < (uint)max_jint, "We have been iterating in the safepoint loop too long");
-      }
-    } // ThreadsListHandle destroyed here.
-    assert(still_running == 0, "sanity check");
-    if (log_is_enabled(Debug, safepoint, stats)) {
-      update_statistics_on_spin_end();
-    }
-    if (sync_event.should_commit()) {
-      post_safepoint_synchronize_event(&sync_event, initial_running, _waiting_to_block, iterations);
+  if (SafepointMechanism::uses_thread_local_poll()) {
+    // Arming the per thread poll while having _state != _not_synchronized means safepointing
+    log_trace(safepoint)("Setting thread local yield flag for threads");
+    OrderAccess::storestore(); // storestore, global state -> local state
+    for (JavaThreadIteratorWithHandle jtiwh; JavaThread *cur = jtiwh.next(); ) {
+      // Make sure the threads start polling; it is time to yield.
+      SafepointMechanism::arm_local_poll(cur);
+  OrderAccess::fence(); // storestore|storeload, global state -> local state
-  // wait until all threads are stopped
-  {
-    EventSafepointWaitBlocked wait_blocked_event;
-    int initial_waiting_to_block = _waiting_to_block;
+  if (SafepointMechanism::uses_global_page_poll()) {
+    // Make interpreter safepoint aware
+    Interpreter::notice_safepoints();
-    while (_waiting_to_block > 0) {
-      log_debug(safepoint)("Waiting for %d thread(s) to block", _waiting_to_block);
-      if (!SafepointTimeout || timeout_error_printed) {
-        Safepoint_lock->wait(true);  // true, means with no safepoint checks
-      } else {
-        // Compute remaining time
-        jlong remaining_time = safepoint_limit_time - os::javaTimeNanos();
+    // Make polling safepoint aware
+    guarantee (!PageArmed, "invariant") ;
+    PageArmed = true;
+    os::make_polling_page_unreadable();
+  }
-        // If there is no remaining time, then there is an error
-        if (remaining_time < 0 || Safepoint_lock->wait(true, remaining_time / MICROUNITS)) {
-          print_safepoint_timeout(_blocking_timeout);
-        }
-      }
-    }
-    assert(_waiting_to_block == 0, "sanity check");
+// Roll all threads forward to a safepoint and suspend them all
+void SafepointSynchronize::begin() {
+  EventSafepointBegin begin_event;
+  assert(Thread::current()->is_VM_thread(), "Only VM thread may execute a safepoint");
+  strncpy(stopped_description, VMThread::vm_safepoint_description(), sizeof(stopped_description) - 1);
+  stopped_description[sizeof(stopped_description) - 1] = '\0';
+  if (log_is_enabled(Debug, safepoint, stats)) {
+    _safepoint_begin_time = os::javaTimeNanos();
+    _ts_of_current_safepoint = tty->time_stamp().seconds();
+    _nof_threads_hit_polling_page = 0;
+  }
+  Universe::heap()->safepoint_synchronize_begin();
+  // By getting the Threads_lock, we assure that no threads are about to start or
+  // exit. It is released again in SafepointSynchronize::end().
+  Threads_lock->lock();
+  assert( _state == _not_synchronized, "trying to safepoint synchronize with wrong state");
+  int nof_threads = Threads::number_of_threads();
+  log_debug(safepoint)("Safepoint synchronization initiated using %s wait barrier. (%d threads)", _wait_barrier->description(), nof_threads);
+  RuntimeService::record_safepoint_begin();
+  // Reset the count of active JNI critical threads
+  _current_jni_active_count = 0;
+  // Set number of threads to wait for
+  _waiting_to_block = nof_threads;
+  jlong safepoint_limit_time = 0;
+  if (SafepointTimeout) {
+    // Set the limit time, so that it can be compared to see if this has taken
+    // too long to complete.
+    safepoint_limit_time = os::javaTimeNanos() + (jlong)SafepointTimeoutDelay * MICROUNITS;
+  }
+  timeout_error_printed = false;
+  EventSafepointStateSynchronization sync_event;
+  int initial_running = 0;
+  // Arm the safepoint; _current_jni_active_count and _waiting_to_block must be set before this call.
+  arm_safepoint();
+  // Will spin until all threads are safe.
+  int iterations = synchronize_threads(safepoint_limit_time, nof_threads, &initial_running);
+  assert(_waiting_to_block == 0, "No thread should be running");
+  post_safepoint_synchronize_event(sync_event, _safepoint_counter, initial_running,
+                                   _waiting_to_block, iterations);
+  // Keep event from now.
+  EventSafepointWaitBlocked wait_blocked_event;
 #ifndef PRODUCT
-    if (SafepointTimeout) {
-      jlong current_time = os::javaTimeNanos();
-      if (safepoint_limit_time < current_time) {
-        log_warning(safepoint)("# SafepointSynchronize: Finished after "
-                      INT64_FORMAT_W(6) " ms",
-                      (int64_t)((current_time - safepoint_limit_time) / MICROUNITS +
-                                (jlong)SafepointTimeoutDelay));
-      }
-    }
-    assert((_safepoint_counter & 0x1) == 0, "must be even");
-    assert(Threads_lock->owned_by_self(), "must hold Threads_lock");
-    _safepoint_counter ++;
-    // Record state
-    _state = _synchronized;
-    OrderAccess::fence();
-    if (wait_blocked_event.should_commit()) {
-      post_safepoint_wait_blocked_event(&wait_blocked_event, initial_waiting_to_block);
+  if (SafepointTimeout) {
+    jlong current_time = os::javaTimeNanos();
+    if (safepoint_limit_time < current_time) {
+      log_warning(safepoint)("# SafepointSynchronize: Finished after "
+                    INT64_FORMAT_W(6) " ms",
+                    (int64_t)((current_time - safepoint_limit_time) / MICROUNITS +
+                              (jlong)SafepointTimeoutDelay));
+  assert(Threads_lock->owned_by_self(), "must hold Threads_lock");
+  // Record state
+  _state = _synchronized;
+  OrderAccess::fence();
+  post_safepoint_wait_blocked_event(wait_blocked_event, _safepoint_counter, 0);
 #ifdef ASSERT
   // Make sure all the threads were visited.
   for (JavaThreadIteratorWithHandle jtiwh; JavaThread *cur = jtiwh.next(); ) {
-    assert(cur->was_visited_for_critical_count(), "missed a thread");
+    assert(cur->was_visited_for_critical_count(_safepoint_counter), "missed a thread");
 #endif // ASSERT
   // Update the count of active JNI critical regions
-  log_info(safepoint)("Entering safepoint region: %s", VMThread::vm_safepoint_description());
+  log_info(safepoint)("Entering safepoint region: %s", stopped_description);
   if (log_is_enabled(Debug, safepoint, stats)) {
-  // Call stuff that needs to be run when a safepoint is just about to be completed
-  {
-    EventSafepointCleanup cleanup_event;
-    do_cleanup_tasks();
-    if (cleanup_event.should_commit()) {
-      post_safepoint_cleanup_event(&cleanup_event);
-    }
-  }
+  // We do the safepoint cleanup first since a GC related safepoint
+  // needs cleanup to be completed before running the GC op.
+  EventSafepointCleanup cleanup_event;
+  do_cleanup_tasks();
+  post_safepoint_cleanup_event(cleanup_event, _safepoint_counter);
   if (log_is_enabled(Debug, safepoint, stats)) {
     // Record how much time spend on the above cleanup tasks
-  if (begin_event.should_commit()) {
-    post_safepoint_begin_event(&begin_event, nof_threads, _current_jni_active_count);
-  }
+  post_safepoint_begin_event(begin_event, _safepoint_counter, nof_threads, _current_jni_active_count);
-// Wake up all threads, so they are ready to resume execution after the safepoint
-// operation has been carried out
-void SafepointSynchronize::end() {
-  assert(Threads_lock->owned_by_self(), "must hold Threads_lock");
-  assert((_safepoint_counter & 0x1) == 1, "must be odd");
-  EventSafepointEnd event;
-  _safepoint_counter ++;
-  // memory fence isn't required here since an odd _safepoint_counter
-  // value can do no harm and a fence is issued below anyway.
-  DEBUG_ONLY(Thread* myThread = Thread::current();)
-  assert(myThread->is_VM_thread(), "Only VM thread can execute a safepoint");
-  if (log_is_enabled(Debug, safepoint, stats)) {
-    end_statistics(os::javaTimeNanos());
-  }
+void SafepointSynchronize::disarm_safepoint() {
+  uint64_t safepoint_id = _safepoint_counter;
     JavaThreadIteratorWithHandle jtiwh;
 #ifdef ASSERT
@@ -508,66 +486,74 @@ void SafepointSynchronize::end() {
 #endif // ASSERT
-    if (PageArmed) {
-      assert(SafepointMechanism::uses_global_page_poll(), "sanity");
+    if (SafepointMechanism::uses_global_page_poll()) {
+      guarantee (PageArmed, "invariant");
       // Make polling safepoint aware
-      PageArmed = 0 ;
-    }
-    if (SafepointMechanism::uses_global_page_poll()) {
+      PageArmed = false;
       // Remove safepoint check from interpreter
-    {
-      MutexLocker mu(Safepoint_lock);
+    OrderAccess::fence(); // keep read and write of _state from floating up
+    assert(_state == _synchronized, "must be synchronized before ending safepoint synchronization");
-      assert(_state == _synchronized, "must be synchronized before ending safepoint synchronization");
+    // Change state first to _not_synchronized.
+    // No threads should see _synchronized when running.
+    _state = _not_synchronized;
-      if (SafepointMechanism::uses_thread_local_poll()) {
-        _state = _not_synchronized;
-        OrderAccess::storestore(); // global state -> local state
-        jtiwh.rewind();
-        for (; JavaThread *current = jtiwh.next(); ) {
-          ThreadSafepointState* cur_state = current->safepoint_state();
-          cur_state->restart(); // TSS _running
-          SafepointMechanism::disarm_local_poll(current);
-        }
-        log_info(safepoint)("Leaving safepoint region");
-      } else {
-        // Set to not synchronized, so the threads will not go into the signal_thread_blocked method
-        // when they get restarted.
-        _state = _not_synchronized;
-        OrderAccess::fence();
+    // Set the next dormant (even) safepoint id.
+    assert((_safepoint_counter & 0x1) == 1, "must be odd");
+    OrderAccess::release_store(&_safepoint_counter, _safepoint_counter + 1);
-        log_info(safepoint)("Leaving safepoint region");
+    OrderAccess::fence(); // Keep the local state from floating up.
-        // Start suspended threads
-        jtiwh.rewind();
-        for (; JavaThread *current = jtiwh.next(); ) {
-          ThreadSafepointState* cur_state = current->safepoint_state();
-          assert(cur_state->type() != ThreadSafepointState::_running, "Thread not suspended at safepoint");
-          cur_state->restart();
-          assert(cur_state->is_running(), "safepoint state has not been reset");
-        }
-      }
-      RuntimeService::record_safepoint_end();
-      // Release threads lock, so threads can be created/destroyed again.
-      // It will also release all threads blocked in signal_thread_blocked.
-      Threads_lock->unlock();
+    jtiwh.rewind();
+    for (; JavaThread *current = jtiwh.next(); ) {
+      // Clear the visited flag to ensure that the critical counts are collected properly.
+      DEBUG_ONLY(current->reset_visited_for_critical_count(safepoint_id);)
+      ThreadSafepointState* cur_state = current->safepoint_state();
+      assert(!cur_state->is_running(), "Thread not suspended at safepoint");
+      cur_state->restart(); // TSS _running
+      assert(cur_state->is_running(), "safepoint state has not been reset");
+      SafepointMechanism::disarm_local_poll(current);
-  } // ThreadsListHandle destroyed here.
+  } // ~JavaThreadIteratorWithHandle
+  log_info(safepoint)("Leaving safepoint region");
+  RuntimeService::record_safepoint_end();
+  // Release threads lock, so threads can be created/destroyed again.
+  Threads_lock->unlock();
+  // Wake threads after local state is correctly set.
+  _wait_barrier->disarm();
+// Wake up all threads, so they are ready to resume execution after the safepoint
+// operation has been carried out
+void SafepointSynchronize::end() {
+  assert(Threads_lock->owned_by_self(), "must hold Threads_lock");
+  EventSafepointEnd event;
+  uint64_t safepoint_id = _safepoint_counter;
+  assert(Thread::current()->is_VM_thread(), "Only VM thread can execute a safepoint");
+  if (log_is_enabled(Debug, safepoint, stats)) {
+    end_statistics(os::javaTimeNanos());
+  }
+  disarm_safepoint();
+  RuntimeService::record_safepoint_epilog(stopped_description);
   // record this time so VMThread can keep track how much time has elapsed
   // since last safepoint.
   _end_of_last_safepoint = os::javaTimeMillis();
-  if (event.should_commit()) {
-    post_safepoint_end_event(&event);
-  }
+  post_safepoint_end_event(event, safepoint_id);
 bool SafepointSynchronize::is_cleanup_needed() {
@@ -613,6 +599,7 @@ public:
     _counters(counters) {}
   void work(uint worker_id) {
+    uint64_t safepoint_id = SafepointSynchronize::safepoint_counter();
     // All threads deflate monitors and mark nmethods (if necessary).
     Threads::possibly_parallel_threads_do(true, &_cleanup_threads_cl);
@@ -621,9 +608,8 @@ public:
       EventSafepointCleanupTask event;
       TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup));
-      if (event.should_commit()) {
-        post_safepoint_cleanup_task_event(&event, name);
-      }
+      post_safepoint_cleanup_task_event(event, safepoint_id, name);
     if (_subtasks.try_claim_task(SafepointSynchronize::SAFEPOINT_CLEANUP_UPDATE_INLINE_CACHES)) {
@@ -631,9 +617,8 @@ public:
       EventSafepointCleanupTask event;
       TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup));
-      if (event.should_commit()) {
-        post_safepoint_cleanup_task_event(&event, name);
-      }
+      post_safepoint_cleanup_task_event(event, safepoint_id, name);
     if (_subtasks.try_claim_task(SafepointSynchronize::SAFEPOINT_CLEANUP_COMPILATION_POLICY)) {
@@ -641,9 +626,8 @@ public:
       EventSafepointCleanupTask event;
       TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup));
-      if (event.should_commit()) {
-        post_safepoint_cleanup_task_event(&event, name);
-      }
+      post_safepoint_cleanup_task_event(event, safepoint_id, name);
     if (_subtasks.try_claim_task(SafepointSynchronize::SAFEPOINT_CLEANUP_SYMBOL_TABLE_REHASH)) {
@@ -652,9 +636,8 @@ public:
         EventSafepointCleanupTask event;
         TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup));
-        if (event.should_commit()) {
-          post_safepoint_cleanup_task_event(&event, name);
-        }
+        post_safepoint_cleanup_task_event(event, safepoint_id, name);
@@ -664,9 +647,8 @@ public:
         EventSafepointCleanupTask event;
         TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup));
-        if (event.should_commit()) {
-          post_safepoint_cleanup_task_event(&event, name);
-        }
+        post_safepoint_cleanup_task_event(event, safepoint_id, name);
@@ -677,9 +659,8 @@ public:
       EventSafepointCleanupTask event;
       TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup));
-      if (event.should_commit()) {
-        post_safepoint_cleanup_task_event(&event, name);
-      }
+      post_safepoint_cleanup_task_event(event, safepoint_id, name);
     if (_subtasks.try_claim_task(SafepointSynchronize::SAFEPOINT_CLEANUP_SYSTEM_DICTIONARY_RESIZE)) {
@@ -687,9 +668,8 @@ public:
       EventSafepointCleanupTask event;
       TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup));
-      if (event.should_commit()) {
-        post_safepoint_cleanup_task_event(&event, name);
-      }
+      post_safepoint_cleanup_task_event(event, safepoint_id, name);
@@ -736,15 +716,48 @@ void SafepointSynchronize::do_cleanup_tasks() {
   assert(InlineCacheBuffer::is_empty(), "should have cleaned up ICBuffer");
+// Methods for determining if a JavaThread is safepoint safe.
-bool SafepointSynchronize::safepoint_safe(JavaThread *thread, JavaThreadState state) {
+// False means unsafe with undetermined state.
+// True means a determined state, but it may be an unsafe state.
+// If called from a non-safepoint context safepoint_count MUST be InactiveSafepointCounter.
+bool SafepointSynchronize::try_stable_load_state(JavaThreadState *state, JavaThread *thread, uint64_t safepoint_count) {
+  assert((safepoint_count != InactiveSafepointCounter &&
+          Thread::current() == (Thread*)VMThread::vm_thread() &&
+          SafepointSynchronize::_state != _not_synchronized)
+         || safepoint_count == InactiveSafepointCounter, "Invalid check");
+  // To handle the thread_blocked state on the backedge of the WaitBarrier from
+  // previous safepoint and reading the reset value (0/InactiveSafepointCounter) we
+  // re-read state after we read thread safepoint id. The JavaThread changes its
+  // thread state from thread_blocked before resetting safepoint id to 0.
+  // This guarantees the second read will be from an updated thread state. It can
+  // either be different state making this an unsafe state or it can see blocked
+  // again. When we see blocked twice with a 0 safepoint id, either:
+  // - It is normally blocked, e.g. on Mutex, TBIVM.
+  // - It was in SS:block(), looped around to SS:block() and is blocked on the WaitBarrier.
+  // - It was in SS:block() but now on a Mutex.
+  // All of these cases are safe.
+  *state = thread->thread_state();
+  OrderAccess::loadload();
+  uint64_t sid = thread->safepoint_state()->get_safepoint_id();  // Load acquire
+  if (sid != InactiveSafepointCounter && sid != safepoint_count) {
+    // In an old safepoint, state not relevant.
+    return false;
+  }
+  return *state == thread->thread_state();
+static bool safepoint_safe_with(JavaThread *thread, JavaThreadState state) {
   switch(state) {
   case _thread_in_native:
     // native threads are safe if they have no java stack or have walkable stack
     return !thread->has_last_Java_frame() || thread->frame_anchor()->walkable();
-   // blocked threads should have already have walkable stack
   case _thread_blocked:
+    // On wait_barrier or blocked.
+    // Blocked threads should already have walkable stack.
     assert(!thread->has_last_Java_frame() || thread->frame_anchor()->walkable(), "blocked and not walkable");
     return true;
@@ -753,12 +766,28 @@ bool SafepointSynchronize::safepoint_safe(JavaThread *thread, JavaThreadState st
+bool SafepointSynchronize::handshake_safe(JavaThread *thread) {
+  // The polls must be armed otherwise the safe state can change to unsafe at any time.
+  assert(SafepointMechanism::should_block(thread), "Must be armed");
+  // This function must be called with the Threads_lock held, so that an externally
+  // suspended thread cannot be resumed; such a thread is therefore safe.
+  assert(Threads_lock->owned_by_self() && Thread::current()->is_VM_thread(),
+         "Must hold Threads_lock and be VMThread");
+  if (thread->is_ext_suspended() || thread->is_terminated()) {
+    return true;
+  }
+  JavaThreadState stable_state;
+  if (try_stable_load_state(&stable_state, thread, InactiveSafepointCounter)) {
+    return safepoint_safe_with(thread, stable_state);
+  }
+  return false;
 // See if the thread is running inside a lazy critical native and
 // update the thread critical count if so.  Also set a suspend flag to
 // cause the native wrapper to return into the JVM to do the unlock
 // once the native finishes.
-void SafepointSynchronize::check_for_lazy_critical_native(JavaThread *thread, JavaThreadState state) {
+static void check_for_lazy_critical_native(JavaThread *thread, JavaThreadState state) {
   if (state == _thread_in_native &&
       thread->has_last_Java_frame() &&
       thread->frame_anchor()->walkable()) {
@@ -788,12 +817,10 @@ void SafepointSynchronize::check_for_lazy_critical_native(JavaThread *thread, Ja
 // -------------------------------------------------------------------------------------------------------
-// Implementation of Safepoint callback point
+// Implementation of Safepoint blocking point
-void SafepointSynchronize::block(JavaThread *thread, bool block_in_safepoint_check) {
+void SafepointSynchronize::block(JavaThread *thread) {
   assert(thread != NULL, "thread must be set");
   assert(thread->is_Java_thread(), "not a Java thread");
@@ -813,101 +840,45 @@ void SafepointSynchronize::block(JavaThread *thread, bool block_in_safepoint_che
   JavaThreadState state = thread->thread_state();
+  uint64_t safepoint_id = SafepointSynchronize::safepoint_counter();
   // Check that we have a valid thread_state at this point
   switch(state) {
     case _thread_in_vm_trans:
     case _thread_in_Java:        // From compiled code
-      // We are highly likely to block on the Safepoint_lock. In order to avoid blocking in this case,
-      // we pretend we are still in the VM.
-      thread->set_thread_state(_thread_in_vm);
-      if (is_synchronizing()) {
-         Atomic::inc (&TryingToBlock) ;
-      }
-      // We will always be holding the Safepoint_lock when we are examine the state
-      // of a thread. Hence, the instructions between the Safepoint_lock->lock() and
-      // Safepoint_lock->unlock() are happening atomic with regards to the safepoint code
-      Safepoint_lock->lock_without_safepoint_check();
-      if (is_synchronizing()) {
-        // Decrement the number of threads to wait for and signal vm thread
-        assert(_waiting_to_block > 0, "sanity check");
-        _waiting_to_block--;
-        thread->safepoint_state()->set_has_called_back(true);
-        DEBUG_ONLY(thread->set_visited_for_critical_count(true));
-        if (thread->in_critical()) {
-          // Notice that this thread is in a critical section
-          increment_jni_active_count();
-        }
-        // Consider (_waiting_to_block < 2) to pipeline the wakeup of the VM thread
-        if (_waiting_to_block == 0) {
-          Safepoint_lock->notify_all();
-        }
-      }
-      if (block_in_safepoint_check) {
-        // We transition the thread to state _thread_blocked here, but
-        // we can't do our usual check for external suspension and then
-        // self-suspend after the lock_without_safepoint_check() call
-        // below because we are often called during transitions while
-        // we hold different locks. That would leave us suspended while
-        // holding a resource which results in deadlocks.
-        thread->set_thread_state(_thread_blocked);
-        Safepoint_lock->unlock();
-        // We now try to acquire the threads lock. Since this lock is hold by the VM thread during
-        // the entire safepoint, the threads will all line up here during the safepoint.
-        Threads_lock->lock_without_safepoint_check();
-        // restore original state. This is important if the thread comes from compiled code, so it
-        // will continue to execute with the _thread_in_Java state.
-        thread->set_thread_state(state);
-        Threads_lock->unlock();
-      } else {
-        // We choose not to block in this call since we would be
-        // caught when transitioning back anyways if the safepoint
-        // is still going on.
-        thread->set_thread_state(state);
-        Safepoint_lock->unlock();
-      }
-      break;
     case _thread_in_native_trans:
     case _thread_blocked_trans:
     case _thread_new_trans:
-      if (thread->safepoint_state()->type() == ThreadSafepointState::_call_back &&
-          block_in_safepoint_check) {
-        thread->print_thread_state();
-        fatal("Deadlock in safepoint code.  "
-              "Should have called back to the VM before blocking.");
-      }
-      // We transition the thread to state _thread_blocked here, but
-      // we can't do our usual check for external suspension and then
-      // self-suspend after the lock_without_safepoint_check() call
-      // below because we are often called during transitions while
-      // we hold different locks. That would leave us suspended while
-      // holding a resource which results in deadlocks.
+      // We have no idea where the VMThread is; it might even be at the next safepoint.
+      // So we can miss this poll, but we will stop at the next one.
+      // Load dependent store, it must not pass loading of safepoint_id.
+      thread->safepoint_state()->set_safepoint_id(safepoint_id); // Release store
+      // This part we can skip if we notice we miss or are in a future safepoint.
+      OrderAccess::storestore();
-      // It is not safe to suspend a thread if we discover it is in _thread_in_native_trans. Hence,
-      // the safepoint code might still be waiting for it to block. We need to change the state here,
-      // so it can see that it is at a safepoint.
+      OrderAccess::fence(); // Load in wait barrier should not float up
+      _wait_barrier->wait(static_cast<int>(safepoint_id));
+      assert(_state != _synchronized, "Can't be");
-      // Block until the safepoint operation is completed.
-      Threads_lock->lock_without_safepoint_check();
-      // Restore state
+      // If barrier is disarmed stop store from floating above loads in barrier.
+      OrderAccess::loadstore();
-      Threads_lock->unlock();
+      // Then we reset the safepoint id to inactive.
+      thread->safepoint_state()->reset_safepoint_id(); // Release store
+      OrderAccess::fence();
      fatal("Illegal threadstate encountered: %d", state);
+  guarantee(thread->safepoint_state()->get_safepoint_id() == InactiveSafepointCounter,
+            "The safepoint id should be set only in block path");
   // Check for pending. async. exceptions or suspends - except if the
   // thread was blocked inside the VM. has_special_runtime_exit_condition()
@@ -979,7 +950,7 @@ void SafepointSynchronize::print_safepoint_timeout(SafepointTimeoutReason reason
         if (cur_thread->thread_state() != _thread_blocked &&
           ((reason == _spinning_timeout && cur_state->is_running()) ||
-             (reason == _blocking_timeout && !cur_state->has_called_back()))) {
+             (reason == _blocking_timeout))) {
           ls.print("# ");
@@ -1001,11 +972,10 @@ void SafepointSynchronize::print_safepoint_timeout(SafepointTimeoutReason reason
 // -------------------------------------------------------------------------------------------------------
 // Implementation of ThreadSafepointState
-ThreadSafepointState::ThreadSafepointState(JavaThread *thread) {
-  _thread = thread;
-  _type   = _running;
-  _has_called_back = false;
-  _at_poll_safepoint = false;
+ThreadSafepointState::ThreadSafepointState(JavaThread *thread)
+  : _at_poll_safepoint(false), _thread(thread), _safepoint_safe(false),
+    _safepoint_id(SafepointSynchronize::InactiveSafepointCounter),
+    _orig_thread_state(_thread_uninitialized), _next(NULL) {
 void ThreadSafepointState::create(JavaThread *thread) {
@@ -1020,13 +990,30 @@ void ThreadSafepointState::destroy(JavaThread *thread) {
-void ThreadSafepointState::examine_state_of_thread() {
+uint64_t ThreadSafepointState::get_safepoint_id() const {
+  return OrderAccess::load_acquire(&_safepoint_id);
+void ThreadSafepointState::reset_safepoint_id() {
+  OrderAccess::release_store(&_safepoint_id, SafepointSynchronize::InactiveSafepointCounter);
+void ThreadSafepointState::set_safepoint_id(uint64_t safepoint_id) {
+  OrderAccess::release_store(&_safepoint_id, safepoint_id);
+void ThreadSafepointState::examine_state_of_thread(uint64_t safepoint_count) {
   assert(is_running(), "better be running or just have hit safepoint poll");
-  JavaThreadState state = _thread->thread_state();
+  JavaThreadState stable_state;
+  if (!SafepointSynchronize::try_stable_load_state(&stable_state, _thread, safepoint_count)) {
+    // We could not get stable state of the JavaThread.
+    // Consider it running and just return.
+    return;
+  }
   // Save the state at the start of safepoint processing.
-  _orig_thread_state = state;
+  _orig_thread_state = stable_state;
   // Check for a thread that is suspended. Note that thread resume tries
   // to grab the Threads_lock which we own here, so a thread cannot be
@@ -1050,21 +1037,13 @@ void ThreadSafepointState::examine_state_of_thread() {
   bool is_suspended = _thread->is_ext_suspended();
   if (is_suspended) {
-    roll_forward(_at_safepoint);
+    account_safe_thread();
-  // Some JavaThread states have an initial safepoint state of
-  // running, but are actually at a safepoint. We will happily
-  // agree and update the safepoint state here.
-  if (SafepointSynchronize::safepoint_safe(_thread, state)) {
-    SafepointSynchronize::check_for_lazy_critical_native(_thread, state);
-    roll_forward(_at_safepoint);
-    return;
-  }
-  if (state == _thread_in_vm) {
-    roll_forward(_call_back);
+  if (safepoint_safe_with(_thread, stable_state)) {
+    check_for_lazy_critical_native(_thread, stable_state);
+    account_safe_thread();
@@ -1077,63 +1056,28 @@ void ThreadSafepointState::examine_state_of_thread() {
-// Returns true is thread could not be rolled forward at present position.
-void ThreadSafepointState::roll_forward(suspend_type type) {
-  _type = type;
-  switch(_type) {
-    case _at_safepoint:
-      SafepointSynchronize::signal_thread_at_safepoint();
-      DEBUG_ONLY(_thread->set_visited_for_critical_count(true));
-      if (_thread->in_critical()) {
-        // Notice that this thread is in a critical section
-        SafepointSynchronize::increment_jni_active_count();
-      }
-      break;
-    case _call_back:
-      set_has_called_back(false);
-      break;
-    case _running:
-    default:
-      ShouldNotReachHere();
+void ThreadSafepointState::account_safe_thread() {
+  SafepointSynchronize::decrement_waiting_to_block();
+  if (_thread->in_critical()) {
+    // Notice that this thread is in a critical section
+    SafepointSynchronize::increment_jni_active_count();
+  DEBUG_ONLY(_thread->set_visited_for_critical_count(SafepointSynchronize::safepoint_counter());)
+  assert(!_safepoint_safe, "Must be unsafe before safe");
+  _safepoint_safe = true;
 void ThreadSafepointState::restart() {
-  switch(type()) {
-    case _at_safepoint:
-    case _call_back:
-      break;
-    case _running:
-    default:
-       tty->print_cr("restart thread " INTPTR_FORMAT " with state %d",
-                     p2i(_thread), _type);
-       _thread->print();
-      ShouldNotReachHere();
-  }
-  _type = _running;
-  set_has_called_back(false);
+  assert(_safepoint_safe, "Must be safe before unsafe");
+  _safepoint_safe = false;
 void ThreadSafepointState::print_on(outputStream *st) const {
-  const char *s = NULL;
-  switch(_type) {
-    case _running                : s = "_running";              break;
-    case _at_safepoint           : s = "_at_safepoint";         break;
-    case _call_back              : s = "_call_back";            break;
-    default:
-      ShouldNotReachHere();
-  }
+  const char *s = _safepoint_safe ? "_at_safepoint" : "_running";
   st->print_cr("Thread: " INTPTR_FORMAT
-              "  [0x%2x] State: %s _has_called_back %d _at_poll_safepoint %d",
-               p2i(_thread), _thread->osthread()->thread_id(), s, _has_called_back,
-               _at_poll_safepoint);
+              "  [0x%2x] State: %s _at_poll_safepoint %d",
+               p2i(_thread), _thread->osthread()->thread_id(), s, _at_poll_safepoint);
@@ -1143,11 +1087,10 @@ void ThreadSafepointState::print_on(outputStream *st) const {
 // Block the thread at poll or poll return for safepoint/handshake.
 void ThreadSafepointState::handle_polling_page_exception() {
-  // Check state.  block() will set thread state to thread_in_vm which will
-  // cause the safepoint state _type to become _call_back.
-  suspend_type t = type();
-  assert(!SafepointMechanism::uses_global_page_poll() || t == ThreadSafepointState::_running,
-         "polling page exception on thread not running state: %u", uint(t));
+  // If we're using a global poll, then the thread should not be
+  // marked as safepoint safe yet.
+  assert(!SafepointMechanism::uses_global_page_poll() || !_safepoint_safe,
+         "polling page exception on thread safepoint safe");
   // Step 1: Find the nmethod from the return address
   address real_return_addr = thread()->saved_exception_pc();
diff --git a/src/hotspot/share/runtime/safepoint.hpp b/src/hotspot/share/runtime/safepoint.hpp
index c1715ad190f..4d2bf520c63 100644
--- a/src/hotspot/share/runtime/safepoint.hpp
+++ b/src/hotspot/share/runtime/safepoint.hpp
@@ -26,15 +26,15 @@
 #include "memory/allocation.hpp"
-#include "runtime/mutexLocker.hpp"
 #include "runtime/os.hpp"
-#include "utilities/globalDefinitions.hpp"
+#include "runtime/thread.hpp"
 #include "utilities/ostream.hpp"
+#include "utilities/waitBarrier.hpp"
 // Safepoint synchronization
-// The VMThread or CMS_thread uses the SafepointSynchronize::begin/end
+// The VMThread uses the SafepointSynchronize::begin/end
 // methods to enter/exit a safepoint region. The begin method will roll
 // all JavaThreads forward to a safepoint.
@@ -45,9 +45,7 @@
 // exit safepoint methods, when a thread is blocked/restarted. Hence, all mutex exter/
 // exit points *must* be at a safepoint.
 class ThreadSafepointState;
-class JavaThread;
 // Implements roll-forward to safepoint (safepoint synchronization)
@@ -55,21 +53,10 @@ class JavaThread;
 class SafepointSynchronize : AllStatic {
   enum SynchronizeState {
-      _not_synchronized = 0,                   // Threads not synchronized at a safepoint
-                                               // Keep this value 0. See the comment in do_call_back()
+      _not_synchronized = 0,                   // Threads not synchronized at a safepoint. Keep this value 0.
       _synchronizing    = 1,                   // Synchronizing in progress
-      _synchronized     = 2                    // All Java threads are stopped at a safepoint. Only VM thread is running
-  };
-  enum SafepointingThread {
-      _null_thread  = 0,
-      _vm_thread    = 1,
-      _other_thread = 2
-  };
-  enum SafepointTimeoutReason {
-    _spinning_timeout = 0,
-    _blocking_timeout = 1
+      _synchronized     = 2                    // All Java threads are running in native, blocked in OS or stopped at safepoint.
+                                               // VM thread and any NonJavaThread may be running.
   // The enums are listed in the order of the tasks when done serially.
@@ -86,22 +73,33 @@ class SafepointSynchronize : AllStatic {
-  static volatile SynchronizeState _state;     // Threads might read this flag directly, without acquiring the Threads_lock
-  static volatile int _waiting_to_block;       // number of threads we are waiting for to block
-  static int _current_jni_active_count;        // Counts the number of active critical natives during the safepoint
-  static int _defer_thr_suspend_loop_count;    // Iterations before blocking VM threads
+  friend class SafepointMechanism;
+  friend class ThreadSafepointState;
+  friend class HandshakeState;
+  enum SafepointTimeoutReason {
+    _spinning_timeout = 0,
+    _blocking_timeout = 1
+  };
+  // Threads might read this flag directly, without acquiring the Threads_lock:
+  static volatile SynchronizeState _state;
+  // Number of threads we are waiting for to block:
+  static int              _waiting_to_block;
+  // Counts the number of active critical natives during the safepoint:
+  static int              _current_jni_active_count;
   // This counter is used for fast versions of jni_Get<Primitive>Field.
-  // An even value means there is no ongoing safepoint operations.
+  // An even value means there are no ongoing safepoint operations.
   // The counter is incremented ONLY at the beginning and end of each
-  // safepoint. The fact that Threads_lock is held throughout each pair of
-  // increments (at the beginning and end of each safepoint) guarantees
-  // race freedom.
+  // safepoint.
   static volatile uint64_t _safepoint_counter;
-  static long              _end_of_last_safepoint;     // Time of last safepoint in milliseconds
-  static julong            _coalesced_vmop_count;     // coalesced vmop count
+  // JavaThreads that need to block for the safepoint will stop on the
+  // _wait_barrier, where they can quickly be started again.
+  static WaitBarrier* _wait_barrier;
+  static long         _end_of_last_safepoint;     // Time of last safepoint in milliseconds
+  static julong       _coalesced_vmop_count;     // coalesced vmop count
   // Statistics
   static void begin_statistics(int nof_threads, int nof_running);
@@ -114,42 +112,41 @@ private:
   // For debug long safepoint
   static void print_safepoint_timeout(SafepointTimeoutReason timeout_reason);
+  // Helper methods for safepoint procedure:
+  static void arm_safepoint();
+  static int synchronize_threads(jlong safepoint_limit_time, int nof_threads, int* initial_running);
+  static void disarm_safepoint();
+  static void increment_jni_active_count();
+  static void decrement_waiting_to_block();
+  // Used in safepoint_safe to do a stable load of the thread state.
+  static bool try_stable_load_state(JavaThreadState *state,
+                                    JavaThread *thread,
+                                    uint64_t safepoint_count);
+  // Called when a thread voluntarily blocks
+  static void block(JavaThread *thread);
+  // Called from VMThread during handshakes.
+  // If true the VMThread may safely process the handshake operation for the JavaThread.
+  static bool handshake_safe(JavaThread *thread);
-  // Main entry points
+  static void init(Thread* vmthread);
-  // Roll all threads forward to safepoint. Must be called by the
-  // VMThread or CMS_thread.
+  // Roll all threads forward to safepoint. Must be called by the VMThread.
   static void begin();
   static void end();                    // Start all suspended threads again...
-  static bool safepoint_safe(JavaThread *thread, JavaThreadState state);
-  static void check_for_lazy_critical_native(JavaThread *thread, JavaThreadState state);
+  // The value of a safepoint id that has not been set.
+  static const uint64_t InactiveSafepointCounter;
   // Query
-  inline static bool is_at_safepoint()       { return _state == _synchronized; }
-  inline static bool is_synchronizing()      { return _state == _synchronizing; }
-  inline static uint64_t safepoint_counter() { return _safepoint_counter; }
-  inline static void increment_jni_active_count() {
-    assert_locked_or_safepoint(Safepoint_lock);
-    _current_jni_active_count++;
-  }
-  inline static bool do_call_back() {
-    return (_state != _not_synchronized);
-  }
-  // Called when a thread voluntarily blocks
-  static void   block(JavaThread *thread, bool block_in_safepoint_check = true);
-  friend class SafepointMechanism;
-  static void   signal_thread_at_safepoint()              { _waiting_to_block--; }
+  static bool is_at_safepoint()                   { return _state == _synchronized; }
+  static bool is_synchronizing()                  { return _state == _synchronizing; }
+  static uint64_t safepoint_counter()             { return _safepoint_counter; }
+  // True if the current safepoint counter equals 'counter' or is exactly one
+  // ahead of it. The subtraction is unsigned, so a 'counter' larger than the
+  // current value wraps around to a huge difference and yields false.
+  static bool is_same_safepoint(uint64_t counter) { return (SafepointSynchronize::safepoint_counter() - counter) < 2; }
   // Exception handling for page polling
   static void handle_polling_page_exception(JavaThread *thread);
@@ -164,13 +161,13 @@ public:
   static void do_cleanup_tasks();
   static void print_stat_on_exit();
-  inline static void inc_vmop_coalesced_count() { _coalesced_vmop_count++; }
+  static void inc_vmop_coalesced_count() { _coalesced_vmop_count++; }
-  static void set_is_at_safepoint()                        { _state = _synchronized; }
-  static void set_is_not_at_safepoint()                    { _state = _not_synchronized; }
+  static void set_is_at_safepoint()             { _state = _synchronized; }
+  static void set_is_not_at_safepoint()         { _state = _not_synchronized; }
   // Assembly support
-  static address address_of_state()                        { return (address)&_state; }
+  static address address_of_state()             { return (address)&_state; }
   // Only used for making sure that no safepoint has happened in
   // JNI_FastGetField. Therefore only the low 32-bits are needed
@@ -201,44 +198,43 @@ public:
 // State class for a thread suspended at a safepoint
 class ThreadSafepointState: public CHeapObj<mtInternal> {
- public:
-  // These states are maintained by VM thread while threads are being brought
-  // to a safepoint.  After SafepointSynchronize::end(), they are reset to
-  // _running.
-  enum suspend_type {
-    _running                =  0, // Thread state not yet determined (i.e., not at a safepoint yet)
-    _at_safepoint           =  1, // Thread at a safepoint (f.ex., when blocked on a lock)
-    _call_back              =  2  // Keep executing and wait for callback (if thread is in interpreted or vm)
-  };
-  volatile bool _at_poll_safepoint;  // At polling page safepoint (NOT a poll return safepoint)
-  // Thread has called back the safepoint code (for debugging)
-  bool                           _has_called_back;
+  // At polling page safepoint (NOT a poll return safepoint):
+  volatile bool                   _at_poll_safepoint;
+  JavaThread*                     _thread;
+  bool                            _safepoint_safe;
+  volatile uint64_t               _safepoint_id;
+  JavaThreadState                 _orig_thread_state;
-  JavaThread *                   _thread;
-  volatile suspend_type          _type;
-  JavaThreadState                _orig_thread_state;
+  ThreadSafepointState*           _next;
+  void account_safe_thread();
   ThreadSafepointState(JavaThread *thread);
-  // examine/roll-forward/restart
-  void examine_state_of_thread();
-  void roll_forward(suspend_type type);
+  // Linked list support:
+  ThreadSafepointState* get_next() const { return _next; }
+  void set_next(ThreadSafepointState* value) { _next = value; }
+  ThreadSafepointState** next_ptr() { return &_next; }
+  // examine/restart
+  void examine_state_of_thread(uint64_t safepoint_count);
   void restart();
   // Query
   JavaThread*  thread() const         { return _thread; }
-  suspend_type type() const           { return _type; }
-  bool         is_running() const     { return (_type==_running); }
+  bool         is_running() const     { return !_safepoint_safe; }
+  uint64_t get_safepoint_id() const;
+  void     reset_safepoint_id();
+  void     set_safepoint_id(uint64_t sid);
   JavaThreadState orig_thread_state() const { return _orig_thread_state; }
   // Support for safepoint timeout (debugging)
-  bool has_called_back() const                   { return _has_called_back; }
-  void set_has_called_back(bool val)             { _has_called_back = val; }
-  bool              is_at_poll_safepoint() { return _at_poll_safepoint; }
-  void              set_at_poll_safepoint(bool val) { _at_poll_safepoint = val; }
+  bool is_at_poll_safepoint()           { return _at_poll_safepoint; }
+  void set_at_poll_safepoint(bool val)  { _at_poll_safepoint = val; }
   void handle_polling_page_exception();
diff --git a/src/hotspot/share/runtime/safepointMechanism.cpp b/src/hotspot/share/runtime/safepointMechanism.cpp
index 5a21e514374..4cf957e3717 100644
--- a/src/hotspot/share/runtime/safepointMechanism.cpp
+++ b/src/hotspot/share/runtime/safepointMechanism.cpp
@@ -86,6 +86,9 @@ void SafepointMechanism::default_initialize() {
 void SafepointMechanism::block_if_requested_slow(JavaThread *thread) {
   // local poll already checked, if used.
   if (global_poll()) {
+    // Any load in ::block must not pass the global poll load.
+    // Otherwise we might load an old safepoint counter (for example).
+    OrderAccess::loadload();
   if (uses_thread_local_poll() && thread->has_handshake()) {
diff --git a/src/hotspot/share/runtime/safepointMechanism.hpp b/src/hotspot/share/runtime/safepointMechanism.hpp
index 073d96f4049..6f15b9ab196 100644
--- a/src/hotspot/share/runtime/safepointMechanism.hpp
+++ b/src/hotspot/share/runtime/safepointMechanism.hpp
@@ -78,9 +78,6 @@ public:
   // Blocks a thread until safepoint/handshake is completed.
   static inline void block_if_requested(JavaThread* thread);
-  // Calls back if there is a pending safepoint but does not block for it.
-  static inline void callback_if_safepoint(JavaThread* thread);
   // Caller is responsible for using a memory barrier if needed.
   static inline void arm_local_poll(JavaThread* thread);
   static inline void disarm_local_poll(JavaThread* thread);
diff --git a/src/hotspot/share/runtime/safepointMechanism.inline.hpp b/src/hotspot/share/runtime/safepointMechanism.inline.hpp
index 5ba31fe6ce9..f44fdf8ca49 100644
--- a/src/hotspot/share/runtime/safepointMechanism.inline.hpp
+++ b/src/hotspot/share/runtime/safepointMechanism.inline.hpp
@@ -35,7 +35,7 @@ bool SafepointMechanism::local_poll_armed(JavaThread* thread) {
 bool SafepointMechanism::global_poll() {
-  return SafepointSynchronize::do_call_back();
+  return (SafepointSynchronize::_state != SafepointSynchronize::_not_synchronized);
 bool SafepointMechanism::local_poll(Thread* thread) {
@@ -62,20 +62,6 @@ void SafepointMechanism::block_if_requested(JavaThread *thread) {
-void SafepointMechanism::callback_if_safepoint(JavaThread* thread) {
-  if (!uses_thread_local_poll() || local_poll_armed(thread)) {
-    // If using thread local polls, we should not check the
-    // global_poll() and callback via block() if the VMThread
-    // has not yet armed the local poll. Otherwise, when used in
-    // combination with should_block(), the latter could miss
-    // detecting the same safepoint that this method would detect
-    // if only checking global polls.
-    if (global_poll()) {
-      SafepointSynchronize::block(thread, false);
-    }
-  }
 void SafepointMechanism::arm_local_poll(JavaThread* thread) {
diff --git a/src/hotspot/share/runtime/thread.hpp b/src/hotspot/share/runtime/thread.hpp
index 066888a194e..12d42f6be22 100644
--- a/src/hotspot/share/runtime/thread.hpp
+++ b/src/hotspot/share/runtime/thread.hpp
@@ -421,11 +421,21 @@ class Thread: public ThreadShadow {
 #ifdef ASSERT
-  bool _visited_for_critical_count;
+  volatile uint64_t _visited_for_critical_count;
-  void set_visited_for_critical_count(bool z) { _visited_for_critical_count = z; }
-  bool was_visited_for_critical_count() const   { return _visited_for_critical_count; }
+  // Marks this thread as visited for the safepoint identified by safepoint_id.
+  // The marker must currently be 0 (i.e. reset), and safepoint_id must be odd,
+  // so a valid id can never be confused with the 0 "not visited" value.
+  void set_visited_for_critical_count(uint64_t safepoint_id) {
+    assert(_visited_for_critical_count == 0, "Must be reset before set");
+    assert((safepoint_id & 0x1) == 1, "Must be odd");
+    _visited_for_critical_count = safepoint_id;
+  }
+  // Clears the visit marker back to 0. Asserts that the thread was marked for
+  // exactly this safepoint_id beforehand.
+  void reset_visited_for_critical_count(uint64_t safepoint_id) {
+    assert(_visited_for_critical_count == safepoint_id, "Was not visited");
+    _visited_for_critical_count = 0;
+  }
+  // Returns true if the stored marker equals safepoint_id, i.e. the thread was
+  // marked for that safepoint and the marker has not been reset since.
+  bool was_visited_for_critical_count(uint64_t safepoint_id) const {
+    return _visited_for_critical_count == safepoint_id;
+  }
diff --git a/src/hotspot/share/runtime/vmThread.cpp b/src/hotspot/share/runtime/vmThread.cpp
index 5adc089e9a4..1dfd0a1c2f0 100644
--- a/src/hotspot/share/runtime/vmThread.cpp
+++ b/src/hotspot/share/runtime/vmThread.cpp
@@ -458,6 +458,8 @@ bool VMThread::no_op_safepoint_needed(bool check_time) {
 void VMThread::loop() {
   assert(_cur_vm_operation == NULL, "no current one should be executing");
+  SafepointSynchronize::init(_vm_thread);
   while(true) {
     VM_Operation* safepoint_ops = NULL;
diff --git a/src/hotspot/share/services/runtimeService.cpp b/src/hotspot/share/services/runtimeService.cpp
index 30dcdf41a19..4386f940a5e 100644
--- a/src/hotspot/share/services/runtimeService.cpp
+++ b/src/hotspot/share/services/runtimeService.cpp
@@ -1,5 +1,5 @@
- * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
  * This code is free software; you can redistribute it and/or modify it
@@ -25,6 +25,7 @@
 #include "precompiled.hpp"
 #include "classfile/classLoader.hpp"
 #include "logging/log.hpp"
+#include "runtime/timer.hpp"
 #include "runtime/vm_version.hpp"
 #include "services/attachListener.hpp"
 #include "services/management.hpp"
@@ -40,7 +41,9 @@ PerfCounter*  RuntimeService::_sync_time_ticks = NULL;
 PerfCounter*  RuntimeService::_total_safepoints = NULL;
 PerfCounter*  RuntimeService::_safepoint_time_ticks = NULL;
 PerfCounter*  RuntimeService::_application_time_ticks = NULL;
-double RuntimeService::_last_safepoint_sync_time_sec = 0.0;
+jlong RuntimeService::_last_safepoint_sync_time_ns = 0;
+jlong RuntimeService::_last_safepoint_end_time_ns = 0;
+jlong RuntimeService::_last_app_time_ns = 0;
 void RuntimeService::init() {
@@ -89,12 +92,14 @@ void RuntimeService::record_safepoint_begin() {
   // Print the time interval in which the app was executing
   if (_app_timer.is_updated()) {
-    log_info(safepoint)("Application time: %3.7f seconds", last_application_time_sec());
+    _last_app_time_ns = _app_timer.ticks_since_update();
+    log_info(safepoint)("Application time: %3.7f seconds", TimeHelper::counter_to_seconds(_last_app_time_ns));
   // update the time stamp to begin recording safepoint time
+  _last_safepoint_sync_time_ns = 0;
+  _last_safepoint_end_time_ns = 0;
-  _last_safepoint_sync_time_sec = 0.0;
   if (UsePerfData) {
     if (_app_timer.is_updated()) {
@@ -107,18 +112,24 @@ void RuntimeService::record_safepoint_synchronized() {
   if (UsePerfData) {
-  if (log_is_enabled(Info, safepoint)) {
-    _last_safepoint_sync_time_sec = last_safepoint_time_sec();
+  if (log_is_enabled(Info, safepoint) || log_is_enabled(Info, safepoint, stats)) {
+    _last_safepoint_sync_time_ns = _safepoint_timer.ticks_since_update();
 void RuntimeService::record_safepoint_end() {
-  // Print the time interval for which the app was stopped
-  // during the current safepoint operation.
-  log_info(safepoint)("Total time for which application threads were stopped: %3.7f seconds, Stopping threads took: %3.7f seconds",
-                      last_safepoint_time_sec(), _last_safepoint_sync_time_sec);
+  // Logging of safepoint+stats=info needs _last_safepoint_end_time_ns to be set.
+  // Logging of safepoint=info needs _last_safepoint_end_time_ns for the log message below.
+  if (log_is_enabled(Info, safepoint) || log_is_enabled(Info, safepoint, stats)) {
+    _last_safepoint_end_time_ns = _safepoint_timer.ticks_since_update();
+    log_info(safepoint)(
+       "Total time for which application threads were stopped: %3.7f seconds, "
+       "Stopping threads took: %3.7f seconds",
+       TimeHelper::counter_to_seconds(_last_safepoint_end_time_ns),
+       TimeHelper::counter_to_seconds(_last_safepoint_sync_time_ns));
+  }
   // update the time stamp to begin recording app time
@@ -127,6 +138,25 @@ void RuntimeService::record_safepoint_end() {
+// Emits the per-safepoint summary line for the safepoint+stats log tags at
+// Info level; returns without doing anything when that logging is disabled.
+// operation_name is printed verbatim as the quoted safepoint name.
+// NOTE(review): the three _last_*_ns fields are assigned from
+// ticks_since_update() elsewhere in this file and are converted with
+// TimeHelper::counter_to_seconds() for the other log lines, yet they are
+// printed here unconverted with an "ns" label -- confirm that the tick unit is
+// nanoseconds on every supported platform.
+void RuntimeService::record_safepoint_epilog(const char* operation_name) {
+  if (!log_is_enabled(Info, safepoint, stats)) {
+    return;
+  }
+  log_info(safepoint, stats)(
+     "Safepoint \"%s\", "
+     "Time since last: " JLONG_FORMAT " ns; "
+     "Reaching safepoint: " JLONG_FORMAT " ns; "
+     "At safepoint: " JLONG_FORMAT " ns; "
+     "Total: " JLONG_FORMAT " ns",
+      operation_name,
+      _last_app_time_ns,
+      _last_safepoint_sync_time_ns,
+      // Time spent at the safepoint = total time minus the time to reach it.
+      _last_safepoint_end_time_ns - _last_safepoint_sync_time_ns,
+      _last_safepoint_end_time_ns
+     );
 void RuntimeService::record_application_start() {
   // update the time stamp to begin recording app time
diff --git a/src/hotspot/share/services/runtimeService.hpp b/src/hotspot/share/services/runtimeService.hpp
index b18f2cc6e5d..0817fd5485d 100644
--- a/src/hotspot/share/services/runtimeService.hpp
+++ b/src/hotspot/share/services/runtimeService.hpp
@@ -37,7 +37,9 @@ private:
   static TimeStamp _safepoint_timer;
   static TimeStamp _app_timer;
-  static double _last_safepoint_sync_time_sec;
+  static jlong _last_safepoint_sync_time_ns;
+  static jlong _last_safepoint_end_time_ns;
+  static jlong _last_app_time_ns;
   static void init();
@@ -47,13 +49,11 @@ public:
   static jlong safepoint_time_ms();
   static jlong application_time_ms();
-  static double last_safepoint_time_sec()      { return _safepoint_timer.seconds(); }
-  static double last_application_time_sec()    { return _app_timer.seconds(); }
   // callbacks
   static void record_safepoint_begin() NOT_MANAGEMENT_RETURN;
   static void record_safepoint_synchronized() NOT_MANAGEMENT_RETURN;
   static void record_safepoint_end() NOT_MANAGEMENT_RETURN;
+  static void record_safepoint_epilog(const char* operation_name) NOT_MANAGEMENT_RETURN;
   static void record_application_start() NOT_MANAGEMENT_RETURN;
diff --git a/test/hotspot/jtreg/runtime/logging/SafepointTest.java b/test/hotspot/jtreg/runtime/logging/SafepointTest.java
index 2509133742b..f4f36f8a256 100644
--- a/test/hotspot/jtreg/runtime/logging/SafepointTest.java
+++ b/test/hotspot/jtreg/runtime/logging/SafepointTest.java
@@ -1,5 +1,5 @@
- * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved.
  * This code is free software; you can redistribute it and/or modify it
@@ -40,10 +40,9 @@ public class SafepointTest {
         ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-Xlog:safepoint=trace",
         OutputAnalyzer output = new OutputAnalyzer(pb.start());
-        output.shouldContain("Safepoint synchronization initiated. (");
+        output.shouldContain("Safepoint synchronization initiated");
         output.shouldContain("Entering safepoint region: ");
         output.shouldContain("Leaving safepoint region");
-        output.shouldContain("_at_poll_safepoint");