8245721: Refactor the TaskTerminator

Improve the structure of the TaskTerminator code to make it more understandable and amenable to improvements.

Reviewed-by: zgu, kbarrett, lkorinth
Thomas Schatzl 2020-08-17 11:23:54 +02:00
parent f1a5cbb11e
commit 55e3560abc
2 changed files with 136 additions and 139 deletions
src/hotspot/share/gc/shared/taskTerminator.cpp

@@ -28,15 +28,52 @@
 #include "gc/shared/taskTerminator.hpp"
 #include "gc/shared/taskqueue.hpp"
 #include "logging/log.hpp"
+#include "runtime/globals.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "runtime/thread.hpp"
 
+TaskTerminator::DelayContext::DelayContext() {
+  _yield_count = 0;
+  reset_hard_spin_information();
+}
+
+void TaskTerminator::DelayContext::reset_hard_spin_information() {
+  _hard_spin_count = 0;
+  _hard_spin_limit = WorkStealingHardSpins >> WorkStealingSpinToYieldRatio;
+}
+
+bool TaskTerminator::DelayContext::needs_sleep() const {
+  return _yield_count >= WorkStealingYieldsBeforeSleep;
+}
+
+void TaskTerminator::DelayContext::do_step() {
+  assert(_yield_count < WorkStealingYieldsBeforeSleep, "Number of yields too large");
+  // Each spin iteration is counted as a yield for purposes of
+  // deciding when to sleep.
+  _yield_count++;
+  // Periodically yield instead of spinning after WorkStealingSpinToYieldRatio
+  // spins.
+  if (_hard_spin_count > WorkStealingSpinToYieldRatio) {
+    os::naked_yield();
+    reset_hard_spin_information();
+  } else {
+    // Hard spin this time
+    for (uint j = 0; j < _hard_spin_limit; j++) {
+      SpinPause();
+    }
+    _hard_spin_count++;
+    // Increase the hard spinning period but only up to a limit.
+    _hard_spin_limit = MIN2(2 * _hard_spin_limit,
+                            (uint) WorkStealingHardSpins);
+  }
+}
+
 TaskTerminator::TaskTerminator(uint n_threads, TaskQueueSetSuper* queue_set) :
   _n_threads(n_threads),
   _queue_set(queue_set),
   _offered_termination(0),
-  _spin_master(NULL) {
-
-  _blocker = new Monitor(Mutex::leaf, "TaskTerminator", false, Monitor::_safepoint_check_never);
-}
+  _blocker(Mutex::leaf, "TaskTerminator", false, Monitor::_safepoint_check_never),
+  _spin_master(NULL) { }
 
 TaskTerminator::~TaskTerminator() {
   if (_offered_termination != 0) {
@@ -45,8 +82,6 @@ TaskTerminator::~TaskTerminator() {
   }
 
   assert(_spin_master == NULL, "Should have been reset");
-  assert(_blocker != NULL, "Can not be NULL");
-  delete _blocker;
 }
 
 #ifdef ASSERT
@@ -55,15 +90,11 @@ void TaskTerminator::assert_queue_set_empty() const {
 }
 #endif
 
-void TaskTerminator::yield() {
-  assert(_offered_termination <= _n_threads, "Invariant");
-  os::naked_yield();
-}
-
 void TaskTerminator::reset_for_reuse() {
   if (_offered_termination != 0) {
     assert(_offered_termination == _n_threads,
-           "Terminator may still be in use");
+           "Only %u of %u threads offered termination", _offered_termination, _n_threads);
+    assert(_spin_master == NULL, "Leftover spin master " PTR_FORMAT, p2i(_spin_master));
     _offered_termination = 0;
   }
 }
@@ -81,10 +112,27 @@ size_t TaskTerminator::tasks_in_queue_set() const {
   return _queue_set->tasks();
 }
 
+void TaskTerminator::prepare_for_return(Thread* this_thread, size_t tasks) {
+  assert(_blocker.is_locked(), "must be");
+  assert(_blocker.owned_by_self(), "must be");
+  assert(_offered_termination >= 1, "must be");
+
+  if (_spin_master == this_thread) {
+    _spin_master = NULL;
+  }
+
+  if (tasks >= _offered_termination - 1) {
+    _blocker.notify_all();
+  } else {
+    for (; tasks > 1; tasks--) {
+      _blocker.notify();
+    }
+  }
+}
+
 bool TaskTerminator::offer_termination(TerminatorTerminator* terminator) {
   assert(_n_threads > 0, "Initialization is incorrect");
   assert(_offered_termination < _n_threads, "Invariant");
-  assert(_blocker != NULL, "Invariant");
 
   // Single worker, done
   if (_n_threads == 1) {
@@ -93,136 +141,64 @@ bool TaskTerminator::offer_termination(TerminatorTerminator* terminator) {
     return true;
   }
 
-  _blocker->lock_without_safepoint_check();
+  Thread* the_thread = Thread::current();
+
+  MonitorLocker x(&_blocker, Mutex::_no_safepoint_check_flag);
   _offered_termination++;
+
   // All arrived, done
   if (_offered_termination == _n_threads) {
-    _blocker->notify_all();
-    _blocker->unlock();
+    prepare_for_return(the_thread);
     assert_queue_set_empty();
     return true;
   }
 
-  Thread* the_thread = Thread::current();
-  while (true) {
+  for (;;) {
     if (_spin_master == NULL) {
       _spin_master = the_thread;
+      DelayContext delay_context;
 
-      _blocker->unlock();
-
-      if (do_spin_master_work(terminator)) {
-        assert(_offered_termination == _n_threads, "termination condition");
-        assert_queue_set_empty();
-        return true;
-      } else {
-        _blocker->lock_without_safepoint_check();
-        // There is possibility that termination is reached between dropping the lock
-        // before returning from do_spin_master_work() and acquiring lock above.
+      while (!delay_context.needs_sleep()) {
+        size_t tasks;
+        bool should_exit_termination;
+        {
+          MutexUnlocker y(&_blocker, Mutex::_no_safepoint_check_flag);
+          delay_context.do_step();
+          // Intentionally read the number of tasks outside the mutex since this
+          // is potentially a long operation making the locked section long.
+          tasks = tasks_in_queue_set();
+          should_exit_termination = exit_termination(tasks, terminator);
+        }
+        // Immediately check exit conditions after re-acquiring the lock.
         if (_offered_termination == _n_threads) {
-          _blocker->unlock();
+          prepare_for_return(the_thread);
           assert_queue_set_empty();
           return true;
+        } else if (should_exit_termination) {
+          prepare_for_return(the_thread, tasks);
+          _offered_termination--;
+          return false;
         }
       }
-    } else {
-      _blocker->wait_without_safepoint_check(WorkStealingSleepMillis);
-
-      if (_offered_termination == _n_threads) {
-        _blocker->unlock();
-        assert_queue_set_empty();
-        return true;
-      }
-    }
-
-    size_t tasks = tasks_in_queue_set();
-    if (exit_termination(tasks, terminator)) {
-      assert_lock_strong(_blocker);
-      _offered_termination--;
-      _blocker->unlock();
-      return false;
-    }
-  }
-}
-
-bool TaskTerminator::do_spin_master_work(TerminatorTerminator* terminator) {
-  uint yield_count = 0;
-  // Number of hard spin loops done since last yield
-  uint hard_spin_count = 0;
-  // Number of iterations in the hard spin loop.
-  uint hard_spin_limit = WorkStealingHardSpins;
-
-  // If WorkStealingSpinToYieldRatio is 0, no hard spinning is done.
-  // If it is greater than 0, then start with a small number
-  // of spins and increase number with each turn at spinning until
-  // the count of hard spins exceeds WorkStealingSpinToYieldRatio.
-  // Then do a yield() call and start spinning afresh.
-  if (WorkStealingSpinToYieldRatio > 0) {
-    hard_spin_limit = WorkStealingHardSpins >> WorkStealingSpinToYieldRatio;
-    hard_spin_limit = MAX2(hard_spin_limit, 1U);
-  }
-  // Remember the initial spin limit.
-  uint hard_spin_start = hard_spin_limit;
-
-  // Loop waiting for all threads to offer termination or
-  // more work.
-  while (true) {
-    // Look for more work.
-    // Periodically sleep() instead of yield() to give threads
-    // waiting on the cores the chance to grab this code
-    if (yield_count <= WorkStealingYieldsBeforeSleep) {
-      // Do a yield or hardspin. For purposes of deciding whether
-      // to sleep, count this as a yield.
-      yield_count++;
-
-      // Periodically call yield() instead spinning
-      // After WorkStealingSpinToYieldRatio spins, do a yield() call
-      // and reset the counts and starting limit.
-      if (hard_spin_count > WorkStealingSpinToYieldRatio) {
-        yield();
-        hard_spin_count = 0;
-        hard_spin_limit = hard_spin_start;
-      } else {
-        // Hard spin this time
-        // Increase the hard spinning period but only up to a limit.
-        hard_spin_limit = MIN2(2*hard_spin_limit,
-                               (uint) WorkStealingHardSpins);
-        for (uint j = 0; j < hard_spin_limit; j++) {
-          SpinPause();
-        }
-        hard_spin_count++;
-      }
-    } else {
-      log_develop_trace(gc, task)("TaskTerminator::do_spin_master_work() thread " PTR_FORMAT " sleeps after %u yields",
-                                  p2i(Thread::current()), yield_count);
-      yield_count = 0;
-
-      MonitorLocker locker(_blocker, Mutex::_no_safepoint_check_flag);
+      // Give up spin master before sleeping.
       _spin_master = NULL;
-      locker.wait(WorkStealingSleepMillis);
-      if (_spin_master == NULL) {
-        _spin_master = Thread::current();
-      } else {
-        return false;
-      }
     }
+    bool timed_out = x.wait(WorkStealingSleepMillis);
 
-    size_t tasks = tasks_in_queue_set();
-    bool exit = exit_termination(tasks, terminator);
-    {
-      MonitorLocker locker(_blocker, Mutex::_no_safepoint_check_flag);
-      // Termination condition reached
-      if (_offered_termination == _n_threads) {
-        _spin_master = NULL;
-        return true;
-      } else if (exit) {
-        if (tasks >= _offered_termination - 1) {
-          locker.notify_all();
-        } else {
-          for (; tasks > 1; tasks--) {
-            locker.notify();
-          }
-        }
-        _spin_master = NULL;
-        return false;
-      }
-    }
+    // Immediately check exit conditions after re-acquiring the lock.
+    if (_offered_termination == _n_threads) {
+      prepare_for_return(the_thread);
+      assert_queue_set_empty();
+      return true;
+    } else if (!timed_out) {
+      // We were woken up. Don't bother waking up more tasks.
+      prepare_for_return(the_thread, 0);
+      _offered_termination--;
+      return false;
+    } else {
+      size_t tasks = tasks_in_queue_set();
+      if (exit_termination(tasks, terminator)) {
+        prepare_for_return(the_thread, tasks);
+        _offered_termination--;
+        return false;
+      }
+    }
   }
 }
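
For readers unfamiliar with the call site: offer_termination() is invoked by a worker that has run out of work. A true result means all workers agreed to terminate; false means new tasks were observed (or a TerminatorTerminator vetoed termination) and the caller should resume stealing. A minimal sketch of such a caller follows; WorkItem, steal_work() and process() are hypothetical stand-ins for illustration, not JDK code or part of this change:

    #include "gc/shared/taskTerminator.hpp"

    class WorkItem;                    // hypothetical task type
    bool steal_work(WorkItem** item);  // hypothetical: take or steal a task
    void process(WorkItem* item);      // hypothetical: execute it

    // Sketch of a worker driving the termination protocol.
    void worker_loop(TaskTerminator& terminator) {
      do {
        WorkItem* item;
        while (steal_work(&item)) {
          process(item);
        }
        // Queues appear empty: offer termination. A false return means more
        // work showed up, so go back to stealing; true means every worker
        // has offered termination and the protocol is complete.
      } while (!terminator.offer_termination(NULL));
    }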

src/hotspot/share/gc/shared/taskTerminator.hpp

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved.
+ * Copyright (c) 2018, 2020, Red Hat, Inc. All rights reserved.
+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -26,11 +26,12 @@
 #define SHARE_GC_SHARED_TASKTERMINATOR_HPP
 
 #include "memory/allocation.hpp"
 #include "memory/padded.hpp"
+#include "runtime/mutex.hpp"
-#include "runtime/thread.hpp"
 
 class TaskQueueSetSuper;
 class TerminatorTerminator;
+class Thread;
 
 /*
  * Provides a task termination protocol.
@@ -44,12 +45,30 @@ class TerminatorTerminator;
  * SIGPLAN International Symposium on Memory Management (ISMM 2016). ACM,
  * New York, NY, USA, 46-54. DOI: https://doi.org/10.1145/2926697.2926706"
  *
- * Instead of a dedicated spin-master, our implementation will let spin-master relinquish
- * the role before it goes to sleep/wait, allowing newly arrived threads to compete for the role.
- * The intention of above enhancement is to reduce spin-master's latency on detecting new tasks
- * for stealing and termination condition.
+ * Instead of a dedicated spin-master, our implementation will let spin-master
+ * relinquish the role before it goes to sleep/wait, allowing newly arrived
+ * threads to compete for the role.
+ * The intention of above enhancement is to reduce spin-master's latency on
+ * detecting new tasks for stealing and termination condition.
 */
 class TaskTerminator : public CHeapObj<mtGC> {
+  class DelayContext {
+    uint _yield_count;
+    // Number of hard spin loops done since last yield
+    uint _hard_spin_count;
+    // Number of iterations in the current hard spin loop.
+    uint _hard_spin_limit;
+
+    void reset_hard_spin_information();
+  public:
+    DelayContext();
+
+    // Should the caller sleep (wait) or perform a spin step?
+    bool needs_sleep() const;
+
+    // Perform one delay iteration.
+    void do_step();
+  };
 
   uint _n_threads;
   TaskQueueSetSuper* _queue_set;
@@ -57,21 +76,23 @@ class TaskTerminator : public CHeapObj<mtGC> {
   volatile uint _offered_termination;
   DEFINE_PAD_MINUS_SIZE(1, DEFAULT_CACHE_LINE_SIZE, sizeof(volatile uint));
 
+  Monitor _blocker;
+  Thread* _spin_master;
+
   void assert_queue_set_empty() const NOT_DEBUG_RETURN;
 
-  void yield();
-
-  Monitor* _blocker;
-  Thread* _spin_master;
+  // Prepare for return from offer_termination. Gives up the spin master token
+  // and wakes up up to tasks threads waiting on _blocker (the default value
+  // means to wake up everyone).
+  void prepare_for_return(Thread* this_thread, size_t tasks = SIZE_MAX);
 
   // If we should exit current termination protocol
   bool exit_termination(size_t tasks, TerminatorTerminator* terminator);
 
   size_t tasks_in_queue_set() const;
 
-  // Perform spin-master task.
-  // Return true if termination condition is detected, otherwise return false
-  bool do_spin_master_work(TerminatorTerminator* terminator);
+  // Perform one iteration of spin-master work.
+  void do_delay_step(DelayContext& delay_context);
 
   NONCOPYABLE(TaskTerminator);
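
The DelayContext introduced above packages the back-off schedule that do_spin_master_work() used to open-code: exponentially growing SpinPause() bursts, an os::naked_yield() roughly every WorkStealingSpinToYieldRatio bursts, and a timed wait on _blocker once WorkStealingYieldsBeforeSleep steps have elapsed. The following self-contained sketch models one sleep cycle of that schedule; the constants stand in for the HotSpot flags and assume their usual default values (4096, 10, 5000), which may differ in a given build:

    #include <algorithm>
    #include <cstdio>

    // Assumed defaults of the HotSpot flags this sketch stands in for.
    static const unsigned kHardSpins = 4096;         // WorkStealingHardSpins
    static const unsigned kSpinToYieldRatio = 10;    // WorkStealingSpinToYieldRatio
    static const unsigned kYieldsBeforeSleep = 5000; // WorkStealingYieldsBeforeSleep

    int main() {
      unsigned yield_count = 0;
      unsigned hard_spin_count = 0;
      unsigned hard_spin_limit = kHardSpins >> kSpinToYieldRatio; // initial burst: 4
      unsigned long long total_spins = 0;

      // Mirror DelayContext::do_step() until needs_sleep() would become true.
      while (yield_count < kYieldsBeforeSleep) {
        yield_count++;
        if (hard_spin_count > kSpinToYieldRatio) {
          // os::naked_yield() in HotSpot; reset to the small initial burst.
          hard_spin_count = 0;
          hard_spin_limit = kHardSpins >> kSpinToYieldRatio;
        } else {
          total_spins += hard_spin_limit; // the SpinPause() burst
          hard_spin_count++;
          // Double the burst length, capped at kHardSpins.
          hard_spin_limit = std::min(2 * hard_spin_limit, kHardSpins);
        }
      }
      printf("steps before sleeping: %u, SpinPause() calls: %llu\n",
             yield_count, total_spins);
      return 0;
    }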