8245721: Refactor the TaskTerminator

Improve the structure of the TaskTerminator code to make it more understandable and amenable to improvements.

Reviewed-by: zgu, kbarrett, lkorinth
Thomas Schatzl 2020-08-17 11:23:54 +02:00
parent f1a5cbb11e
commit 55e3560abc
2 changed files with 136 additions and 139 deletions
src/hotspot/share/gc/shared/taskTerminator.cpp

@@ -28,15 +28,52 @@
 #include "gc/shared/taskTerminator.hpp"
 #include "gc/shared/taskqueue.hpp"
 #include "logging/log.hpp"
+#include "runtime/globals.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "runtime/thread.hpp"
 
+TaskTerminator::DelayContext::DelayContext() {
+  _yield_count = 0;
+  reset_hard_spin_information();
+}
+
+void TaskTerminator::DelayContext::reset_hard_spin_information() {
+  _hard_spin_count = 0;
+  _hard_spin_limit = WorkStealingHardSpins >> WorkStealingSpinToYieldRatio;
+}
+
+bool TaskTerminator::DelayContext::needs_sleep() const {
+  return _yield_count >= WorkStealingYieldsBeforeSleep;
+}
+
+void TaskTerminator::DelayContext::do_step() {
+  assert(_yield_count < WorkStealingYieldsBeforeSleep, "Number of yields too large");
+  // Each spin iteration is counted as a yield for purposes of
+  // deciding when to sleep.
+  _yield_count++;
+  // Periodically yield instead of spinning after WorkStealingSpinToYieldRatio
+  // spins.
+  if (_hard_spin_count > WorkStealingSpinToYieldRatio) {
+    os::naked_yield();
+    reset_hard_spin_information();
+  } else {
+    // Hard spin this time
+    for (uint j = 0; j < _hard_spin_limit; j++) {
+      SpinPause();
+    }
+    _hard_spin_count++;
+    // Increase the hard spinning period but only up to a limit.
+    _hard_spin_limit = MIN2(2 * _hard_spin_limit,
+                            (uint) WorkStealingHardSpins);
+  }
+}
+
 TaskTerminator::TaskTerminator(uint n_threads, TaskQueueSetSuper* queue_set) :
   _n_threads(n_threads),
   _queue_set(queue_set),
   _offered_termination(0),
-  _spin_master(NULL) {
-
-  _blocker = new Monitor(Mutex::leaf, "TaskTerminator", false, Monitor::_safepoint_check_never);
-}
+  _blocker(Mutex::leaf, "TaskTerminator", false, Monitor::_safepoint_check_never),
+  _spin_master(NULL) { }
 
 TaskTerminator::~TaskTerminator() {
   if (_offered_termination != 0) {
@@ -45,8 +82,6 @@ TaskTerminator::~TaskTerminator() {
   }
 
   assert(_spin_master == NULL, "Should have been reset");
-  assert(_blocker != NULL, "Can not be NULL");
-  delete _blocker;
 }
 
 #ifdef ASSERT
@@ -55,15 +90,11 @@ void TaskTerminator::assert_queue_set_empty() const {
 }
 #endif
 
-void TaskTerminator::yield() {
-  assert(_offered_termination <= _n_threads, "Invariant");
-  os::naked_yield();
-}
-
 void TaskTerminator::reset_for_reuse() {
   if (_offered_termination != 0) {
     assert(_offered_termination == _n_threads,
-           "Terminator may still be in use");
+           "Only %u of %u threads offered termination", _offered_termination, _n_threads);
+    assert(_spin_master == NULL, "Leftover spin master " PTR_FORMAT, p2i(_spin_master));
     _offered_termination = 0;
   }
 }
@@ -81,10 +112,27 @@ size_t TaskTerminator::tasks_in_queue_set() const {
   return _queue_set->tasks();
 }
 
+void TaskTerminator::prepare_for_return(Thread* this_thread, size_t tasks) {
+  assert(_blocker.is_locked(), "must be");
+  assert(_blocker.owned_by_self(), "must be");
+  assert(_offered_termination >= 1, "must be");
+
+  if (_spin_master == this_thread) {
+    _spin_master = NULL;
+  }
+
+  if (tasks >= _offered_termination - 1) {
+    _blocker.notify_all();
+  } else {
+    for (; tasks > 1; tasks--) {
+      _blocker.notify();
+    }
+  }
+}
+
 bool TaskTerminator::offer_termination(TerminatorTerminator* terminator) {
   assert(_n_threads > 0, "Initialization is incorrect");
   assert(_offered_termination < _n_threads, "Invariant");
-  assert(_blocker != NULL, "Invariant");
 
   // Single worker, done
   if (_n_threads == 1) {
@@ -93,136 +141,64 @@ bool TaskTerminator::offer_termination(TerminatorTerminator* terminator) {
     return true;
   }
 
-  _blocker->lock_without_safepoint_check();
+  Thread* the_thread = Thread::current();
+
+  MonitorLocker x(&_blocker, Mutex::_no_safepoint_check_flag);
   _offered_termination++;
+
   // All arrived, done
   if (_offered_termination == _n_threads) {
-    _blocker->notify_all();
-    _blocker->unlock();
+    prepare_for_return(the_thread);
     assert_queue_set_empty();
     return true;
   }
 
-  Thread* the_thread = Thread::current();
-  while (true) {
+  for (;;) {
     if (_spin_master == NULL) {
       _spin_master = the_thread;
+      DelayContext delay_context;
 
-      _blocker->unlock();
-
-      if (do_spin_master_work(terminator)) {
-        assert(_offered_termination == _n_threads, "termination condition");
-        assert_queue_set_empty();
-        return true;
-      } else {
-        _blocker->lock_without_safepoint_check();
-        // There is possibility that termination is reached between dropping the lock
-        // before returning from do_spin_master_work() and acquiring lock above.
+      while (!delay_context.needs_sleep()) {
+        size_t tasks;
+        bool should_exit_termination;
+        {
+          MutexUnlocker y(&_blocker, Mutex::_no_safepoint_check_flag);
+          delay_context.do_step();
+          // Intentionally read the number of tasks outside the mutex since this
+          // is potentially a long operation making the locked section long.
+          tasks = tasks_in_queue_set();
+          should_exit_termination = exit_termination(tasks, terminator);
+        }
+        // Immediately check exit conditions after re-acquiring the lock.
         if (_offered_termination == _n_threads) {
-          _blocker->unlock();
+          prepare_for_return(the_thread);
           assert_queue_set_empty();
           return true;
+        } else if (should_exit_termination) {
+          prepare_for_return(the_thread, tasks);
+          _offered_termination--;
+          return false;
         }
       }
-    } else {
-      _blocker->wait_without_safepoint_check(WorkStealingSleepMillis);
-
-      if (_offered_termination == _n_threads) {
-        _blocker->unlock();
-        assert_queue_set_empty();
-        return true;
-      }
-    }
-
-    size_t tasks = tasks_in_queue_set();
-    if (exit_termination(tasks, terminator)) {
-      assert_lock_strong(_blocker);
-      _offered_termination--;
-      _blocker->unlock();
-      return false;
-    }
-  }
-}
-
-bool TaskTerminator::do_spin_master_work(TerminatorTerminator* terminator) {
-  uint yield_count = 0;
-  // Number of hard spin loops done since last yield
-  uint hard_spin_count = 0;
-  // Number of iterations in the hard spin loop.
-  uint hard_spin_limit = WorkStealingHardSpins;
-
-  // If WorkStealingSpinToYieldRatio is 0, no hard spinning is done.
-  // If it is greater than 0, then start with a small number
-  // of spins and increase number with each turn at spinning until
-  // the count of hard spins exceeds WorkStealingSpinToYieldRatio.
-  // Then do a yield() call and start spinning afresh.
-  if (WorkStealingSpinToYieldRatio > 0) {
-    hard_spin_limit = WorkStealingHardSpins >> WorkStealingSpinToYieldRatio;
-    hard_spin_limit = MAX2(hard_spin_limit, 1U);
-  }
-  // Remember the initial spin limit.
-  uint hard_spin_start = hard_spin_limit;
-
-  // Loop waiting for all threads to offer termination or
-  // more work.
-  while (true) {
-    // Look for more work.
-    // Periodically sleep() instead of yield() to give threads
-    // waiting on the cores the chance to grab this code
-    if (yield_count <= WorkStealingYieldsBeforeSleep) {
-      // Do a yield or hardspin. For purposes of deciding whether
-      // to sleep, count this as a yield.
-      yield_count++;
-
-      // Periodically call yield() instead spinning
-      // After WorkStealingSpinToYieldRatio spins, do a yield() call
-      // and reset the counts and starting limit.
-      if (hard_spin_count > WorkStealingSpinToYieldRatio) {
-        yield();
-        hard_spin_count = 0;
-        hard_spin_limit = hard_spin_start;
-      } else {
-        // Hard spin this time
-        // Increase the hard spinning period but only up to a limit.
-        hard_spin_limit = MIN2(2*hard_spin_limit,
-                               (uint) WorkStealingHardSpins);
-        for (uint j = 0; j < hard_spin_limit; j++) {
-          SpinPause();
-        }
-        hard_spin_count++;
-      }
-    } else {
-      log_develop_trace(gc, task)("TaskTerminator::do_spin_master_work() thread " PTR_FORMAT " sleeps after %u yields",
-                                  p2i(Thread::current()), yield_count);
-      yield_count = 0;
-
-      MonitorLocker locker(_blocker, Mutex::_no_safepoint_check_flag);
+      // Give up spin master before sleeping.
       _spin_master = NULL;
-      locker.wait(WorkStealingSleepMillis);
-      if (_spin_master == NULL) {
-        _spin_master = Thread::current();
-      } else {
-        return false;
-      }
     }
+    bool timed_out = x.wait(WorkStealingSleepMillis);
 
-    size_t tasks = tasks_in_queue_set();
-    bool exit = exit_termination(tasks, terminator);
-    {
-      MonitorLocker locker(_blocker, Mutex::_no_safepoint_check_flag);
-      // Termination condition reached
-      if (_offered_termination == _n_threads) {
-        _spin_master = NULL;
-        return true;
-      } else if (exit) {
-        if (tasks >= _offered_termination - 1) {
-          locker.notify_all();
-        } else {
-          for (; tasks > 1; tasks--) {
-            locker.notify();
-          }
-        }
-        _spin_master = NULL;
-        return false;
-      }
-    }
+    // Immediately check exit conditions after re-acquiring the lock.
+    if (_offered_termination == _n_threads) {
+      prepare_for_return(the_thread);
+      assert_queue_set_empty();
+      return true;
+    } else if (!timed_out) {
+      // We were woken up. Don't bother waking up more tasks.
+      prepare_for_return(the_thread, 0);
+      _offered_termination--;
+      return false;
+    } else {
+      size_t tasks = tasks_in_queue_set();
+      if (exit_termination(tasks, terminator)) {
+        prepare_for_return(the_thread, tasks);
+        _offered_termination--;
+        return false;
+      }
+    }
   }
 }
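
For readers unfamiliar with the call site: offer_termination() is invoked by a worker that has run out of work. A true result means all workers agreed to terminate; false means new tasks were observed (or a TerminatorTerminator vetoed termination) and the caller should resume stealing. A minimal sketch of such a caller follows; WorkItem, steal_work() and process() are hypothetical stand-ins for illustration, not JDK code or part of this change:

    #include "gc/shared/taskTerminator.hpp"

    class WorkItem;                    // hypothetical task type
    bool steal_work(WorkItem** item);  // hypothetical: take or steal a task
    void process(WorkItem* item);      // hypothetical: execute it

    // Sketch of a worker driving the termination protocol.
    void worker_loop(TaskTerminator& terminator) {
      do {
        WorkItem* item;
        while (steal_work(&item)) {
          process(item);
        }
        // Queues appear empty: offer termination. A false return means more
        // work showed up, so go back to stealing; true means every worker
        // has offered termination and the protocol is complete.
      } while (!terminator.offer_termination(NULL));
    }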

src/hotspot/share/gc/shared/taskTerminator.hpp

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved.
+ * Copyright (c) 2018, 2020, Red Hat, Inc. All rights reserved.
+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -26,11 +26,12 @@
 #define SHARE_GC_SHARED_TASKTERMINATOR_HPP
 
 #include "memory/allocation.hpp"
 #include "memory/padded.hpp"
+#include "runtime/mutex.hpp"
-#include "runtime/thread.hpp"
 
 class TaskQueueSetSuper;
 class TerminatorTerminator;
+class Thread;
 
 /*
  * Provides a task termination protocol.
@@ -44,12 +45,30 @@ class TerminatorTerminator;
  * SIGPLAN International Symposium on Memory Management (ISMM 2016). ACM,
  * New York, NY, USA, 46-54. DOI: https://doi.org/10.1145/2926697.2926706"
  *
- * Instead of a dedicated spin-master, our implementation will let spin-master relinquish
- * the role before it goes to sleep/wait, allowing newly arrived threads to compete for the role.
- * The intention of above enhancement is to reduce spin-master's latency on detecting new tasks
- * for stealing and termination condition.
+ * Instead of a dedicated spin-master, our implementation will let spin-master
+ * relinquish the role before it goes to sleep/wait, allowing newly arrived
+ * threads to compete for the role.
+ * The intention of above enhancement is to reduce spin-master's latency on
+ * detecting new tasks for stealing and termination condition.
 */
 class TaskTerminator : public CHeapObj<mtGC> {
+  class DelayContext {
+    uint _yield_count;
+    // Number of hard spin loops done since last yield
+    uint _hard_spin_count;
+    // Number of iterations in the current hard spin loop.
+    uint _hard_spin_limit;
+
+    void reset_hard_spin_information();
+  public:
+    DelayContext();
+
+    // Should the caller sleep (wait) or perform a spin step?
+    bool needs_sleep() const;
+
+    // Perform one delay iteration.
+    void do_step();
+  };
 
   uint _n_threads;
   TaskQueueSetSuper* _queue_set;
@@ -57,21 +76,23 @@ class TaskTerminator : public CHeapObj<mtGC> {
   volatile uint _offered_termination;
   DEFINE_PAD_MINUS_SIZE(1, DEFAULT_CACHE_LINE_SIZE, sizeof(volatile uint));
 
+  Monitor _blocker;
+  Thread* _spin_master;
+
   void assert_queue_set_empty() const NOT_DEBUG_RETURN;
 
-  void yield();
-
-  Monitor* _blocker;
-  Thread* _spin_master;
+  // Prepare for return from offer_termination. Gives up the spin master token
+  // and wakes up up to tasks threads waiting on _blocker (the default value
+  // means to wake up everyone).
+  void prepare_for_return(Thread* this_thread, size_t tasks = SIZE_MAX);
 
   // If we should exit current termination protocol
   bool exit_termination(size_t tasks, TerminatorTerminator* terminator);
 
   size_t tasks_in_queue_set() const;
 
-  // Perform spin-master task.
-  // Return true if termination condition is detected, otherwise return false
-  bool do_spin_master_work(TerminatorTerminator* terminator);
+  // Perform one iteration of spin-master work.
+  void do_delay_step(DelayContext& delay_context);
 
   NONCOPYABLE(TaskTerminator);
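
The DelayContext introduced above packages the back-off schedule that do_spin_master_work() used to open-code: exponentially growing SpinPause() bursts, an os::naked_yield() roughly every WorkStealingSpinToYieldRatio bursts, and a timed wait on _blocker once WorkStealingYieldsBeforeSleep steps have elapsed. The following self-contained sketch models one sleep cycle of that schedule; the constants stand in for the HotSpot flags and assume their usual default values (4096, 10, 5000), which may differ in a given build:

    #include <algorithm>
    #include <cstdio>

    // Assumed defaults of the HotSpot flags this sketch stands in for.
    static const unsigned kHardSpins = 4096;         // WorkStealingHardSpins
    static const unsigned kSpinToYieldRatio = 10;    // WorkStealingSpinToYieldRatio
    static const unsigned kYieldsBeforeSleep = 5000; // WorkStealingYieldsBeforeSleep

    int main() {
      unsigned yield_count = 0;
      unsigned hard_spin_count = 0;
      unsigned hard_spin_limit = kHardSpins >> kSpinToYieldRatio; // initial burst: 4
      unsigned long long total_spins = 0;

      // Mirror DelayContext::do_step() until needs_sleep() would become true.
      while (yield_count < kYieldsBeforeSleep) {
        yield_count++;
        if (hard_spin_count > kSpinToYieldRatio) {
          // os::naked_yield() in HotSpot; reset to the small initial burst.
          hard_spin_count = 0;
          hard_spin_limit = kHardSpins >> kSpinToYieldRatio;
        } else {
          total_spins += hard_spin_limit; // the SpinPause() burst
          hard_spin_count++;
          // Double the burst length, capped at kHardSpins.
          hard_spin_limit = std::min(2 * hard_spin_limit, kHardSpins);
        }
      }
      printf("steps before sleeping: %u, SpinPause() calls: %llu\n",
             yield_count, total_spins);
      return 0;
    }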