8215297: Remove ParallelTaskTerminator

Remove ParallelTaskTerminator, as the alternate OWSTTaskTerminator algorithm has worked well for more than a year now.

Reviewed-by: zgu, sjohanss
Thomas Schatzl 2020-02-03 10:45:43 +01:00
parent 4b8a5f991f
commit 76675e93cf
21 changed files with 186 additions and 343 deletions
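
The recurring pattern across these 21 files: call sites stop going through the TaskTerminator wrapper and its terminator() accessor, and instead hold an OWSTTaskTerminator by value and pass its address. A minimal sketch of the call-site change, using stub types rather than the real HotSpot classes:

// Sketch only: TaskQueueSetStub and OWSTTerminatorStub are placeholders,
// not HotSpot types; only the wiring is the point.
struct TaskQueueSetStub {};

struct OWSTTerminatorStub {
  OWSTTerminatorStub(unsigned n_threads, TaskQueueSetStub* queues) {}
  bool offer_termination() { return true; }
};

void run_parallel_phase(unsigned ergo_workers, TaskQueueSetStub* queues) {
  // Before: TaskTerminator terminator(ergo_workers, queues);
  //         proxy(..., terminator.terminator());
  // After: the single remaining implementation, stack-allocated, its
  // address passed straight through (e.g. G1STWRefProcTaskProxy below):
  OWSTTerminatorStub terminator(ergo_workers, queues);
  (void)terminator.offer_termination();
}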

@@ -76,6 +76,7 @@
#include "gc/shared/isGCActiveMark.hpp"
#include "gc/shared/locationPrinter.inline.hpp"
#include "gc/shared/oopStorageParState.hpp"
#include "gc/shared/owstTaskTerminator.hpp"
#include "gc/shared/preservedMarks.inline.hpp"
#include "gc/shared/suspendibleThreadSet.hpp"
#include "gc/shared/referenceProcessor.inline.hpp"
@@ -1132,7 +1133,7 @@ void G1CollectedHeap::print_heap_after_full_collection(G1HeapTransition* heap_tr
print_heap_after_gc();
print_heap_regions();
#ifdef TRACESPINNING
ParallelTaskTerminator::print_termination_counts();
OWSTTaskTerminator::print_termination_counts();
#endif
}
@@ -3140,7 +3141,7 @@ void G1CollectedHeap::do_collection_pause_at_safepoint_helper(double target_paus
verify_after_young_collection(verify_type);
#ifdef TRACESPINNING
ParallelTaskTerminator::print_termination_counts();
OWSTTaskTerminator::print_termination_counts();
#endif
gc_epilogue(false);
@@ -3476,14 +3477,14 @@ class G1STWRefProcTaskProxy: public AbstractGangTask {
G1CollectedHeap* _g1h;
G1ParScanThreadStateSet* _pss;
RefToScanQueueSet* _task_queues;
ParallelTaskTerminator* _terminator;
OWSTTaskTerminator* _terminator;
public:
G1STWRefProcTaskProxy(ProcessTask& proc_task,
G1CollectedHeap* g1h,
G1ParScanThreadStateSet* per_thread_states,
RefToScanQueueSet *task_queues,
ParallelTaskTerminator* terminator) :
OWSTTaskTerminator* terminator) :
AbstractGangTask("Process reference objects in parallel"),
_proc_task(proc_task),
_g1h(g1h),
@@ -3527,8 +3528,8 @@ void G1STWRefProcTaskExecutor::execute(ProcessTask& proc_task, uint ergo_workers
assert(_workers->active_workers() >= ergo_workers,
"Ergonomically chosen workers (%u) should be less than or equal to active workers (%u)",
ergo_workers, _workers->active_workers());
TaskTerminator terminator(ergo_workers, _queues);
G1STWRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _pss, _queues, terminator.terminator());
OWSTTaskTerminator terminator(ergo_workers, _queues);
G1STWRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _pss, _queues, &terminator);
_workers->run_task(&proc_task_proxy, ergo_workers);
}
@@ -3814,7 +3815,7 @@ protected:
G1CollectedHeap* _g1h;
G1ParScanThreadStateSet* _per_thread_states;
RefToScanQueueSet* _task_queues;
TaskTerminator _terminator;
OWSTTaskTerminator _terminator;
uint _num_workers;
void evacuate_live_objects(G1ParScanThreadState* pss,
@@ -3824,7 +3825,7 @@ protected:
G1GCPhaseTimes* p = _g1h->phase_times();
Ticks start = Ticks::now();
G1ParEvacuateFollowersClosure cl(_g1h, pss, _task_queues, _terminator.terminator(), objcopy_phase);
G1ParEvacuateFollowersClosure cl(_g1h, pss, _task_queues, &_terminator, objcopy_phase);
cl.do_void();
assert(pss->queue_is_empty(), "should be empty");

@@ -1482,18 +1482,18 @@ protected:
G1CollectedHeap* _g1h;
G1ParScanThreadState* _par_scan_state;
RefToScanQueueSet* _queues;
ParallelTaskTerminator* _terminator;
OWSTTaskTerminator* _terminator;
G1GCPhaseTimes::GCParPhases _phase;
G1ParScanThreadState* par_scan_state() { return _par_scan_state; }
RefToScanQueueSet* queues() { return _queues; }
ParallelTaskTerminator* terminator() { return _terminator; }
OWSTTaskTerminator* terminator() { return _terminator; }
public:
G1ParEvacuateFollowersClosure(G1CollectedHeap* g1h,
G1ParScanThreadState* par_scan_state,
RefToScanQueueSet* queues,
ParallelTaskTerminator* terminator,
OWSTTaskTerminator* terminator,
G1GCPhaseTimes::GCParPhases phase)
: _start_term(0.0), _term_time(0.0), _term_attempts(0),
_g1h(g1h), _par_scan_state(par_scan_state),

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2001, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -46,6 +46,7 @@
#include "gc/shared/gcTraceTime.inline.hpp"
#include "gc/shared/gcVMOperations.hpp"
#include "gc/shared/genOopClosures.inline.hpp"
#include "gc/shared/owstTaskTerminator.hpp"
#include "gc/shared/referencePolicy.hpp"
#include "gc/shared/strongRootsScope.hpp"
#include "gc/shared/suspendibleThreadSet.hpp"
@@ -600,7 +601,7 @@ void G1ConcurrentMark::set_concurrency(uint active_tasks) {
_num_active_tasks = active_tasks;
// Need to update the three data structures below according to the
// number of active threads for this phase.
_terminator.terminator()->reset_for_reuse((int) active_tasks);
_terminator.reset_for_reuse(active_tasks);
_first_overflow_barrier_sync.set_n_workers((int) active_tasks);
_second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}
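
A signature cleanup rides along here: reset_for_reuse() is now uint-typed end to end (see its definition in owstTaskTerminator.cpp below), so the (int) cast disappears. A stand-in sketch of the reuse contract that set_concurrency() relies on:

// TerminatorStub is a placeholder, not HotSpot code. Per the contract in
// owstTaskTerminator.hpp, the caller must ensure the previous round of use
// has finished before resetting; nothing here synchronizes.
struct TerminatorStub {
  unsigned _n_threads;
  unsigned _offered_termination;
  explicit TerminatorStub(unsigned n_threads)
    : _n_threads(n_threads), _offered_termination(0) {}
  void reset_for_reuse(unsigned n_threads) {
    _offered_termination = 0;   // previous round terminated (or never ran)
    _n_threads = n_threads;     // uint in, uint stored: no cast needed
  }
};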

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2001, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -30,6 +30,7 @@
#include "gc/g1/g1HeapVerifier.hpp"
#include "gc/g1/g1RegionMarkStatsCache.hpp"
#include "gc/g1/heapRegionSet.hpp"
#include "gc/shared/owstTaskTerminator.hpp"
#include "gc/shared/taskqueue.hpp"
#include "gc/shared/verifyOption.hpp"
#include "gc/shared/workgroup.hpp"
@@ -328,7 +329,7 @@ class G1ConcurrentMark : public CHeapObj<mtGC> {
G1CMTask** _tasks; // Task queue array (max_worker_id length)
G1CMTaskQueueSet* _task_queues; // Task queue set
TaskTerminator _terminator; // For termination
OWSTTaskTerminator _terminator; // For termination
// Two sync barriers that are used to synchronize tasks when an
// overflow occurs. The algorithm is the following. All tasks enter
@@ -414,10 +415,10 @@ class G1ConcurrentMark : public CHeapObj<mtGC> {
// Prints all gathered CM-related statistics
void print_stats();
HeapWord* finger() { return _finger; }
bool concurrent() { return _concurrent; }
uint active_tasks() { return _num_active_tasks; }
ParallelTaskTerminator* terminator() const { return _terminator.terminator(); }
HeapWord* finger() { return _finger; }
bool concurrent() { return _concurrent; }
uint active_tasks() { return _num_active_tasks; }
OWSTTaskTerminator* terminator() { return &_terminator; }
// Claims the next available region to be scanned by a marking
// task/thread. It might return NULL if the next region is empty or

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -59,7 +59,7 @@ void G1FullGCMarkTask::work(uint worker_id) {
}
// Mark stack is populated, now process and drain it.
marker->complete_marking(collector()->oop_queue_set(), collector()->array_queue_set(), _terminator.terminator());
marker->complete_marking(collector()->oop_queue_set(), collector()->array_queue_set(), &_terminator);
// This is the point where the entire marking should have completed.
assert(marker->oop_stack()->is_empty(), "Marking should have completed");

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -36,7 +36,7 @@
class G1FullGCMarkTask : public G1FullGCTask {
G1RootProcessor _root_processor;
TaskTerminator _terminator;
OWSTTaskTerminator _terminator;
public:
G1FullGCMarkTask(G1FullCollector* collector);

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -25,6 +25,7 @@
#include "precompiled.hpp"
#include "classfile/classLoaderData.hpp"
#include "gc/g1/g1FullGCMarker.inline.hpp"
#include "gc/shared/owstTaskTerminator.hpp"
#include "gc/shared/referenceProcessor.hpp"
#include "gc/shared/verifyOption.hpp"
#include "memory/iterator.inline.hpp"
@@ -49,7 +50,7 @@ G1FullGCMarker::~G1FullGCMarker() {
void G1FullGCMarker::complete_marking(OopQueueSet* oop_stacks,
ObjArrayTaskQueueSet* array_stacks,
ParallelTaskTerminator* terminator) {
OWSTTaskTerminator* terminator) {
do {
drain_stack();
ObjArrayTask steal_array;

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -87,7 +87,7 @@ public:
inline void drain_stack();
void complete_marking(OopQueueSet* oop_stacks,
ObjArrayTaskQueueSet* array_stacks,
ParallelTaskTerminator* terminator);
OWSTTaskTerminator* terminator);
// Closure getters
CLDToOopClosure* cld_closure() { return &_cld_closure; }

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -61,7 +61,7 @@ private:
typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
ProcessTask& _proc_task;
G1FullCollector* _collector;
TaskTerminator _terminator;
OWSTTaskTerminator _terminator;
public:
G1RefProcTaskProxy(ProcessTask& proc_task,

@@ -38,7 +38,6 @@ class ParallelCompactData;
class ParMarkBitMap;
class ParCompactionManager : public CHeapObj<mtGC> {
friend class ParallelTaskTerminator;
friend class ParMarkBitMap;
friend class PSParallelCompact;
friend class CompactionWithStealingTask;
@@ -96,7 +95,7 @@ private:
static void initialize(ParMarkBitMap* mbm);
protected:
// Array of tasks. Needed by the ParallelTaskTerminator.
// Array of task queues. Needed by the task terminator.
static RegionTaskQueueSet* region_array() { return _region_array; }
OverflowTaskQueue<oop, mtGC>* marking_stack() { return &_marking_stack; }

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -49,6 +49,7 @@
#include "gc/shared/gcTrace.hpp"
#include "gc/shared/gcTraceTime.inline.hpp"
#include "gc/shared/isGCActiveMark.hpp"
#include "gc/shared/owstTaskTerminator.hpp"
#include "gc/shared/referencePolicy.hpp"
#include "gc/shared/referenceProcessor.hpp"
#include "gc/shared/referenceProcessorPhaseTimes.hpp"
@@ -1969,7 +1970,7 @@ bool PSParallelCompact::invoke_no_policy(bool maximum_heap_compaction) {
collection_exit.ticks());
#ifdef TRACESPINNING
ParallelTaskTerminator::print_termination_counts();
OWSTTaskTerminator::print_termination_counts();
#endif
AdaptiveSizePolicyOutput::print(size_policy, heap->total_collections());
@@ -2149,7 +2150,7 @@ static void mark_from_roots_work(ParallelRootType::Value root_type, uint worker_
cm->follow_marking_stacks();
}
static void steal_marking_work(ParallelTaskTerminator& terminator, uint worker_id) {
static void steal_marking_work(OWSTTaskTerminator& terminator, uint worker_id) {
assert(ParallelScavengeHeap::heap()->is_gc_active(), "called outside gc");
ParCompactionManager* cm =
@@ -2173,7 +2174,7 @@ class MarkFromRootsTask : public AbstractGangTask {
typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
StrongRootsScope _strong_roots_scope; // needed for Threads::possibly_parallel_threads_do
SequentialSubTasksDone _subtasks;
TaskTerminator _terminator;
OWSTTaskTerminator _terminator;
uint _active_workers;
public:
@@ -2197,7 +2198,7 @@ public:
Threads::possibly_parallel_threads_do(true /*parallel */, &closure);
if (_active_workers > 1) {
steal_marking_work(*_terminator.terminator(), worker_id);
steal_marking_work(_terminator, worker_id);
}
}
};
@@ -2206,7 +2207,7 @@ class PCRefProcTask : public AbstractGangTask {
typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
ProcessTask& _task;
uint _ergo_workers;
TaskTerminator _terminator;
OWSTTaskTerminator _terminator;
public:
PCRefProcTask(ProcessTask& task, uint ergo_workers) :
@@ -2227,7 +2228,7 @@ public:
_task.work(worker_id, *PSParallelCompact::is_alive_closure(),
mark_and_push_closure, follow_stack_closure);
steal_marking_work(*_terminator.terminator(), worker_id);
steal_marking_work(_terminator, worker_id);
}
};
@@ -2586,7 +2587,7 @@ void PSParallelCompact::write_block_fill_histogram()
}
#endif // #ifdef ASSERT
static void compaction_with_stealing_work(ParallelTaskTerminator* terminator, uint worker_id) {
static void compaction_with_stealing_work(OWSTTaskTerminator* terminator, uint worker_id) {
assert(ParallelScavengeHeap::heap()->is_gc_active(), "called outside gc");
ParCompactionManager* cm =
@@ -2622,7 +2623,7 @@ static void compaction_with_stealing_work(ParallelTaskTerminator* terminator, ui
class UpdateDensePrefixAndCompactionTask: public AbstractGangTask {
typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
TaskQueue& _tq;
TaskTerminator _terminator;
OWSTTaskTerminator _terminator;
uint _active_workers;
public:
@@ -2644,7 +2645,7 @@ public:
// Once a thread has drained it's stack, it should try to steal regions from
// other threads.
compaction_with_stealing_work(_terminator.terminator(), worker_id);
compaction_with_stealing_work(&_terminator, worker_id);
}
};

@@ -40,9 +40,7 @@ class PSAdaptiveSizePolicy;
class PSYoungGen;
class PSOldGen;
class ParCompactionManager;
class ParallelTaskTerminator;
class PSParallelCompact;
class PreGCValues;
class MoveAndUpdateClosure;
class RefProcTaskExecutor;
class ParallelOldTracer;

@@ -43,6 +43,7 @@
#include "gc/shared/gcTrace.hpp"
#include "gc/shared/gcTraceTime.inline.hpp"
#include "gc/shared/isGCActiveMark.hpp"
#include "gc/shared/owstTaskTerminator.hpp"
#include "gc/shared/referencePolicy.hpp"
#include "gc/shared/referenceProcessor.hpp"
#include "gc/shared/referenceProcessorPhaseTimes.hpp"
@@ -138,7 +139,7 @@ static void scavenge_roots_work(ParallelRootType::Value root_type, uint worker_i
pm->drain_stacks(false);
}
static void steal_work(ParallelTaskTerminator& terminator, uint worker_id) {
static void steal_work(OWSTTaskTerminator& terminator, uint worker_id) {
assert(ParallelScavengeHeap::heap()->is_gc_active(), "called outside gc");
PSPromotionManager* pm =
@@ -218,7 +219,7 @@ class PSRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
class PSRefProcTask : public AbstractGangTask {
typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
TaskTerminator _terminator;
OWSTTaskTerminator _terminator;
ProcessTask& _task;
uint _active_workers;
@@ -240,7 +241,7 @@ public:
_task.work(worker_id, is_alive, keep_alive, evac_followers);
if (_task.marks_oops_alive() && _active_workers > 1) {
steal_work(*_terminator.terminator(), worker_id);
steal_work(_terminator, worker_id);
}
}
};
@@ -314,7 +315,7 @@ class ScavengeRootsTask : public AbstractGangTask {
HeapWord* _gen_top;
uint _active_workers;
bool _is_empty;
TaskTerminator _terminator;
OWSTTaskTerminator _terminator;
public:
ScavengeRootsTask(PSOldGen* old_gen,
@@ -377,7 +378,7 @@ public:
// ParallelGCThreads is > 1.
if (_active_workers > 1) {
steal_work(*_terminator.terminator() , worker_id);
steal_work(_terminator, worker_id);
}
}
};
@@ -731,7 +732,7 @@ bool PSScavenge::invoke_no_policy() {
scavenge_exit.ticks());
#ifdef TRACESPINNING
ParallelTaskTerminator::print_termination_counts();
OWSTTaskTerminator::print_termination_counts();
#endif
AdaptiveSizePolicyOutput::print(size_policy, heap->total_collections());

@@ -315,10 +315,6 @@
develop(uintx, PromotionFailureALotInterval, 5, \
"Total collections between promotion failures a lot") \
\
diagnostic(bool, UseOWSTTaskTerminator, true, \
"Use Optimized Work Stealing Threads task termination " \
"protocol") \
\
experimental(uintx, WorkStealingSleepMillis, 1, \
"Sleep time when sleep is used for yields") \
\

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -678,7 +678,7 @@ void GenCollectedHeap::do_collection(bool full,
}
#ifdef TRACESPINNING
ParallelTaskTerminator::print_termination_counts();
OWSTTaskTerminator::print_termination_counts();
#endif
}

@@ -1,5 +1,6 @@
/*
* Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved.
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -25,12 +26,72 @@
#include "precompiled.hpp"
#include "gc/shared/owstTaskTerminator.hpp"
#include "gc/shared/taskqueue.hpp"
#include "logging/log.hpp"
#ifdef TRACESPINNING
uint OWSTTaskTerminator::_total_yields = 0;
uint OWSTTaskTerminator::_total_spins = 0;
uint OWSTTaskTerminator::_total_peeks = 0;
#endif
OWSTTaskTerminator::OWSTTaskTerminator(uint n_threads, TaskQueueSetSuper* queue_set) :
_n_threads(n_threads),
_queue_set(queue_set),
_offered_termination(0),
_spin_master(NULL) {
_blocker = new Monitor(Mutex::leaf, "OWSTTaskTerminator", false, Monitor::_safepoint_check_never);
}
OWSTTaskTerminator::~OWSTTaskTerminator() {
assert(_offered_termination == 0 || !peek_in_queue_set(), "Precondition");
assert(_offered_termination == 0 || _offered_termination == _n_threads, "Terminated or aborted" );
assert(_spin_master == NULL, "Should have been reset");
assert(_blocker != NULL, "Can not be NULL");
delete _blocker;
}
#ifdef ASSERT
bool OWSTTaskTerminator::peek_in_queue_set() {
return _queue_set->peek();
}
#endif
void OWSTTaskTerminator::yield() {
assert(_offered_termination <= _n_threads, "Invariant");
os::naked_yield();
}
#ifdef TRACESPINNING
void OWSTTaskTerminator::print_termination_counts() {
log_trace(gc, task)("TaskTerminator Yields: %u Spins: %u Peeks: %u",
total_yields(), total_spins(), total_peeks());
}
#endif
void OWSTTaskTerminator::reset_for_reuse() {
if (_offered_termination != 0) {
assert(_offered_termination == _n_threads,
"Terminator may still be in use");
_offered_termination = 0;
}
}
void OWSTTaskTerminator::reset_for_reuse(uint n_threads) {
reset_for_reuse();
_n_threads = n_threads;
}
bool OWSTTaskTerminator::exit_termination(size_t tasks, TerminatorTerminator* terminator) {
return tasks > 0 || (terminator != NULL && terminator->should_exit_termination());
}
size_t OWSTTaskTerminator::tasks_in_queue_set() const {
return _queue_set->tasks();
}
bool OWSTTaskTerminator::offer_termination(TerminatorTerminator* terminator) {
assert(_n_threads > 0, "Initialization is incorrect");
assert(_offered_termination < _n_threads, "Invariant");
@@ -163,7 +224,7 @@ bool OWSTTaskTerminator::do_spin_master_work(TerminatorTerminator* terminator) {
}
#ifdef TRACESPINNING
_total_peeks++;
_total_peeks++;
#endif
size_t tasks = tasks_in_queue_set();
bool exit = exit_termination(tasks, terminator);
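
The elided middle of offer_termination() is the OWST-specific part: the last thread to arrive wakes everyone, earlier arrivals block on _blocker, and one thread serves as spin-master, polling the queues. A self-contained model of just the "all threads arrived" path, with std::mutex/std::condition_variable standing in for HotSpot's Monitor (the spin-master and work-detection paths are omitted):

#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <thread>
#include <vector>

class MiniTerminator {
  std::mutex _blocker;                 // stands in for HotSpot's Monitor
  std::condition_variable _cv;
  const unsigned _n_threads;
  unsigned _offered = 0;
public:
  explicit MiniTerminator(unsigned n_threads) : _n_threads(n_threads) {}

  // Returns only when every thread has offered termination. The real
  // offer_termination() can also return false (work was found); that
  // path is not modeled here.
  bool offer_termination() {
    std::unique_lock<std::mutex> lock(_blocker);
    if (++_offered == _n_threads) {
      _cv.notify_all();                // last arrival wakes all blocked peers
      return true;
    }
    _cv.wait(lock, [this] { return _offered == _n_threads; });
    return true;
  }
};

int main() {
  const unsigned n = 4;
  MiniTerminator terminator(n);
  std::vector<std::thread> workers;
  for (unsigned i = 0; i < n; i++) {
    workers.emplace_back([&terminator, i] {
      // ... drain own queue, fail to steal from others ...
      terminator.offer_termination();
      std::printf("worker %u saw termination\n", i);
    });
  }
  for (std::thread& w : workers) w.join();
  return 0;
}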

@@ -1,5 +1,6 @@
/*
* Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved.
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -24,14 +25,20 @@
#ifndef SHARE_GC_SHARED_OWSTTASKTERMINATOR_HPP
#define SHARE_GC_SHARED_OWSTTASKTERMINATOR_HPP
#include "gc/shared/taskqueue.hpp"
#include "memory/allocation.hpp"
#include "runtime/mutex.hpp"
#include "runtime/thread.hpp"
// Define this to enable additional tracing probes.
#undef TRACESPINNING
class TaskQueueSetSuper;
class TerminatorTerminator;
/*
* OWST stands for Optimized Work Stealing Threads
* Provides a task termination protocol. OWST stands for Optimized Work Stealing Threads
*
* This is an enhanced implementation of Google's work stealing
* This is an enhanced implementation of Google's work stealing task termination
* protocol, which is described in the paper:
* "Wessam Hassanein. 2016. Understanding and improving JVM GC work
* stealing at the data center scale. In Proceedings of the 2016 ACM
@@ -43,38 +50,71 @@
* The intention of above enhancement is to reduce spin-master's latency on detecting new tasks
* for stealing and termination condition.
*/
class OWSTTaskTerminator : public CHeapObj<mtGC> {
uint _n_threads;
TaskQueueSetSuper* _queue_set;
DEFINE_PAD_MINUS_SIZE(0, DEFAULT_CACHE_LINE_SIZE, 0);
volatile uint _offered_termination;
DEFINE_PAD_MINUS_SIZE(1, DEFAULT_CACHE_LINE_SIZE, sizeof(volatile uint));
#ifdef ASSERT
bool peek_in_queue_set();
#endif
void yield();
class OWSTTaskTerminator: public ParallelTaskTerminator {
private:
Monitor* _blocker;
Thread* _spin_master;
#ifdef TRACESPINNING
static uint _total_yields;
static uint _total_spins;
static uint _total_peeks;
#endif
// If we should exit current termination protocol
bool exit_termination(size_t tasks, TerminatorTerminator* terminator);
size_t tasks_in_queue_set() const;
// Perform spin-master task.
// Return true if termination condition is detected, otherwise return false
bool do_spin_master_work(TerminatorTerminator* terminator);
NONCOPYABLE(OWSTTaskTerminator);
public:
OWSTTaskTerminator(uint n_threads, TaskQueueSetSuper* queue_set) :
ParallelTaskTerminator(n_threads, queue_set), _spin_master(NULL) {
_blocker = new Monitor(Mutex::leaf, "OWSTTaskTerminator", false, Monitor::_safepoint_check_never);
}
virtual ~OWSTTaskTerminator() {
assert(_spin_master == NULL, "Should have been reset");
assert(_blocker != NULL, "Can not be NULL");
delete _blocker;
OWSTTaskTerminator(uint n_threads, TaskQueueSetSuper* queue_set);
~OWSTTaskTerminator();
// The current thread has no work, and is ready to terminate if everyone
// else is. If returns "true", all threads are terminated. If returns
// "false", available work has been observed in one of the task queues,
// so the global task is not complete.
bool offer_termination() {
return offer_termination(NULL);
}
// As above, but it also terminates if the should_exit_termination()
// method of the terminator parameter returns true. If terminator is
// NULL, then it is ignored.
bool offer_termination(TerminatorTerminator* terminator);
protected:
// If should exit current termination protocol
virtual bool exit_termination(size_t tasks, TerminatorTerminator* terminator);
// Reset the terminator, so that it may be reused again.
// The caller is responsible for ensuring that this is done
// in an MT-safe manner, once the previous round of use of
// the terminator is finished.
void reset_for_reuse();
// Same as above but the number of parallel threads is set to the
// given number.
void reset_for_reuse(uint n_threads);
private:
size_t tasks_in_queue_set() { return _queue_set->tasks(); }
/*
* Perform spin-master task.
* Return true if termination condition is detected, otherwise return false
*/
bool do_spin_master_work(TerminatorTerminator* terminator);
#ifdef TRACESPINNING
static uint total_yields() { return _total_yields; }
static uint total_spins() { return _total_spins; }
static uint total_peeks() { return _total_peeks; }
static void print_termination_counts();
#endif
};
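
The DEFINE_PAD_MINUS_SIZE pair in the new class isolates _offered_termination on its own cache line: every terminating worker CASes that counter, and the padding keeps it from false-sharing with the read-mostly fields around it. Roughly the same layout in standard C++, assuming a 64-byte cache line:

#include <atomic>

// Layout sketch only; names mirror the fields above, 64 is an assumed
// cache-line size (HotSpot uses DEFAULT_CACHE_LINE_SIZE instead).
struct TerminatorLayoutSketch {
  unsigned _n_threads;                  // read-mostly
  void*    _queue_set;                  // read-mostly
  alignas(64) std::atomic<unsigned> _offered_termination;  // hot: CASed by every worker
  char _pad[64 - sizeof(std::atomic<unsigned>)];           // keep later fields off this line
};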

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2001, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -33,12 +33,6 @@
#include "utilities/debug.hpp"
#include "utilities/stack.inline.hpp"
#ifdef TRACESPINNING
uint ParallelTaskTerminator::_total_yields = 0;
uint ParallelTaskTerminator::_total_spins = 0;
uint ParallelTaskTerminator::_total_peeks = 0;
#endif
#if TASKQUEUE_STATS
const char * const TaskQueueStats::_names[last_stat_id] = {
"qpush", "qpop", "qpop-s", "qattempt", "qsteal", "opush", "omax"
@@ -112,171 +106,9 @@ void TaskQueueStats::verify() const
#endif // ASSERT
#endif // TASKQUEUE_STATS
ParallelTaskTerminator::
ParallelTaskTerminator(uint n_threads, TaskQueueSetSuper* queue_set) :
_n_threads(n_threads),
_queue_set(queue_set),
_offered_termination(0) {}
ParallelTaskTerminator::~ParallelTaskTerminator() {
assert(_offered_termination == 0 || !peek_in_queue_set(), "Precondition");
assert(_offered_termination == 0 || _offered_termination == _n_threads, "Terminated or aborted" );
}
bool ParallelTaskTerminator::peek_in_queue_set() {
return _queue_set->peek();
}
void ParallelTaskTerminator::yield() {
assert(_offered_termination <= _n_threads, "Invariant");
os::naked_yield();
}
void ParallelTaskTerminator::sleep(uint millis) {
assert(_offered_termination <= _n_threads, "Invariant");
os::naked_sleep(millis);
}
bool
ParallelTaskTerminator::offer_termination(TerminatorTerminator* terminator) {
assert(_n_threads > 0, "Initialization is incorrect");
assert(_offered_termination < _n_threads, "Invariant");
Atomic::inc(&_offered_termination);
uint yield_count = 0;
// Number of hard spin loops done since last yield
uint hard_spin_count = 0;
// Number of iterations in the hard spin loop.
uint hard_spin_limit = WorkStealingHardSpins;
// If WorkStealingSpinToYieldRatio is 0, no hard spinning is done.
// If it is greater than 0, then start with a small number
// of spins and increase number with each turn at spinning until
// the count of hard spins exceeds WorkStealingSpinToYieldRatio.
// Then do a yield() call and start spinning afresh.
if (WorkStealingSpinToYieldRatio > 0) {
hard_spin_limit = WorkStealingHardSpins >> WorkStealingSpinToYieldRatio;
hard_spin_limit = MAX2(hard_spin_limit, 1U);
}
// Remember the initial spin limit.
uint hard_spin_start = hard_spin_limit;
// Loop waiting for all threads to offer termination or
// more work.
while (true) {
assert(_offered_termination <= _n_threads, "Invariant");
// Are all threads offering termination?
if (_offered_termination == _n_threads) {
assert(!peek_in_queue_set(), "Precondition");
return true;
} else {
// Look for more work.
// Periodically sleep() instead of yield() to give threads
// waiting on the cores the chance to grab this code
if (yield_count <= WorkStealingYieldsBeforeSleep) {
// Do a yield or hardspin. For purposes of deciding whether
// to sleep, count this as a yield.
yield_count++;
// Periodically call yield() instead spinning
// After WorkStealingSpinToYieldRatio spins, do a yield() call
// and reset the counts and starting limit.
if (hard_spin_count > WorkStealingSpinToYieldRatio) {
yield();
hard_spin_count = 0;
hard_spin_limit = hard_spin_start;
#ifdef TRACESPINNING
_total_yields++;
#endif
} else {
// Hard spin this time
// Increase the hard spinning period but only up to a limit.
hard_spin_limit = MIN2(2*hard_spin_limit,
(uint) WorkStealingHardSpins);
for (uint j = 0; j < hard_spin_limit; j++) {
SpinPause();
}
hard_spin_count++;
#ifdef TRACESPINNING
_total_spins++;
#endif
}
} else {
log_develop_trace(gc, task)("ParallelTaskTerminator::offer_termination() thread " PTR_FORMAT " sleeps after %u yields",
p2i(Thread::current()), yield_count);
yield_count = 0;
// A sleep will cause this processor to seek work on another processor's
// runqueue, if it has nothing else to run (as opposed to the yield
// which may only move the thread to the end of the this processor's
// runqueue).
sleep(WorkStealingSleepMillis);
}
#ifdef TRACESPINNING
_total_peeks++;
#endif
if (peek_in_queue_set() ||
(terminator != NULL && terminator->should_exit_termination())) {
return complete_or_exit_termination();
}
}
}
}
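
For reference, the backoff schedule this removed loop implements (OWSTTaskTerminator's do_spin_master_work() keeps a similar shape): hard-spin bursts that double up to WorkStealingHardSpins, a yield() after every WorkStealingSpinToYieldRatio bursts, and a sleep once yields exceed WorkStealingYieldsBeforeSleep. A standalone model, assuming the HotSpot defaults 4096, 10, and 5000 for those three flags:

#include <algorithm>
#include <cstdio>

int main() {
  // Assumed defaults: WorkStealingHardSpins=4096,
  // WorkStealingSpinToYieldRatio=10, WorkStealingYieldsBeforeSleep=5000.
  const unsigned hard_spins = 4096, spin_to_yield_ratio = 10, yields_before_sleep = 5000;
  unsigned yield_count = 0, hard_spin_count = 0;
  const unsigned hard_spin_start = std::max(hard_spins >> spin_to_yield_ratio, 1u);
  unsigned hard_spin_limit = hard_spin_start;

  for (unsigned iter = 0; iter < 30; iter++) {   // model 30 trips around the wait loop
    if (yield_count <= yields_before_sleep) {
      yield_count++;
      if (hard_spin_count > spin_to_yield_ratio) {
        std::printf("iter %2u: yield()\n", iter); // reset and spin afresh
        hard_spin_count = 0;
        hard_spin_limit = hard_spin_start;
      } else {
        hard_spin_limit = std::min(2 * hard_spin_limit, hard_spins);
        std::printf("iter %2u: spin %4u times\n", iter, hard_spin_limit);
        hard_spin_count++;
      }
    } else {
      // With the default 5000, this branch is only reached after ~5000 yields.
      std::printf("iter %2u: sleep(WorkStealingSleepMillis)\n", iter);
      yield_count = 0;
    }
  }
  return 0;
}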
#ifdef TRACESPINNING
void ParallelTaskTerminator::print_termination_counts() {
log_trace(gc, task)("ParallelTaskTerminator Total yields: %u"
" Total spins: %u Total peeks: %u",
total_yields(),
total_spins(),
total_peeks());
}
#endif
bool ParallelTaskTerminator::complete_or_exit_termination() {
// If termination is ever reached, terminator should stay in such state,
// so that all threads see the same state
uint current_offered = _offered_termination;
uint expected_value;
do {
if (current_offered == _n_threads) {
assert(!peek_in_queue_set(), "Precondition");
return true;
}
expected_value = current_offered;
} while ((current_offered = Atomic::cmpxchg(&_offered_termination, current_offered, current_offered - 1)) != expected_value);
assert(_offered_termination < _n_threads, "Invariant");
return false;
}
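
This cmpxchg loop is the subtle piece of the removed class: a thread that spotted work tries to retract its termination offer, but if all threads have meanwhile arrived, termination has been reached and must stand. The same logic as a sketch over std::atomic:

#include <atomic>

// Returns true if termination had already been reached (the offer must
// stand), false if this thread successfully retracted its offer and
// should go back to looking for work.
bool complete_or_exit(std::atomic<unsigned>& offered_termination, unsigned n_threads) {
  unsigned current = offered_termination.load();
  while (true) {
    if (current == n_threads) {
      return true;                     // everyone arrived: termination stands
    }
    // Try to decrement; on failure, current is reloaded and we re-check.
    if (offered_termination.compare_exchange_weak(current, current - 1)) {
      return false;
    }
  }
}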
void ParallelTaskTerminator::reset_for_reuse() {
if (_offered_termination != 0) {
assert(_offered_termination == _n_threads,
"Terminator may still be in use");
_offered_termination = 0;
}
}
#ifdef ASSERT
bool ObjArrayTask::is_valid() const {
return _obj != NULL && _obj->is_objArray() && _index >= 0 &&
_index < objArrayOop(_obj)->length();
}
#endif // ASSERT
void ParallelTaskTerminator::reset_for_reuse(uint n_threads) {
reset_for_reuse();
_n_threads = n_threads;
}
TaskTerminator::TaskTerminator(uint n_threads, TaskQueueSetSuper* queue_set) :
_terminator(UseOWSTTaskTerminator ? new OWSTTaskTerminator(n_threads, queue_set)
: new ParallelTaskTerminator(n_threads, queue_set)) {
}
TaskTerminator::~TaskTerminator() {
if (_terminator != NULL) {
delete _terminator;
}
}
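
What is deleted here, in isolation: a stack wrapper owning a heap-allocated implementation chosen by the UseOWSTTaskTerminator flag (itself removed from the flag table earlier in this commit). A stub-typed sketch of that removed dispatch, for contrast with the by-value usage everywhere else in the diff:

// Stub types; this models only the ownership/dispatch shape, not HotSpot code.
struct ImplBase { virtual ~ImplBase() {} };
struct OWSTImpl : ImplBase {};
struct LegacyImpl : ImplBase {};

class TaskTerminatorSketch {            // was a non-copyable StackObj
  ImplBase* _terminator;
public:
  explicit TaskTerminatorSketch(bool use_owst)   // was the UseOWSTTaskTerminator flag
    : _terminator(use_owst ? static_cast<ImplBase*>(new OWSTImpl())
                           : static_cast<ImplBase*>(new LegacyImpl())) {}
  ~TaskTerminatorSketch() { delete _terminator; }
  ImplBase* terminator() const { return _terminator; }
};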

@@ -443,89 +443,6 @@ public:
virtual bool should_exit_termination() = 0;
};
// A class to aid in the termination of a set of parallel tasks using
// TaskQueueSet's for work stealing.
#undef TRACESPINNING
class ParallelTaskTerminator: public CHeapObj<mtGC> {
protected:
uint _n_threads;
TaskQueueSetSuper* _queue_set;
DEFINE_PAD_MINUS_SIZE(0, DEFAULT_CACHE_LINE_SIZE, 0);
volatile uint _offered_termination;
DEFINE_PAD_MINUS_SIZE(1, DEFAULT_CACHE_LINE_SIZE, sizeof(volatile uint));
#ifdef TRACESPINNING
static uint _total_yields;
static uint _total_spins;
static uint _total_peeks;
#endif
bool peek_in_queue_set();
protected:
virtual void yield();
void sleep(uint millis);
// Called when exiting termination is requested.
// When the request is made, terminator may have already terminated
// (e.g. all threads are arrived and offered termination). In this case,
// it should ignore the request and complete the termination.
// Return true if termination is completed. Otherwise, return false.
bool complete_or_exit_termination();
public:
// "n_threads" is the number of threads to be terminated. "queue_set" is a
// queue sets of work queues of other threads.
ParallelTaskTerminator(uint n_threads, TaskQueueSetSuper* queue_set);
virtual ~ParallelTaskTerminator();
// The current thread has no work, and is ready to terminate if everyone
// else is. If returns "true", all threads are terminated. If returns
// "false", available work has been observed in one of the task queues,
// so the global task is not complete.
bool offer_termination() {
return offer_termination(NULL);
}
// As above, but it also terminates if the should_exit_termination()
// method of the terminator parameter returns true. If terminator is
// NULL, then it is ignored.
virtual bool offer_termination(TerminatorTerminator* terminator);
// Reset the terminator, so that it may be reused again.
// The caller is responsible for ensuring that this is done
// in an MT-safe manner, once the previous round of use of
// the terminator is finished.
void reset_for_reuse();
// Same as above but the number of parallel threads is set to the
// given number.
void reset_for_reuse(uint n_threads);
#ifdef TRACESPINNING
static uint total_yields() { return _total_yields; }
static uint total_spins() { return _total_spins; }
static uint total_peeks() { return _total_peeks; }
static void print_termination_counts();
#endif
};
class TaskTerminator : public StackObj {
private:
ParallelTaskTerminator* _terminator;
NONCOPYABLE(TaskTerminator);
public:
TaskTerminator(uint n_threads, TaskQueueSetSuper* queue_set);
~TaskTerminator();
ParallelTaskTerminator* terminator() const {
return _terminator;
}
};
typedef GenericTaskQueue<oop, mtGC> OopTaskQueue;
typedef GenericTaskQueueSet<OopTaskQueue, mtGC> OopTaskQueueSet;

@@ -51,12 +51,7 @@ bool ShenandoahObjToScanQueueSet::is_empty() {
}
ShenandoahTaskTerminator::ShenandoahTaskTerminator(uint n_threads, TaskQueueSetSuper* queue_set) :
_terminator(new OWSTTaskTerminator(n_threads, queue_set)) { }
ShenandoahTaskTerminator::~ShenandoahTaskTerminator() {
assert(_terminator != NULL, "Invariant");
delete _terminator;
}
_terminator(n_threads, queue_set) { }
#if TASKQUEUE_STATS
void ShenandoahObjToScanQueueSet::print_taskqueue_stats_hdr(outputStream* const st) {

@@ -340,16 +340,15 @@
class ShenandoahTaskTerminator : public StackObj {
private:
OWSTTaskTerminator* const _terminator;
OWSTTaskTerminator _terminator;
public:
ShenandoahTaskTerminator(uint n_threads, TaskQueueSetSuper* queue_set);
~ShenandoahTaskTerminator();
bool offer_termination(ShenandoahTerminatorTerminator* terminator) {
return _terminator->offer_termination(terminator);
return _terminator.offer_termination(terminator);
}
void reset_for_reuse() { _terminator->reset_for_reuse(); }
void reset_for_reuse() { _terminator.reset_for_reuse(); }
bool offer_termination() { return offer_termination((ShenandoahTerminatorTerminator*)NULL); }
};
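
Same simplification as the TaskTerminator removal in taskqueue.cpp: with a single implementation left, Shenandoah's wrapper embeds the terminator by value, dropping the new/delete pair and the explicit destructor. A stub-typed sketch:

// Stand-in types; mirrors the ShenandoahTaskTerminator change above.
struct QueueSetStub {};
struct TerminatorStub {
  TerminatorStub(unsigned n_threads, QueueSetStub* queues) {}
  bool offer_termination() { return true; }
};

class ShenandoahStyleWrapper {
  TerminatorStub _terminator;   // by value: lives and dies with the wrapper
public:
  ShenandoahStyleWrapper(unsigned n_threads, QueueSetStub* queues)
    : _terminator(n_threads, queues) {}   // was: new TerminatorStub(...)
  bool offer_termination() { return _terminator.offer_termination(); }
};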