8245961: Shenandoah: move some root marking to concurrent phase

Reviewed-by: shade
This commit is contained in:
Zhengyu Gu 2020-06-02 14:57:40 -04:00
parent 8752e02e66
commit 512cc3ebf2
8 changed files with 198 additions and 82 deletions

@ -174,7 +174,6 @@ public:
rp = NULL;
}
_cm->concurrent_scan_code_roots(worker_id, rp);
_cm->mark_loop(worker_id, _terminator, rp,
true, // cancellable
ShenandoahStringDedup::is_enabled()); // perform string dedup
@ -215,6 +214,44 @@ public:
}
};
// Process concurrent roots at safepoints
//
// Gang task that walks the "concurrent" root set through the non-concurrent
// flavour of ShenandoahConcurrentRootScanner. T is the oop closure type that
// work() instantiates for each worker; it must be constructible from
// (ShenandoahObjToScanQueue*, ReferenceProcessor*).
template <typename T>
class ShenandoahProcessConcurrentRootsTask : public AbstractGangTask {
private:
// Root scanner, instantiated with CONCURRENT == false.
ShenandoahConcurrentRootScanner<false /* concurrent */> _rs;
// Marking context; supplies the per-worker task queue used by work().
ShenandoahConcurrentMark* const _cm;
// Reference processor passed to the closure. Remains NULL unless the heap
// reports process_references() at construction time.
ReferenceProcessor* _rp;
public:
// cm: marking context; phase and nworkers are forwarded to the root scanner.
ShenandoahProcessConcurrentRootsTask(ShenandoahConcurrentMark* cm,
ShenandoahPhaseTimings::Phase phase,
uint nworkers);
// Per-worker entry point: applies a T closure to the roots for worker_id.
void work(uint worker_id);
};
// Builds the safepoint-time task that processes concurrent roots.
//   cm       - marking context whose task queues the workers push into
//   phase    - timing phase forwarded to the root scanner
//   nworkers - worker count forwarded to the root scanner
// The reference processor is only picked up when the heap processes
// references in this cycle; otherwise closures run with a NULL processor.
template <typename T>
ShenandoahProcessConcurrentRootsTask<T>::ShenandoahProcessConcurrentRootsTask(ShenandoahConcurrentMark* cm,
                                                                              ShenandoahPhaseTimings::Phase phase,
                                                                              uint nworkers) :
  AbstractGangTask("Shenandoah STW Concurrent Mark Task"),
  _rs(nworkers, phase),
  _cm(cm),
  _rp(NULL) {
  ShenandoahHeap* const sh_heap = ShenandoahHeap::heap();
  if (!sh_heap->process_references()) {
    // Reference processing is off: keep _rp as NULL.
    return;
  }

  _rp = sh_heap->ref_processor();
  shenandoah_assert_rp_isalive_installed();
}
// Per-worker body: builds a closure of type T over this worker's task queue
// and the (possibly NULL) reference processor, then applies it to the roots
// exposed by the scanner.
template <typename T>
void ShenandoahProcessConcurrentRootsTask<T>::work(uint worker_id) {
  ShenandoahParallelWorkerSession worker_session(worker_id);
  T root_closure(_cm->task_queues()->queue(worker_id), _rp);
  _rs.oops_do(&root_closure, worker_id);
}
class ShenandoahFinalMarkingTask : public AbstractGangTask {
private:
ShenandoahConcurrentMark* _cm;
@ -267,13 +304,6 @@ public:
}
}
if (heap->is_degenerated_gc_in_progress() || heap->is_full_gc_in_progress()) {
// Full GC does not execute concurrent cycle.
// Degenerated cycle may bypass concurrent cycle.
// So code roots might not be scanned, let's scan here.
_cm->concurrent_scan_code_roots(worker_id, rp);
}
_cm->mark_loop(worker_id, _terminator, rp,
false, // not cancellable
_dedup_string);
@ -308,8 +338,6 @@ void ShenandoahConcurrentMark::mark_roots(ShenandoahPhaseTimings::Phase root_pha
ShenandoahInitMarkRootsTask<NONE> mark_roots(&root_proc);
workers->run_task(&mark_roots);
}
clear_claim_codecache();
}
void ShenandoahConcurrentMark::update_roots(ShenandoahPhaseTimings::Phase root_phase) {
@ -390,34 +418,47 @@ void ShenandoahConcurrentMark::initialize(uint workers) {
}
}
void ShenandoahConcurrentMark::concurrent_scan_code_roots(uint worker_id, ReferenceProcessor* rp) {
if (_heap->unload_classes()) {
return;
}
// Mark concurrent roots during concurrent phases
class ShenandoahMarkConcurrentRootsTask : public AbstractGangTask {
private:
SuspendibleThreadSetJoiner _sts_joiner;
ShenandoahConcurrentRootScanner<true /* concurrent */> _rs;
ShenandoahObjToScanQueueSet* const _queue_set;
ReferenceProcessor* const _rp;
if (claim_codecache()) {
ShenandoahObjToScanQueue* q = task_queues()->queue(worker_id);
MutexLocker mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);
// TODO: We can not honor StringDeduplication here, due to lock ranking
// inversion. So, we may miss some deduplication candidates.
if (_heap->has_forwarded_objects()) {
ShenandoahMarkResolveRefsClosure cl(q, rp);
CodeBlobToOopClosure blobs(&cl, !CodeBlobToOopClosure::FixRelocations);
CodeCache::blobs_do(&blobs);
} else {
ShenandoahMarkRefsClosure cl(q, rp);
CodeBlobToOopClosure blobs(&cl, !CodeBlobToOopClosure::FixRelocations);
CodeCache::blobs_do(&blobs);
}
}
public:
ShenandoahMarkConcurrentRootsTask(ShenandoahObjToScanQueueSet* qs,
ReferenceProcessor* rp,
ShenandoahPhaseTimings::Phase phase,
uint nworkers);
void work(uint worker_id);
};
// Builds the task that marks concurrent roots.
//   qs       - queue set; work() picks the queue matching its worker id
//   rp       - reference processor handed to the marking closure (may be NULL)
//   phase    - timing phase forwarded to the root scanner
//   nworkers - worker count forwarded to the root scanner
// Debug builds verify that the heap has no forwarded objects at this point.
ShenandoahMarkConcurrentRootsTask::ShenandoahMarkConcurrentRootsTask(ShenandoahObjToScanQueueSet* qs,
                                                                     ReferenceProcessor* rp,
                                                                     ShenandoahPhaseTimings::Phase phase,
                                                                     uint nworkers) :
  AbstractGangTask("Shenandoah Concurrent Mark Task"),
  _rs(nworkers, phase),
  _queue_set(qs),
  _rp(rp)
{
  assert(!ShenandoahHeap::heap()->has_forwarded_objects(), "Not expected");
}
// Per-worker body: applies a ShenandoahMarkResolveRefsClosure, bound to this
// worker's queue and the task's reference processor, to the roots exposed by
// the concurrent root scanner.
void ShenandoahMarkConcurrentRootsTask::work(uint worker_id) {
  ShenandoahConcurrentWorkerSession worker_session(worker_id);
  ShenandoahMarkResolveRefsClosure mark_cl(_queue_set->queue(worker_id), _rp);
  _rs.oops_do(&mark_cl, worker_id);
}
void ShenandoahConcurrentMark::mark_from_roots() {
WorkGang* workers = _heap->workers();
uint nworkers = workers->active_workers();
ReferenceProcessor* rp = NULL;
if (_heap->process_references()) {
ReferenceProcessor* rp = _heap->ref_processor();
rp = _heap->ref_processor();
rp->set_active_mt_degree(nworkers);
// enable ("weak") refs discovery
@ -431,6 +472,13 @@ void ShenandoahConcurrentMark::mark_from_roots() {
task_queues()->reserve(nworkers);
{
ShenandoahGCPhase phase(ShenandoahPhaseTimings::conc_mark_roots);
// Use separate task to mark concurrent roots, since it may hold ClassLoaderData_lock and CodeCache_lock
ShenandoahMarkConcurrentRootsTask task(task_queues(), rp, ShenandoahPhaseTimings::conc_mark_roots, nworkers);
workers->run_task(&task);
}
{
TaskTerminator terminator(nworkers, task_queues());
ShenandoahConcurrentMarkingTask task(this, &terminator);
@ -445,30 +493,50 @@ void ShenandoahConcurrentMark::finish_mark_from_roots(bool full_gc) {
uint nworkers = _heap->workers()->active_workers();
// Finally mark everything else we've got in our queues during the previous steps.
// It does two different things for concurrent vs. mark-compact GC:
// - For concurrent GC, it starts with empty task queues, drains the remaining
// SATB buffers, and then completes the marking closure.
// - For mark-compact GC, it starts out with the task queues seeded by initial
// root scan, and completes the closure, thus marking through all live objects
// The implementation is the same, so it's shared here.
{
ShenandoahGCPhase phase(full_gc ?
ShenandoahPhaseTimings::full_gc_mark_finish_queues :
ShenandoahPhaseTimings::finish_queues);
task_queues()->reserve(nworkers);
shenandoah_assert_rp_isalive_not_installed();
ShenandoahIsAliveSelector is_alive;
ReferenceProcessorIsAliveMutator fix_isalive(_heap->ref_processor(), is_alive.is_alive_closure());
StrongRootsScope scope(nworkers);
TaskTerminator terminator(nworkers, task_queues());
ShenandoahFinalMarkingTask task(this, &terminator, ShenandoahStringDedup::is_enabled());
_heap->workers()->run_task(&task);
}
// Full GC does not execute the concurrent cycle, and a degenerated cycle may bypass it.
// In those cases the concurrent roots might not have been scanned, so scan them here.
// Ideally this would piggyback on ShenandoahFinalMarkingTask, but that makes time tracking
// very hard. Given that full GC and degenerated GC are rare, use a separate task.
if (_heap->is_degenerated_gc_in_progress() || _heap->is_full_gc_in_progress()) {
ShenandoahPhaseTimings::Phase phase = _heap->is_full_gc_in_progress() ?
ShenandoahPhaseTimings::full_gc_scan_conc_roots :
ShenandoahPhaseTimings::degen_gc_scan_conc_roots;
ShenandoahGCPhase gc_phase(phase);
if (_heap->has_forwarded_objects()) {
ShenandoahProcessConcurrentRootsTask<ShenandoahMarkResolveRefsClosure> task(this, phase, nworkers);
_heap->workers()->run_task(&task);
} else {
ShenandoahProcessConcurrentRootsTask<ShenandoahMarkRefsClosure> task(this, phase, nworkers);
_heap->workers()->run_task(&task);
}
}
assert(task_queues()->is_empty(), "Should be empty");
// Finally mark everything else we've got in our queues during the previous steps.
// It does two different things for concurrent vs. mark-compact GC:
// - For concurrent GC, it starts with empty task queues, drains the remaining
// SATB buffers, and then completes the marking closure.
// - For mark-compact GC, it starts out with the task queues seeded by initial
// root scan, and completes the closure, thus marking through all live objects
// The implementation is the same, so it's shared here.
{
ShenandoahGCPhase phase(full_gc ?
ShenandoahPhaseTimings::full_gc_mark_finish_queues :
ShenandoahPhaseTimings::finish_queues);
task_queues()->reserve(nworkers);
StrongRootsScope scope(nworkers);
TaskTerminator terminator(nworkers, task_queues());
ShenandoahFinalMarkingTask task(this, &terminator, ShenandoahStringDedup::is_enabled());
_heap->workers()->run_task(&task);
}
assert(task_queues()->is_empty(), "Should be empty");
}
// When we're done marking everything, we process weak references.
if (_heap->process_references()) {
@ -942,11 +1010,3 @@ void ShenandoahConcurrentMark::mark_loop_work(T* cl, ShenandoahLiveData* live_da
}
}
}
// Attempts to set the code cache claim flag and reports the outcome of
// try_set(). concurrent_scan_code_roots() only scans the code cache when
// this returns true.
bool ShenandoahConcurrentMark::claim_codecache() {
  const bool claimed = _claimed_codecache.try_set();
  return claimed;
}
// Resets the code cache claim flag so that a later claim_codecache() call
// can set it again.
void ShenandoahConcurrentMark::clear_claim_codecache()
{
  _claimed_codecache.unset();
}

@ -91,16 +91,6 @@ private:
public:
void preclean_weak_refs();
// ---------- Concurrent code cache
//
private:
ShenandoahSharedFlag _claimed_codecache;
public:
void concurrent_scan_code_roots(uint worker_id, ReferenceProcessor* rp);
bool claim_codecache();
void clear_claim_codecache();
// ---------- Helpers
// Used from closures, need to be public
//

@ -523,13 +523,13 @@ void ShenandoahNMethodList::transfer(ShenandoahNMethodList* const list, int limi
}
// Takes one more reference on this list and returns the list itself.
//
// NOTE(review): the first assertion (lock owned by the current thread) is
// strictly stronger than the second (locked or at a safepoint), so as written
// the second can never be the deciding check. This looks like the old and new
// form of the same precondition; confirm which one is intended before relying
// on callers that run at a safepoint without holding CodeCache_lock.
ShenandoahNMethodList* ShenandoahNMethodList::acquire() {
  assert(CodeCache_lock->owned_by_self(), "Lock must be held");
  assert_locked_or_safepoint(CodeCache_lock);

  ++_ref_count;
  return this;
}
void ShenandoahNMethodList::release() {
assert(CodeCache_lock->owned_by_self(), "Lock must be held");
assert_locked_or_safepoint(CodeCache_lock);
_ref_count--;
if (_ref_count == 0) {
delete this;

@ -103,12 +103,15 @@ bool ShenandoahPhaseTimings::is_worker_phase(Phase phase) {
case full_gc_scan_roots:
case full_gc_update_roots:
case full_gc_adjust_roots:
case degen_gc_scan_conc_roots:
case degen_gc_update_roots:
case full_gc_scan_conc_roots:
case full_gc_purge_class_unload:
case full_gc_purge_weak_par:
case purge_class_unload:
case purge_weak_par:
case heap_iteration_roots:
case conc_mark_roots:
case conc_weak_roots_work:
case conc_strong_roots:
return true;

@ -68,6 +68,9 @@ class outputStream;
f(resize_tlabs, " Resize TLABs") \
\
f(conc_mark, "Concurrent Marking") \
f(conc_mark_roots, " Roots ") \
SHENANDOAH_PAR_PHASE_DO(conc_mark_roots, " CM: ", f) \
\
f(conc_preclean, "Concurrent Precleaning") \
\
f(final_mark_gross, "Pause Final Mark (G)") \
@ -128,6 +131,8 @@ class outputStream;
\
f(degen_gc_gross, "Pause Degenerated GC (G)") \
f(degen_gc, "Pause Degenerated GC (N)") \
f(degen_gc_scan_conc_roots, " Degen Mark Roots") \
SHENANDOAH_PAR_PHASE_DO(degen_gc_conc_mark_, " DM: ", f) \
f(degen_gc_update_roots, " Degen Update Roots") \
SHENANDOAH_PAR_PHASE_DO(degen_gc_update_, " DU: ", f) \
\
@ -137,6 +142,8 @@ class outputStream;
f(full_gc_prepare, " Prepare") \
f(full_gc_scan_roots, " Scan Roots") \
SHENANDOAH_PAR_PHASE_DO(full_gc_scan_roots_, " FS: ", f) \
f(full_gc_scan_conc_roots, " Scan Concurrent Roots") \
SHENANDOAH_PAR_PHASE_DO(full_gc_scan_conc_roots, " FCS: ", f) \
f(full_gc_update_roots, " Update Roots") \
SHENANDOAH_PAR_PHASE_DO(full_gc_update_roots_, " FU: ", f) \
f(full_gc_mark, " Mark") \

@ -28,6 +28,7 @@
#include "classfile/stringTable.hpp"
#include "classfile/systemDictionary.hpp"
#include "code/codeCache.hpp"
#include "code/nmethod.hpp"
#include "gc/shenandoah/shenandoahClosures.inline.hpp"
#include "gc/shenandoah/shenandoahConcurrentRoots.hpp"
#include "gc/shenandoah/shenandoahRootProcessor.inline.hpp"
@ -199,10 +200,12 @@ ShenandoahRootScanner::ShenandoahRootScanner(uint n_workers, ShenandoahPhaseTimi
ShenandoahRootProcessor(phase),
_serial_roots(phase),
_thread_roots(phase, n_workers > 1),
_code_roots(phase),
_vm_roots(phase),
_dedup_roots(phase),
_cld_roots(phase, n_workers) {
_dedup_roots(phase) {
nmethod::oops_do_marking_prologue();
}
// Closes the nmethod oops-do marking bracket: runs the epilogue that matches
// the nmethod::oops_do_marking_prologue() call made by the constructor.
ShenandoahRootScanner::~ShenandoahRootScanner()
{
  nmethod::oops_do_marking_epilogue();
}
void ShenandoahRootScanner::roots_do(uint worker_id, OopClosure* oops) {
@ -232,9 +235,7 @@ void ShenandoahRootScanner::roots_do(uint worker_id, OopClosure* oops, CLDClosur
_serial_roots.oops_do(oops, worker_id);
// Process light-weight/limited parallel roots next
_vm_roots.oops_do(oops, worker_id);
_dedup_roots.oops_do(&always_true, oops, worker_id);
_cld_roots.cld_do(clds, worker_id);
// Process heavy-weight/fully parallel roots last
_thread_roots.threads_do(&tc_cl, worker_id);
@ -249,10 +250,6 @@ void ShenandoahRootScanner::strong_roots_do(uint worker_id, OopClosure* oops, CL
// Process serial-claiming roots first
_serial_roots.oops_do(oops, worker_id);
// Process light-weight/limited parallel roots next
_vm_roots.oops_do(oops, worker_id);
_cld_roots.always_strong_cld_do(clds, worker_id);
// Process heavy-weight/fully parallel roots last
_thread_roots.threads_do(&tc_cl, worker_id);
}

@ -259,13 +259,11 @@ class ShenandoahRootScanner : public ShenandoahRootProcessor {
private:
ShenandoahSerialRoots _serial_roots;
ShenandoahThreadRoots _thread_roots;
ShenandoahCodeCacheRoots _code_roots;
ShenandoahVMRoots<false /*concurrent*/ > _vm_roots;
ShenandoahStringDedupRoots _dedup_roots;
ShenandoahClassLoaderDataRoots<false /*concurrent*/, false /*single threaded*/>
_cld_roots;
public:
ShenandoahRootScanner(uint n_workers, ShenandoahPhaseTimings::Phase phase);
~ShenandoahRootScanner();
// Apply oops, clds and blobs to all strongly reachable roots in the system,
// during class unloading cycle
@ -278,6 +276,22 @@ public:
void roots_do(uint worker_id, OopClosure* oops, CLDClosure* clds, CodeBlobClosure* code, ThreadClosure* tc = NULL);
};
// Scanner for the root groups handled by oops_do(): VM roots, class loader
// data roots and, when class unloading is off, the code cache.
//
// CONCURRENT selects the mode used by the constructor/destructor and by
// oops_do(): when true, CodeCache_lock is taken (without safepoint check)
// around the code cache snapshot and CLDs are claimed with _claim_strong;
// when false, the scanner asserts it runs at a safepoint and uses _claim_none.
template <bool CONCURRENT>
class ShenandoahConcurrentRootScanner {
private:
// VM roots; visited first by oops_do().
ShenandoahVMRoots<CONCURRENT> _vm_roots;
// Class loader data roots; visited via cld_do() or always_strong_cld_do()
// depending on whether class unloading is enabled.
ShenandoahClassLoaderDataRoots<CONCURRENT, false /* single-threaded*/>
_cld_roots;
// Code cache snapshot taken by the constructor; stays NULL when class
// unloading is enabled.
ShenandoahNMethodTableSnapshot* _codecache_snapshot;
// Timing phase used to attribute code cache root work to workers.
ShenandoahPhaseTimings::Phase _phase;
public:
// n_workers is forwarded to the CLD roots; phase to all root groups.
ShenandoahConcurrentRootScanner(uint n_workers, ShenandoahPhaseTimings::Phase phase);
// Finishes the code cache iteration (and unlocks) if the constructor started one.
~ShenandoahConcurrentRootScanner();
// Applies oops to the roots on behalf of worker worker_id.
void oops_do(OopClosure* oops, uint worker_id);
};
// This scanner is only for SH::object_iteration() and only supports single-threaded
// root scanning
class ShenandoahHeapIterationRootScanner : public ShenandoahRootProcessor {

@ -184,6 +184,51 @@ public:
}
};
// Prepares the scanner for a root pass.
//   n_workers - forwarded to the class loader data roots
//   phase     - timing phase shared by all root groups
// When class unloading is off, a code cache snapshot is taken for later
// iteration. In concurrent mode CodeCache_lock is acquired first (without a
// safepoint check) and is only released by the destructor; in non-concurrent
// mode the scanner instead asserts that it is running at a safepoint.
template <bool CONCURRENT>
ShenandoahConcurrentRootScanner<CONCURRENT>::ShenandoahConcurrentRootScanner(uint n_workers,
                                                                             ShenandoahPhaseTimings::Phase phase) :
  _vm_roots(phase),
  _cld_roots(phase, n_workers),
  _codecache_snapshot(NULL),
  _phase(phase) {
  ShenandoahHeap* const heap = ShenandoahHeap::heap();
  const bool scan_code_cache = !heap->unload_classes();

  if (scan_code_cache) {
    if (!CONCURRENT) {
      assert(SafepointSynchronize::is_at_safepoint(), "Must be at a safepoint");
    } else {
      CodeCache_lock->lock_without_safepoint_check();
    }
    // The snapshot must be taken after the lock (if any) has been acquired.
    _codecache_snapshot = ShenandoahCodeRoots::table()->snapshot_for_iteration();
  }

  assert(!CONCURRENT || !heap->has_forwarded_objects(), "Not expecting forwarded pointers during concurrent marking");
}
// Tears down what the constructor set up. The constructor takes the code
// cache snapshot (and, in concurrent mode, CodeCache_lock) only when class
// unloading is off, so the same condition gates the cleanup here. The
// iteration is finished before the lock is released.
template <bool CONCURRENT>
ShenandoahConcurrentRootScanner<CONCURRENT>::~ShenandoahConcurrentRootScanner() {
  if (ShenandoahHeap::heap()->unload_classes()) {
    return;
  }

  ShenandoahCodeRoots::table()->finish_iteration(_codecache_snapshot);
  if (CONCURRENT) {
    CodeCache_lock->unlock();
  }
}
// Applies `oops` to the roots on behalf of worker `worker_id`.
// VM roots are always visited first. With class unloading enabled only the
// always-strong class loader data is visited; otherwise all class loader data
// is visited, followed by the code cache snapshot (timed as CodeCacheRoots).
// CLDs are claimed with _claim_strong in concurrent mode, _claim_none otherwise.
template <bool CONCURRENT>
void ShenandoahConcurrentRootScanner<CONCURRENT>::oops_do(OopClosure* oops, uint worker_id) {
  CLDToOopClosure clds_cl(oops, CONCURRENT ? ClassLoaderData::_claim_strong : ClassLoaderData::_claim_none);
  _vm_roots.oops_do(oops, worker_id);

  if (ShenandoahHeap::heap()->unload_classes()) {
    _cld_roots.always_strong_cld_do(&clds_cl, worker_id);
    return;
  }

  _cld_roots.cld_do(&clds_cl, worker_id);

  // The tracker covers only the code cache walk below.
  ShenandoahWorkerTimingsTracker timer(_phase, ShenandoahPhaseTimings::CodeCacheRoots, worker_id);
  CodeBlobToOopClosure blobs(oops, !CodeBlobToOopClosure::FixRelocations);
  _codecache_snapshot->parallel_blobs_do(&blobs);
}
template <typename IsAlive, typename KeepAlive>
void ShenandoahRootUpdater::roots_do(uint worker_id, IsAlive* is_alive, KeepAlive* keep_alive) {
CodeBlobToOopClosure update_blobs(keep_alive, CodeBlobToOopClosure::FixRelocations);