8301116: Parallelize TLAB resizing in G1
Reviewed-by: ayang, iwalulya
This commit is contained in:
parent
c72f951529
commit
83e2db6ba3
@ -1010,7 +1010,7 @@ void G1CollectedHeap::verify_before_full_collection(bool explicit_gc) {
|
||||
_verifier->verify_bitmap_clear(true /* above_tams_only */);
|
||||
}
|
||||
|
||||
void G1CollectedHeap::prepare_heap_for_mutators() {
|
||||
void G1CollectedHeap::prepare_for_mutator_after_full_collection() {
|
||||
// Delete metaspaces for unloaded class loaders and clean up loader_data graph
|
||||
ClassLoaderDataGraph::purge(/*at_safepoint*/true);
|
||||
DEBUG_ONLY(MetaspaceUtils::verify();)
|
||||
@ -1025,9 +1025,8 @@ void G1CollectedHeap::prepare_heap_for_mutators() {
|
||||
// Rebuild the code root lists for each region
|
||||
rebuild_code_roots();
|
||||
|
||||
// Start a new incremental collection set for the next pause
|
||||
start_new_collection_set();
|
||||
|
||||
allocate_dummy_regions();
|
||||
_allocator->init_mutator_alloc_regions();
|
||||
|
||||
// Post collection state updates.
|
||||
@ -2642,8 +2641,6 @@ class VerifyRegionRemSetClosure : public HeapRegionClosure {
|
||||
};
|
||||
|
||||
void G1CollectedHeap::start_new_collection_set() {
|
||||
double start = os::elapsedTime();
|
||||
|
||||
collection_set()->start_incremental_building();
|
||||
|
||||
clear_region_attr();
|
||||
@ -2654,8 +2651,6 @@ void G1CollectedHeap::start_new_collection_set() {
|
||||
// We redo the verification, but now with respect to the new CSet which
|
||||
// has just got initialized after the previous CSet was freed.
|
||||
_cm->verify_no_collection_set_oops();
|
||||
|
||||
phase_times()->record_start_new_cset_time_ms((os::elapsedTime() - start) * 1000.0);
|
||||
}
|
||||
|
||||
G1HeapVerifier::G1VerifyType G1CollectedHeap::young_collection_verify_type() const {
|
||||
@ -2765,19 +2760,18 @@ G1JFRTracerMark::~G1JFRTracerMark() {
|
||||
_tracer->report_gc_end(_timer->gc_end(), _timer->time_partitions());
|
||||
}
|
||||
|
||||
void G1CollectedHeap::prepare_tlabs_for_mutator() {
|
||||
void G1CollectedHeap::prepare_for_mutator_after_young_collection() {
|
||||
Ticks start = Ticks::now();
|
||||
|
||||
_survivor_evac_stats.adjust_desired_plab_size();
|
||||
_old_evac_stats.adjust_desired_plab_size();
|
||||
|
||||
// Start a new incremental collection set for the mutator phase.
|
||||
start_new_collection_set();
|
||||
allocate_dummy_regions();
|
||||
|
||||
_allocator->init_mutator_alloc_regions();
|
||||
|
||||
resize_all_tlabs();
|
||||
|
||||
phase_times()->record_resize_tlab_time_ms((Ticks::now() - start).seconds() * 1000.0);
|
||||
phase_times()->record_prepare_for_mutator_time_ms((Ticks::now() - start).seconds() * 1000.0);
|
||||
}
|
||||
|
||||
void G1CollectedHeap::retire_tlabs() {
|
||||
|
@ -57,6 +57,7 @@
|
||||
#include "memory/iterator.hpp"
|
||||
#include "memory/memRegion.hpp"
|
||||
#include "runtime/mutexLocker.hpp"
|
||||
#include "runtime/threadSMR.hpp"
|
||||
#include "utilities/bitMap.hpp"
|
||||
|
||||
// A "G1CollectedHeap" is an implementation of a java heap for HotSpot.
|
||||
@ -120,6 +121,32 @@ class G1RegionMappingChangedListener : public G1MappingChangedListener {
|
||||
void on_commit(uint start_idx, size_t num_regions, bool zero_filled) override;
|
||||
};
|
||||
|
||||
// Helper to claim contiguous sets of JavaThread for processing by multiple threads.
|
||||
class G1JavaThreadsListClaimer : public StackObj {
|
||||
ThreadsListHandle _list;
|
||||
uint _claim_step;
|
||||
|
||||
volatile uint _cur_claim;
|
||||
|
||||
// Attempts to claim _claim_step JavaThreads, returning an array of claimed
|
||||
// JavaThread* with count elements. Returns null (and a zero count) if there
|
||||
// are no more threads to claim.
|
||||
JavaThread* const* claim(uint& count);
|
||||
|
||||
public:
|
||||
G1JavaThreadsListClaimer(uint claim_step) : _list(), _claim_step(claim_step), _cur_claim(0) {
|
||||
assert(claim_step > 0, "must be");
|
||||
}
|
||||
|
||||
// Executes the given closure on the elements of the JavaThread list, chunking the
|
||||
// JavaThread set in claim_step chunks for each caller to reduce parallelization
|
||||
// overhead.
|
||||
void apply(ThreadClosure* cl);
|
||||
|
||||
// Total number of JavaThreads that can be claimed.
|
||||
uint length() const { return _list.length(); }
|
||||
};
|
||||
|
||||
class G1CollectedHeap : public CollectedHeap {
|
||||
friend class VM_G1CollectForAllocation;
|
||||
friend class VM_G1CollectFull;
|
||||
@ -491,7 +518,7 @@ private:
|
||||
bool abort_concurrent_cycle();
|
||||
void verify_before_full_collection(bool explicit_gc);
|
||||
void prepare_heap_for_full_collection();
|
||||
void prepare_heap_for_mutators();
|
||||
void prepare_for_mutator_after_full_collection();
|
||||
void abort_refinement();
|
||||
void verify_after_full_collection();
|
||||
void print_heap_after_full_collection();
|
||||
@ -771,7 +798,7 @@ public:
|
||||
// Start a concurrent cycle.
|
||||
void start_concurrent_cycle(bool concurrent_operation_is_full_mark);
|
||||
|
||||
void prepare_tlabs_for_mutator();
|
||||
void prepare_for_mutator_after_young_collection();
|
||||
|
||||
void retire_tlabs();
|
||||
|
||||
|
@ -41,6 +41,7 @@
|
||||
#include "gc/shared/taskqueue.inline.hpp"
|
||||
#include "oops/stackChunkOop.hpp"
|
||||
#include "runtime/atomic.hpp"
|
||||
#include "runtime/threadSMR.inline.hpp"
|
||||
#include "utilities/bitMap.inline.hpp"
|
||||
|
||||
inline bool G1STWIsAliveClosure::do_object_b(oop p) {
|
||||
@ -49,6 +50,30 @@ inline bool G1STWIsAliveClosure::do_object_b(oop p) {
|
||||
return !_g1h->is_in_cset(p) || p->is_forwarded();
|
||||
}
|
||||
|
||||
// Claims the next chunk of at most _claim_step JavaThreads.
// Returns a pointer to the first claimed entry and sets count to the number
// of entries claimed, or returns nullptr with count set to zero when the
// list has no unclaimed entries left.
inline JavaThread* const* G1JavaThreadsListClaimer::claim(uint& count) {
  count = 0;
  const uint num_threads = _list.length();

  // Plain load first: once the list is exhausted the atomic add is skipped.
  if (Atomic::load(&_cur_claim) < num_threads) {
    const uint first = Atomic::fetch_and_add(&_cur_claim, _claim_step);
    // Another caller may have taken the last chunk between the load and the add.
    if (first < num_threads) {
      // The final chunk may be shorter than _claim_step.
      count = MIN2(num_threads - first, _claim_step);
      return _list.list()->threads() + first;
    }
  }
  return nullptr;
}
|
||||
|
||||
// Applies cl to every JavaThread this caller manages to claim, one chunk at a
// time, and returns once claim() reports that nothing is left.
inline void G1JavaThreadsListClaimer::apply(ThreadClosure* cl) {
  uint num_claimed;

  for (JavaThread* const* chunk = claim(num_claimed);
       chunk != nullptr;
       chunk = claim(num_claimed)) {
    JavaThread* const* const chunk_end = chunk + num_claimed;
    for (JavaThread* const* cur = chunk; cur != chunk_end; ++cur) {
      cl->do_thread(*cur);
    }
  }
}
|
||||
|
||||
// Convenience accessor: forwards to the policy's phase_times().
G1GCPhaseTimes* G1CollectedHeap::phase_times() const {
  G1GCPhaseTimes* const times = _policy->phase_times();
  return times;
}
|
||||
|
@ -228,7 +228,7 @@ void G1FullCollector::complete_collection() {
|
||||
// Prepare the bitmap for the next (potentially concurrent) marking.
|
||||
_heap->concurrent_mark()->clear_bitmap(_heap->workers());
|
||||
|
||||
_heap->prepare_heap_for_mutators();
|
||||
_heap->prepare_for_mutator_after_full_collection();
|
||||
|
||||
_heap->resize_all_tlabs();
|
||||
|
||||
|
@ -153,6 +153,8 @@ G1GCPhaseTimes::G1GCPhaseTimes(STWGCTimer* gc_timer, uint max_gc_threads) :
|
||||
_gc_par_phases[RedirtyCards] = new WorkerDataArray<double>("RedirtyCards", "Redirty Logged Cards (ms):", max_gc_threads);
|
||||
_gc_par_phases[RedirtyCards]->create_thread_work_items("Redirtied Cards:");
|
||||
|
||||
_gc_par_phases[ResizeThreadLABs] = new WorkerDataArray<double>("ResizeTLABs", "Resize TLABs (ms):", max_gc_threads);
|
||||
|
||||
_gc_par_phases[FreeCollectionSet] = new WorkerDataArray<double>("FreeCSet", "Free Collection Set (ms):", max_gc_threads);
|
||||
_gc_par_phases[YoungFreeCSet] = new WorkerDataArray<double>("YoungFreeCSet", "Young Free Collection Set (ms):", max_gc_threads);
|
||||
_gc_par_phases[NonYoungFreeCSet] = new WorkerDataArray<double>("NonYoungFreeCSet", "Non-Young Free Collection Set (ms):", max_gc_threads);
|
||||
@ -173,7 +175,6 @@ void G1GCPhaseTimes::reset() {
|
||||
_cur_prepare_merge_heap_roots_time_ms = 0.0;
|
||||
_cur_optional_prepare_merge_heap_roots_time_ms = 0.0;
|
||||
_cur_prepare_tlab_time_ms = 0.0;
|
||||
_cur_resize_tlab_time_ms = 0.0;
|
||||
_cur_post_evacuate_cleanup_1_time_ms = 0.0;
|
||||
_cur_post_evacuate_cleanup_2_time_ms = 0.0;
|
||||
_cur_expand_heap_time_ms = 0.0;
|
||||
@ -184,7 +185,7 @@ void G1GCPhaseTimes::reset() {
|
||||
_recorded_prepare_heap_roots_time_ms = 0.0;
|
||||
_recorded_young_cset_choice_time_ms = 0.0;
|
||||
_recorded_non_young_cset_choice_time_ms = 0.0;
|
||||
_recorded_start_new_cset_time_ms = 0.0;
|
||||
_recorded_prepare_for_mutator_time_ms = 0.0;
|
||||
_recorded_serial_free_cset_time_ms = 0.0;
|
||||
_recorded_total_rebuild_freelist_time_ms = 0.0;
|
||||
_recorded_serial_rebuild_freelist_time_ms = 0.0;
|
||||
@ -489,7 +490,7 @@ double G1GCPhaseTimes::print_post_evacuate_collection_set(bool evacuation_failed
|
||||
_cur_post_evacuate_cleanup_1_time_ms +
|
||||
_cur_post_evacuate_cleanup_2_time_ms +
|
||||
_recorded_total_rebuild_freelist_time_ms +
|
||||
_recorded_start_new_cset_time_ms +
|
||||
_recorded_prepare_for_mutator_time_ms +
|
||||
_cur_expand_heap_time_ms;
|
||||
|
||||
info_time("Post Evacuate Collection Set", sum_ms);
|
||||
@ -527,6 +528,9 @@ double G1GCPhaseTimes::print_post_evacuate_collection_set(bool evacuation_failed
|
||||
debug_phase(_gc_par_phases[SampleCollectionSetCandidates], 1);
|
||||
}
|
||||
debug_phase(_gc_par_phases[RedirtyCards], 1);
|
||||
if (UseTLAB && ResizeTLAB) {
|
||||
debug_phase(_gc_par_phases[ResizeThreadLABs], 1);
|
||||
}
|
||||
debug_phase(_gc_par_phases[FreeCollectionSet], 1);
|
||||
trace_phase(_gc_par_phases[YoungFreeCSet], true, 1);
|
||||
trace_phase(_gc_par_phases[NonYoungFreeCSet], true, 1);
|
||||
@ -537,10 +541,7 @@ double G1GCPhaseTimes::print_post_evacuate_collection_set(bool evacuation_failed
|
||||
trace_time("Serial Rebuild Free List ", _recorded_serial_rebuild_freelist_time_ms);
|
||||
trace_phase(_gc_par_phases[RebuildFreeList]);
|
||||
|
||||
debug_time("Start New Collection Set", _recorded_start_new_cset_time_ms);
|
||||
if (UseTLAB && ResizeTLAB) {
|
||||
debug_time("Resize TLABs", _cur_resize_tlab_time_ms);
|
||||
}
|
||||
debug_time("Prepare For Mutator", _recorded_prepare_for_mutator_time_ms);
|
||||
debug_time("Expand Heap After Collection", _cur_expand_heap_time_ms);
|
||||
|
||||
return sum_ms;
|
||||
|
@ -74,6 +74,7 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
|
||||
FreeCollectionSet,
|
||||
YoungFreeCSet,
|
||||
NonYoungFreeCSet,
|
||||
ResizeThreadLABs,
|
||||
RebuildFreeList,
|
||||
SampleCollectionSetCandidates,
|
||||
MergePSS,
|
||||
@ -179,7 +180,6 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
|
||||
double _cur_optional_prepare_merge_heap_roots_time_ms;
|
||||
|
||||
double _cur_prepare_tlab_time_ms;
|
||||
double _cur_resize_tlab_time_ms;
|
||||
|
||||
double _cur_concatenate_dirty_card_logs_time_ms;
|
||||
|
||||
@ -199,7 +199,7 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
|
||||
double _recorded_young_cset_choice_time_ms;
|
||||
double _recorded_non_young_cset_choice_time_ms;
|
||||
|
||||
double _recorded_start_new_cset_time_ms;
|
||||
double _recorded_prepare_for_mutator_time_ms;
|
||||
|
||||
double _recorded_serial_free_cset_time_ms;
|
||||
|
||||
@ -276,10 +276,6 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
|
||||
_cur_prepare_tlab_time_ms = ms;
|
||||
}
|
||||
|
||||
void record_resize_tlab_time_ms(double ms) {
|
||||
_cur_resize_tlab_time_ms = ms;
|
||||
}
|
||||
|
||||
// Stores the given duration (ms) in _cur_concatenate_dirty_card_logs_time_ms,
// replacing any previously recorded value.
void record_concatenate_dirty_card_logs_time_ms(double ms) {
|
||||
_cur_concatenate_dirty_card_logs_time_ms = ms;
|
||||
}
|
||||
@ -356,8 +352,8 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
|
||||
_recorded_non_young_cset_choice_time_ms = time_ms;
|
||||
}
|
||||
|
||||
void record_start_new_cset_time_ms(double time_ms) {
|
||||
_recorded_start_new_cset_time_ms = time_ms;
|
||||
void record_prepare_for_mutator_time_ms(double time_ms) {
|
||||
_recorded_prepare_for_mutator_time_ms = time_ms;
|
||||
}
|
||||
|
||||
void record_cur_collection_start_sec(double time_ms) {
|
||||
|
@ -1022,9 +1022,7 @@ void G1YoungCollector::post_evacuate_collection_set(G1EvacInfo* evacuation_info,
|
||||
|
||||
evacuation_info->set_bytes_used(_g1h->bytes_used_during_gc());
|
||||
|
||||
_g1h->start_new_collection_set();
|
||||
|
||||
_g1h->prepare_tlabs_for_mutator();
|
||||
_g1h->prepare_for_mutator_after_young_collection();
|
||||
|
||||
_g1h->gc_epilogue(false);
|
||||
|
||||
|
@ -37,6 +37,8 @@
|
||||
#include "gc/g1/g1YoungGCPostEvacuateTasks.hpp"
|
||||
#include "gc/shared/preservedMarks.inline.hpp"
|
||||
#include "jfr/jfrEvents.hpp"
|
||||
#include "runtime/threads.hpp"
|
||||
#include "runtime/threadSMR.hpp"
|
||||
#include "utilities/ticks.hpp"
|
||||
|
||||
class G1PostEvacuateCollectionSetCleanupTask1::MergePssTask : public G1AbstractSubTask {
|
||||
@ -701,6 +703,31 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
// Sub-task that resizes the TLAB of every JavaThread. The thread list is split
// into chunks of ThreadsPerWorker threads which the workers claim through a
// shared G1JavaThreadsListClaimer. Time is reported under the
// G1GCPhaseTimes::ResizeThreadLABs phase.
class G1PostEvacuateCollectionSetCleanupTask2::ResizeTLABsTask : public G1AbstractSubTask {
  // Shared by all workers; hands out disjoint chunks of the JavaThread list.
  G1JavaThreadsListClaimer _claimer;

  // There is not much work per thread so the number of threads per worker is high.
  static const uint ThreadsPerWorker = 250;

public:
  ResizeTLABsTask() : G1AbstractSubTask(G1GCPhaseTimes::ResizeThreadLABs), _claimer(ThreadsPerWorker) { }

  // Resizes the TLABs of all JavaThreads this worker manages to claim.
  // worker_id is not used by this task.
  void do_work(uint worker_id) override {
    class ResizeClosure : public ThreadClosure {
    public:
      // Only JavaThreads are handed out by the claimer, so the downcast is safe.
      void do_thread(Thread* thread) override {
        static_cast<JavaThread*>(thread)->tlab().resize();
      }
    } cl;
    _claimer.apply(&cl);
  }

  // Cost is the (possibly fractional) number of ThreadsPerWorker-sized chunks
  // in the JavaThread list.
  double worker_cost() const override {
    return static_cast<double>(_claimer.length()) / ThreadsPerWorker;
  }
};
|
||||
|
||||
G1PostEvacuateCollectionSetCleanupTask2::G1PostEvacuateCollectionSetCleanupTask2(G1ParScanThreadStateSet* per_thread_states,
|
||||
G1EvacInfo* evacuation_info,
|
||||
G1EvacFailureRegions* evac_failure_regions) :
|
||||
@ -722,6 +749,9 @@ G1PostEvacuateCollectionSetCleanupTask2::G1PostEvacuateCollectionSetCleanupTask2
|
||||
}
|
||||
}
|
||||
add_parallel_task(new RedirtyLoggedCardsTask(per_thread_states->rdcqs(), evac_failure_regions));
|
||||
if (UseTLAB && ResizeTLAB) {
|
||||
add_parallel_task(new ResizeTLABsTask());
|
||||
}
|
||||
add_parallel_task(new FreeCollectionSetTask(evacuation_info,
|
||||
per_thread_states->surviving_young_words(),
|
||||
evac_failure_regions));
|
||||
|
@ -60,6 +60,7 @@ public:
|
||||
// - Redirty Logged Cards
|
||||
// - Restore Preserved Marks (on evacuation failure)
|
||||
// - Free Collection Set
|
||||
// - Resize TLABs
|
||||
class G1PostEvacuateCollectionSetCleanupTask2 : public G1BatchedTask {
|
||||
class EagerlyReclaimHumongousObjectsTask;
|
||||
class ResetHotCardCacheTask;
|
||||
@ -70,6 +71,7 @@ class G1PostEvacuateCollectionSetCleanupTask2 : public G1BatchedTask {
|
||||
class ClearRetainedRegionBitmaps;
|
||||
class RedirtyLoggedCardsTask;
|
||||
class RestorePreservedMarksTask;
|
||||
class ResizeTLABsTask;
|
||||
class FreeCollectionSetTask;
|
||||
|
||||
public:
|
||||
|
@ -183,6 +183,7 @@ public class TestGCLogMessages {
|
||||
new LogMessageWithLevelC2OrJVMCIOnly("Update Derived Pointers", Level.DEBUG),
|
||||
new LogMessageWithLevel("Redirty Logged Cards", Level.DEBUG),
|
||||
new LogMessageWithLevel("Redirtied Cards", Level.DEBUG),
|
||||
new LogMessageWithLevel("Resize TLABs", Level.DEBUG),
|
||||
new LogMessageWithLevel("Free Collection Set", Level.DEBUG),
|
||||
new LogMessageWithLevel("Serial Free Collection Set", Level.TRACE),
|
||||
new LogMessageWithLevel("Young Free Collection Set", Level.TRACE),
|
||||
@ -192,8 +193,7 @@ public class TestGCLogMessages {
|
||||
new LogMessageWithLevel("Rebuild Free List", Level.DEBUG),
|
||||
new LogMessageWithLevel("Serial Rebuild Free List", Level.TRACE),
|
||||
new LogMessageWithLevel("Parallel Rebuild Free List", Level.TRACE),
|
||||
new LogMessageWithLevel("Start New Collection Set", Level.DEBUG),
|
||||
new LogMessageWithLevel("Resize TLABs", Level.DEBUG),
|
||||
new LogMessageWithLevel("Prepare For Mutator", Level.DEBUG),
|
||||
new LogMessageWithLevel("Expand Heap After Collection", Level.DEBUG),
|
||||
};
|
||||
|
||||
|
@ -107,6 +107,7 @@ public class TestG1ParallelPhases {
|
||||
"RedirtyCards",
|
||||
"RecalculateUsed",
|
||||
"ResetHotCardCache",
|
||||
"ResizeTLABs",
|
||||
"FreeCSet",
|
||||
"UpdateDerivedPointers",
|
||||
"EagerlyReclaimHumongousObjects",
|
||||
|
Loading…
Reference in New Issue
Block a user