diff --git a/src/hotspot/share/gc/g1/g1CollectedHeap.cpp b/src/hotspot/share/gc/g1/g1CollectedHeap.cpp index 3d6306ee335..6c70eedca67 100644 --- a/src/hotspot/share/gc/g1/g1CollectedHeap.cpp +++ b/src/hotspot/share/gc/g1/g1CollectedHeap.cpp @@ -1010,7 +1010,7 @@ void G1CollectedHeap::verify_before_full_collection(bool explicit_gc) { _verifier->verify_bitmap_clear(true /* above_tams_only */); } -void G1CollectedHeap::prepare_heap_for_mutators() { +void G1CollectedHeap::prepare_for_mutator_after_full_collection() { // Delete metaspaces for unloaded class loaders and clean up loader_data graph ClassLoaderDataGraph::purge(/*at_safepoint*/true); DEBUG_ONLY(MetaspaceUtils::verify();) @@ -1025,9 +1025,8 @@ void G1CollectedHeap::prepare_heap_for_mutators() { // Rebuild the code root lists for each region rebuild_code_roots(); - // Start a new incremental collection set for the next pause start_new_collection_set(); - + allocate_dummy_regions(); _allocator->init_mutator_alloc_regions(); // Post collection state updates. @@ -2642,8 +2641,6 @@ class VerifyRegionRemSetClosure : public HeapRegionClosure { }; void G1CollectedHeap::start_new_collection_set() { - double start = os::elapsedTime(); - collection_set()->start_incremental_building(); clear_region_attr(); @@ -2654,8 +2651,6 @@ void G1CollectedHeap::start_new_collection_set() { // We redo the verification but now wrt to the new CSet which // has just got initialized after the previous CSet was freed. 
_cm->verify_no_collection_set_oops(); - - phase_times()->record_start_new_cset_time_ms((os::elapsedTime() - start) * 1000.0); } G1HeapVerifier::G1VerifyType G1CollectedHeap::young_collection_verify_type() const { @@ -2765,19 +2760,18 @@ G1JFRTracerMark::~G1JFRTracerMark() { _tracer->report_gc_end(_timer->gc_end(), _timer->time_partitions()); } -void G1CollectedHeap::prepare_tlabs_for_mutator() { +void G1CollectedHeap::prepare_for_mutator_after_young_collection() { Ticks start = Ticks::now(); _survivor_evac_stats.adjust_desired_plab_size(); _old_evac_stats.adjust_desired_plab_size(); + // Start a new incremental collection set for the mutator phase. + start_new_collection_set(); allocate_dummy_regions(); - _allocator->init_mutator_alloc_regions(); - resize_all_tlabs(); - - phase_times()->record_resize_tlab_time_ms((Ticks::now() - start).seconds() * 1000.0); + phase_times()->record_prepare_for_mutator_time_ms((Ticks::now() - start).seconds() * 1000.0); } void G1CollectedHeap::retire_tlabs() { diff --git a/src/hotspot/share/gc/g1/g1CollectedHeap.hpp b/src/hotspot/share/gc/g1/g1CollectedHeap.hpp index 77358d22433..3636f610306 100644 --- a/src/hotspot/share/gc/g1/g1CollectedHeap.hpp +++ b/src/hotspot/share/gc/g1/g1CollectedHeap.hpp @@ -57,6 +57,7 @@ #include "memory/iterator.hpp" #include "memory/memRegion.hpp" #include "runtime/mutexLocker.hpp" +#include "runtime/threadSMR.hpp" #include "utilities/bitMap.hpp" // A "G1CollectedHeap" is an implementation of a java heap for HotSpot. @@ -120,6 +121,32 @@ class G1RegionMappingChangedListener : public G1MappingChangedListener { void on_commit(uint start_idx, size_t num_regions, bool zero_filled) override; }; +// Helper to claim contiguous sets of JavaThread for processing by multiple threads. 
+class G1JavaThreadsListClaimer : public StackObj { + ThreadsListHandle _list; + uint _claim_step; + + volatile uint _cur_claim; + + // Attempts to claim _claim_step JavaThreads, returning an array of claimed + // JavaThread* with count elements. Returns null (and a zero count) if there + // are no more threads to claim. + JavaThread* const* claim(uint& count); + +public: + G1JavaThreadsListClaimer(uint claim_step) : _list(), _claim_step(claim_step), _cur_claim(0) { + assert(claim_step > 0, "must be"); + } + + // Executes the given closure on the elements of the JavaThread list, chunking the + // JavaThread set in claim_step chunks for each caller to reduce parallelization + // overhead. + void apply(ThreadClosure* cl); + + // Total number of JavaThreads that can be claimed. + uint length() const { return _list.length(); } +}; + class G1CollectedHeap : public CollectedHeap { friend class VM_G1CollectForAllocation; friend class VM_G1CollectFull; @@ -491,7 +518,7 @@ private: bool abort_concurrent_cycle(); void verify_before_full_collection(bool explicit_gc); void prepare_heap_for_full_collection(); - void prepare_heap_for_mutators(); + void prepare_for_mutator_after_full_collection(); void abort_refinement(); void verify_after_full_collection(); void print_heap_after_full_collection(); @@ -771,7 +798,7 @@ public: // Start a concurrent cycle. 
void start_concurrent_cycle(bool concurrent_operation_is_full_mark); - void prepare_tlabs_for_mutator(); + void prepare_for_mutator_after_young_collection(); void retire_tlabs(); diff --git a/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp b/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp index e7f9b9ddf91..caf07c69788 100644 --- a/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp +++ b/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp @@ -41,6 +41,7 @@ #include "gc/shared/taskqueue.inline.hpp" #include "oops/stackChunkOop.hpp" #include "runtime/atomic.hpp" +#include "runtime/threadSMR.inline.hpp" #include "utilities/bitMap.inline.hpp" inline bool G1STWIsAliveClosure::do_object_b(oop p) { @@ -49,6 +50,30 @@ inline bool G1STWIsAliveClosure::do_object_b(oop p) { return !_g1h->is_in_cset(p) || p->is_forwarded(); } +inline JavaThread* const* G1JavaThreadsListClaimer::claim(uint& count) { + count = 0; + if (Atomic::load(&_cur_claim) >= _list.length()) { + return nullptr; + } + uint claim = Atomic::fetch_and_add(&_cur_claim, _claim_step); + if (claim >= _list.length()) { + return nullptr; + } + count = MIN2(_list.length() - claim, _claim_step); + return _list.list()->threads() + claim; +} + +inline void G1JavaThreadsListClaimer::apply(ThreadClosure* cl) { + JavaThread* const* list; + uint count; + + while ((list = claim(count)) != nullptr) { + for (uint i = 0; i < count; i++) { + cl->do_thread(list[i]); + } + } +} + G1GCPhaseTimes* G1CollectedHeap::phase_times() const { return _policy->phase_times(); } diff --git a/src/hotspot/share/gc/g1/g1FullCollector.cpp b/src/hotspot/share/gc/g1/g1FullCollector.cpp index 01d7fdf06d8..cae06c67d6c 100644 --- a/src/hotspot/share/gc/g1/g1FullCollector.cpp +++ b/src/hotspot/share/gc/g1/g1FullCollector.cpp @@ -228,7 +228,7 @@ void G1FullCollector::complete_collection() { // Prepare the bitmap for the next (potentially concurrent) marking. 
_heap->concurrent_mark()->clear_bitmap(_heap->workers()); - _heap->prepare_heap_for_mutators(); + _heap->prepare_for_mutator_after_full_collection(); _heap->resize_all_tlabs(); diff --git a/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp b/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp index 3d7e204bfa3..5bbab0f93af 100644 --- a/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp +++ b/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp @@ -153,6 +153,8 @@ G1GCPhaseTimes::G1GCPhaseTimes(STWGCTimer* gc_timer, uint max_gc_threads) : _gc_par_phases[RedirtyCards] = new WorkerDataArray<double>("RedirtyCards", "Redirty Logged Cards (ms):", max_gc_threads); _gc_par_phases[RedirtyCards]->create_thread_work_items("Redirtied Cards:"); + _gc_par_phases[ResizeThreadLABs] = new WorkerDataArray<double>("ResizeTLABs", "Resize TLABs (ms):", max_gc_threads); + _gc_par_phases[FreeCollectionSet] = new WorkerDataArray<double>("FreeCSet", "Free Collection Set (ms):", max_gc_threads); _gc_par_phases[YoungFreeCSet] = new WorkerDataArray<double>("YoungFreeCSet", "Young Free Collection Set (ms):", max_gc_threads); _gc_par_phases[NonYoungFreeCSet] = new WorkerDataArray<double>("NonYoungFreeCSet", "Non-Young Free Collection Set (ms):", max_gc_threads); @@ -173,7 +175,6 @@ void G1GCPhaseTimes::reset() { _cur_prepare_merge_heap_roots_time_ms = 0.0; _cur_optional_prepare_merge_heap_roots_time_ms = 0.0; _cur_prepare_tlab_time_ms = 0.0; - _cur_resize_tlab_time_ms = 0.0; _cur_post_evacuate_cleanup_1_time_ms = 0.0; _cur_post_evacuate_cleanup_2_time_ms = 0.0; _cur_expand_heap_time_ms = 0.0; @@ -184,7 +185,7 @@ void G1GCPhaseTimes::reset() { _recorded_prepare_heap_roots_time_ms = 0.0; _recorded_young_cset_choice_time_ms = 0.0; _recorded_non_young_cset_choice_time_ms = 0.0; - _recorded_start_new_cset_time_ms = 0.0; + _recorded_prepare_for_mutator_time_ms = 0.0; _recorded_serial_free_cset_time_ms = 0.0; _recorded_total_rebuild_freelist_time_ms = 0.0; _recorded_serial_rebuild_freelist_time_ms = 0.0; @@ -489,7 +490,7 @@ double 
G1GCPhaseTimes::print_post_evacuate_collection_set(bool evacuation_failed _cur_post_evacuate_cleanup_1_time_ms + _cur_post_evacuate_cleanup_2_time_ms + _recorded_total_rebuild_freelist_time_ms + - _recorded_start_new_cset_time_ms + + _recorded_prepare_for_mutator_time_ms + _cur_expand_heap_time_ms; info_time("Post Evacuate Collection Set", sum_ms); @@ -527,6 +528,9 @@ double G1GCPhaseTimes::print_post_evacuate_collection_set(bool evacuation_failed debug_phase(_gc_par_phases[SampleCollectionSetCandidates], 1); } debug_phase(_gc_par_phases[RedirtyCards], 1); + if (UseTLAB && ResizeTLAB) { + debug_phase(_gc_par_phases[ResizeThreadLABs], 1); + } debug_phase(_gc_par_phases[FreeCollectionSet], 1); trace_phase(_gc_par_phases[YoungFreeCSet], true, 1); trace_phase(_gc_par_phases[NonYoungFreeCSet], true, 1); @@ -537,10 +541,7 @@ double G1GCPhaseTimes::print_post_evacuate_collection_set(bool evacuation_failed trace_time("Serial Rebuild Free List ", _recorded_serial_rebuild_freelist_time_ms); trace_phase(_gc_par_phases[RebuildFreeList]); - debug_time("Start New Collection Set", _recorded_start_new_cset_time_ms); - if (UseTLAB && ResizeTLAB) { - debug_time("Resize TLABs", _cur_resize_tlab_time_ms); - } + debug_time("Prepare For Mutator", _recorded_prepare_for_mutator_time_ms); debug_time("Expand Heap After Collection", _cur_expand_heap_time_ms); return sum_ms; diff --git a/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp b/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp index 98fb9688102..f6df957cc4f 100644 --- a/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp +++ b/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp @@ -74,6 +74,7 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> { FreeCollectionSet, YoungFreeCSet, NonYoungFreeCSet, + ResizeThreadLABs, RebuildFreeList, SampleCollectionSetCandidates, MergePSS, @@ -179,7 +180,6 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> { double _cur_optional_prepare_merge_heap_roots_time_ms; double _cur_prepare_tlab_time_ms; - double _cur_resize_tlab_time_ms; double 
_cur_concatenate_dirty_card_logs_time_ms; @@ -199,7 +199,7 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> { double _recorded_young_cset_choice_time_ms; double _recorded_non_young_cset_choice_time_ms; - double _recorded_start_new_cset_time_ms; + double _recorded_prepare_for_mutator_time_ms; double _recorded_serial_free_cset_time_ms; @@ -276,10 +276,6 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> { _cur_prepare_tlab_time_ms = ms; } - void record_resize_tlab_time_ms(double ms) { - _cur_resize_tlab_time_ms = ms; - } - void record_concatenate_dirty_card_logs_time_ms(double ms) { _cur_concatenate_dirty_card_logs_time_ms = ms; } @@ -356,8 +352,8 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> { _recorded_non_young_cset_choice_time_ms = time_ms; } - void record_start_new_cset_time_ms(double time_ms) { - _recorded_start_new_cset_time_ms = time_ms; + void record_prepare_for_mutator_time_ms(double time_ms) { + _recorded_prepare_for_mutator_time_ms = time_ms; } void record_cur_collection_start_sec(double time_ms) { diff --git a/src/hotspot/share/gc/g1/g1YoungCollector.cpp b/src/hotspot/share/gc/g1/g1YoungCollector.cpp index 37659776d1b..045cafd3b78 100644 --- a/src/hotspot/share/gc/g1/g1YoungCollector.cpp +++ b/src/hotspot/share/gc/g1/g1YoungCollector.cpp @@ -1022,9 +1022,7 @@ void G1YoungCollector::post_evacuate_collection_set(G1EvacInfo* evacuation_info, evacuation_info->set_bytes_used(_g1h->bytes_used_during_gc()); - _g1h->start_new_collection_set(); - - _g1h->prepare_tlabs_for_mutator(); + _g1h->prepare_for_mutator_after_young_collection(); _g1h->gc_epilogue(false); diff --git a/src/hotspot/share/gc/g1/g1YoungGCPostEvacuateTasks.cpp b/src/hotspot/share/gc/g1/g1YoungGCPostEvacuateTasks.cpp index 4ac32e91cb2..9438a1b82d2 100644 --- a/src/hotspot/share/gc/g1/g1YoungGCPostEvacuateTasks.cpp +++ b/src/hotspot/share/gc/g1/g1YoungGCPostEvacuateTasks.cpp @@ -37,6 +37,8 @@ #include "gc/g1/g1YoungGCPostEvacuateTasks.hpp" #include "gc/shared/preservedMarks.inline.hpp" #include "jfr/jfrEvents.hpp" 
+#include "runtime/threads.hpp" +#include "runtime/threadSMR.hpp" #include "utilities/ticks.hpp" class G1PostEvacuateCollectionSetCleanupTask1::MergePssTask : public G1AbstractSubTask { @@ -701,6 +703,31 @@ public: } }; +class G1PostEvacuateCollectionSetCleanupTask2::ResizeTLABsTask : public G1AbstractSubTask { + G1JavaThreadsListClaimer _claimer; + + // There is not much work per thread so the number of threads per worker is high. + static const uint ThreadsPerWorker = 250; + +public: + ResizeTLABsTask() : G1AbstractSubTask(G1GCPhaseTimes::ResizeThreadLABs), _claimer(ThreadsPerWorker) { } + + void do_work(uint worker_id) override { + class ResizeClosure : public ThreadClosure { + public: + + void do_thread(Thread* thread) override { + static_cast<JavaThread*>(thread)->tlab().resize(); + } + } cl; + _claimer.apply(&cl); + } + + double worker_cost() const override { + return (double)_claimer.length() / ThreadsPerWorker; + } +}; + G1PostEvacuateCollectionSetCleanupTask2::G1PostEvacuateCollectionSetCleanupTask2(G1ParScanThreadStateSet* per_thread_states, G1EvacInfo* evacuation_info, G1EvacFailureRegions* evac_failure_regions) : @@ -722,6 +749,9 @@ G1PostEvacuateCollectionSetCleanupTask2::G1PostEvacuateCollectionSetCleanupTask2 } } add_parallel_task(new RedirtyLoggedCardsTask(per_thread_states->rdcqs(), evac_failure_regions)); + if (UseTLAB && ResizeTLAB) { + add_parallel_task(new ResizeTLABsTask()); + } add_parallel_task(new FreeCollectionSetTask(evacuation_info, per_thread_states->surviving_young_words(), evac_failure_regions)); diff --git a/src/hotspot/share/gc/g1/g1YoungGCPostEvacuateTasks.hpp b/src/hotspot/share/gc/g1/g1YoungGCPostEvacuateTasks.hpp index c897059541d..4035607ec6c 100644 --- a/src/hotspot/share/gc/g1/g1YoungGCPostEvacuateTasks.hpp +++ b/src/hotspot/share/gc/g1/g1YoungGCPostEvacuateTasks.hpp @@ -60,6 +60,7 @@ public: // - Redirty Logged Cards // - Restore Preserved Marks (on evacuation failure) // - Free Collection Set +// - Resize TLABs class 
G1PostEvacuateCollectionSetCleanupTask2 : public G1BatchedTask { class EagerlyReclaimHumongousObjectsTask; class ResetHotCardCacheTask; @@ -70,6 +71,7 @@ class G1PostEvacuateCollectionSetCleanupTask2 : public G1BatchedTask { class ClearRetainedRegionBitmaps; class RedirtyLoggedCardsTask; class RestorePreservedMarksTask; + class ResizeTLABsTask; class FreeCollectionSetTask; public: diff --git a/test/hotspot/jtreg/gc/g1/TestGCLogMessages.java b/test/hotspot/jtreg/gc/g1/TestGCLogMessages.java index 755882ba693..7886d5628fa 100644 --- a/test/hotspot/jtreg/gc/g1/TestGCLogMessages.java +++ b/test/hotspot/jtreg/gc/g1/TestGCLogMessages.java @@ -183,6 +183,7 @@ public class TestGCLogMessages { new LogMessageWithLevelC2OrJVMCIOnly("Update Derived Pointers", Level.DEBUG), new LogMessageWithLevel("Redirty Logged Cards", Level.DEBUG), new LogMessageWithLevel("Redirtied Cards", Level.DEBUG), + new LogMessageWithLevel("Resize TLABs", Level.DEBUG), new LogMessageWithLevel("Free Collection Set", Level.DEBUG), new LogMessageWithLevel("Serial Free Collection Set", Level.TRACE), new LogMessageWithLevel("Young Free Collection Set", Level.TRACE), @@ -192,8 +193,7 @@ public class TestGCLogMessages { new LogMessageWithLevel("Rebuild Free List", Level.DEBUG), new LogMessageWithLevel("Serial Rebuild Free List", Level.TRACE), new LogMessageWithLevel("Parallel Rebuild Free List", Level.TRACE), - new LogMessageWithLevel("Start New Collection Set", Level.DEBUG), - new LogMessageWithLevel("Resize TLABs", Level.DEBUG), + new LogMessageWithLevel("Prepare For Mutator", Level.DEBUG), new LogMessageWithLevel("Expand Heap After Collection", Level.DEBUG), }; diff --git a/test/jdk/jdk/jfr/event/gc/collection/TestG1ParallelPhases.java b/test/jdk/jdk/jfr/event/gc/collection/TestG1ParallelPhases.java index acdae0711d1..99b5238af68 100644 --- a/test/jdk/jdk/jfr/event/gc/collection/TestG1ParallelPhases.java +++ b/test/jdk/jdk/jfr/event/gc/collection/TestG1ParallelPhases.java @@ -107,6 +107,7 @@ public class 
TestG1ParallelPhases { "RedirtyCards", "RecalculateUsed", "ResetHotCardCache", + "ResizeTLABs", "FreeCSet", "UpdateDerivedPointers", "EagerlyReclaimHumongousObjects",