8159979: During initial mark, preparing all regions for marking may take a significant amount of time

Reviewed-by: tschatzl, ayang
This commit is contained in:
Ivan Walulya 2021-08-31 12:30:14 +00:00
parent 98fa53357a
commit 841e3943c4
5 changed files with 101 additions and 34 deletions

@ -3567,16 +3567,8 @@ void G1CollectedHeap::pre_evacuate_collection_set(G1EvacuationInfo* evacuation_i
DerivedPointerTable::clear();
#endif
// Concurrent start needs claim bits to keep track of the marked-through CLDs.
if (collector_state()->in_concurrent_start_gc()) {
concurrent_mark()->pre_concurrent_start(gc_cause());
double start_clear_claimed_marks = os::elapsedTime();
ClassLoaderDataGraph::clear_claimed_marks();
double recorded_clear_claimed_marks_time_ms = (os::elapsedTime() - start_clear_claimed_marks) * 1000.0;
phase_times()->record_clear_claimed_marks_time_ms(recorded_clear_claimed_marks_time_ms);
}
// Should G1EvacuationFailureALot be in effect for this GC?

@ -27,6 +27,7 @@
#include "classfile/systemDictionary.hpp"
#include "code/codeCache.hpp"
#include "gc/g1/g1BarrierSet.hpp"
#include "gc/g1/g1BatchedGangTask.hpp"
#include "gc/g1/g1CardSetMemory.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1CollectorState.hpp"
@ -40,6 +41,7 @@
#include "gc/g1/g1ThreadLocalData.hpp"
#include "gc/g1/g1Trace.hpp"
#include "gc/g1/heapRegion.inline.hpp"
#include "gc/g1/heapRegionManager.hpp"
#include "gc/g1/heapRegionRemSet.inline.hpp"
#include "gc/g1/heapRegionSet.inline.hpp"
#include "gc/shared/gcId.hpp"
@ -471,6 +473,8 @@ void G1ConcurrentMark::reset() {
_top_at_rebuild_starts[i] = NULL;
_region_mark_stats[i].clear();
}
_root_regions.reset();
}
void G1ConcurrentMark::clear_statistics_in_region(uint region_idx) {
@ -729,25 +733,89 @@ void G1ConcurrentMark::clear_next_bitmap(WorkGang* workers) {
clear_next_bitmap(workers, false);
}
// Batched gang task that G1ConcurrentMark::pre_concurrent_start() constructs and
// hands to run_batch_task(). It groups the three nested sub tasks declared below.
class G1PreConcurrentStartTask : public G1BatchedGangTask {
// Concurrent start needs claim bits to keep track of the marked-through CLDs.
class CLDClearClaimedMarksTask;
// Reset marking state.
class ResetMarkingStateTask;
// For each region note start of marking.
class NoteStartOfMarkTask;
public:
// cause: GC cause supplied by the caller.
// cm:    concurrent mark instance whose marking state is to be reset.
G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm);
};
// Sub task that clears the claimed marks of the class loader data graph.
// It is tagged with the G1GCPhaseTimes::CLDClearClaimedMarks phase.
class G1PreConcurrentStartTask::CLDClearClaimedMarksTask : public G1AbstractSubTask {
public:
CLDClearClaimedMarksTask() : G1AbstractSubTask(G1GCPhaseTimes::CLDClearClaimedMarks) { }
// Constant cost estimate of one worker.
double worker_cost() const override { return 1.0; }
// Performs the clearing; defined out of line.
void do_work(uint worker_id) override;
};
// Sub task that resets the marking state of a G1ConcurrentMark instance.
// It is tagged with the G1GCPhaseTimes::ResetMarkingState phase.
class G1PreConcurrentStartTask::ResetMarkingStateTask : public G1AbstractSubTask {
// Concurrent mark instance to reset; stored as a raw pointer and not deleted here.
G1ConcurrentMark* _cm;
public:
ResetMarkingStateTask(G1ConcurrentMark* cm) : G1AbstractSubTask(G1GCPhaseTimes::ResetMarkingState), _cm(cm) { }
// Constant cost estimate of one worker.
double worker_cost() const override { return 1.0; }
// Calls reset() on the stored instance; defined out of line.
void do_work(uint worker_id) override;
};
// Sub task that notes the start of marking on heap regions. Workers share the
// region claimer owned by this task. It is tagged with the
// G1GCPhaseTimes::NoteStartOfMark phase.
class G1PreConcurrentStartTask::NoteStartOfMarkTask : public G1AbstractSubTask {
  // Hands out regions to the workers executing do_work().
  HeapRegionClaimer _claimer;
public:
  // The claimer is created with 0 here; set_max_workers() supplies the real worker count later.
  NoteStartOfMarkTask() : G1AbstractSubTask(G1GCPhaseTimes::NoteStartOfMark), _claimer(0) { }

  // Returns the estimated number of workers as a fractional value:
  // the claimer's region count divided by regions_per_thread.
  double worker_cost() const override {
    // The work done per region is very small, therefore we choose this magic number to cap the number
    // of threads used when there are few regions.
    const double regions_per_thread = 1000;
    // Divide in floating point. An integer division would truncate, reporting a
    // cost of 0 for fewer than 1000 regions and dropping the fractional part otherwise.
    return _claimer.n_regions() / regions_per_thread;
  }

  // Forwards the maximum worker count to the claimer; defined out of line.
  void set_max_workers(uint max_workers) override;
  // Iterates over the regions claimed by this worker; defined out of line.
  void do_work(uint worker_id) override;
};
// Clears the claimed marks on the class loader data graph. worker_id is not used.
void G1PreConcurrentStartTask::CLDClearClaimedMarksTask::do_work(uint worker_id) {
ClassLoaderDataGraph::clear_claimed_marks();
}
// Calls reset() on the G1ConcurrentMark instance supplied at construction.
// worker_id is not used.
void G1PreConcurrentStartTask::ResetMarkingStateTask::do_work(uint worker_id) {
// Reset marking state.
_cm->reset();
}
class NoteStartOfMarkHRClosure : public HeapRegionClosure {
public:
bool do_heap_region(HeapRegion* r) {
bool do_heap_region(HeapRegion* r) override {
r->note_start_of_marking();
return false;
}
};
// Applies a NoteStartOfMarkHRClosure to heap regions through the parallel
// region iterator, using this task's claimer and the given worker id.
void G1PreConcurrentStartTask::NoteStartOfMarkTask::do_work(uint worker_id) {
  NoteStartOfMarkHRClosure note_start;
  G1CollectedHeap* const g1h = G1CollectedHeap::heap();
  g1h->heap_region_par_iterate_from_worker_offset(&note_start, &_claimer, worker_id);
}
// Passes the maximum worker count on to the region claimer, which the
// constructor created with an argument of 0.
void G1PreConcurrentStartTask::NoteStartOfMarkTask::set_max_workers(uint max_workers) {
_claimer.set_n_workers(max_workers);
}
// Builds the "Pre Concurrent Start" batch: two serial sub tasks (clear CLD
// claimed marks, reset marking state) and one parallel sub task (note start of
// marking per region).
//
// cause: not read by this constructor.
// cm:    concurrent mark instance handed to ResetMarkingStateTask.
//
// NOTE(review): the sub tasks are allocated with new and passed to
// add_serial_task()/add_parallel_task(); presumably G1BatchedGangTask takes
// ownership and frees them - confirm.
G1PreConcurrentStartTask::G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm) :
  G1BatchedGangTask("Pre Concurrent Start", G1CollectedHeap::heap()->phase_times()) {
  add_serial_task(new CLDClearClaimedMarksTask());
  add_serial_task(new ResetMarkingStateTask(cm));
  add_parallel_task(new NoteStartOfMarkTask());
}
// Prepares concurrent marking for a concurrent start pause.
//
// Asserts that it runs at a safepoint on the VM thread, runs a
// G1PreConcurrentStartTask as a batch task, and then records the GC cause on
// the concurrent mark tracer.
//
// Resetting the marking state and noting the start of marking per region are
// done by the batched task's sub tasks, so they are not repeated serially
// here; reset() itself also resets the root regions.
void G1ConcurrentMark::pre_concurrent_start(GCCause::Cause cause) {
  assert_at_safepoint_on_vm_thread();

  G1PreConcurrentStartTask cl(cause, this);
  G1CollectedHeap::heap()->run_batch_task(&cl);

  _gc_tracer_cm->set_gc_cause(cause);
}

@ -375,10 +375,6 @@ class G1ConcurrentMark : public CHeapObj<mtGC> {
// it has been reclaimed.
void clear_statistics(HeapRegion* r);
// Resets the global marking data structures, as well as the
// task local ones; should be called during concurrent start.
void reset();
// Resets all the marking data structures. Called when we have to restart
// marking or when marking completes (via set_non_marking_state below).
void reset_marking_for_restart();
@ -526,6 +522,10 @@ public:
// Calculates the number of concurrent GC threads to be used in the marking phase.
uint calc_active_marking_workers();
// Resets the global marking data structures, as well as the
// task local ones; should be called during concurrent start.
void reset();
// Moves all per-task cached data into global state.
void flush_all_task_caches();
// Prepare internal data structures for the next mark cycle. This includes clearing
@ -855,5 +855,4 @@ public:
virtual bool do_heap_region(HeapRegion* r);
~G1PrintRegionLivenessInfoClosure();
};
#endif // SHARE_GC_G1_G1CONCURRENTMARK_HPP

@ -148,6 +148,10 @@ G1GCPhaseTimes::G1GCPhaseTimes(STWGCTimer* gc_timer, uint max_gc_threads) :
_gc_par_phases[NonYoungFreeCSet] = new WorkerDataArray<double>("NonYoungFreeCSet", "Non-Young Free Collection Set (ms):", max_gc_threads);
_gc_par_phases[RebuildFreeList] = new WorkerDataArray<double>("RebuildFreeList", "Parallel Rebuild Free List (ms):", max_gc_threads);
_gc_par_phases[CLDClearClaimedMarks] = new WorkerDataArray<double>("CLDClearClaimedMarks", "Clear Claimed Marks (ms):", max_gc_threads);
_gc_par_phases[ResetMarkingState] = new WorkerDataArray<double>("ResetMarkingState", "Reset Marking State (ms):", max_gc_threads);
_gc_par_phases[NoteStartOfMark] = new WorkerDataArray<double>("NoteStartOfMark", "Note Start Of Mark (ms):", max_gc_threads);
reset();
}
@ -169,7 +173,6 @@ void G1GCPhaseTimes::reset() {
_root_region_scan_wait_time_ms = 0.0;
_external_accounted_time_ms = 0.0;
_recorded_prepare_heap_roots_time_ms = 0.0;
_recorded_clear_claimed_marks_time_ms = 0.0;
_recorded_young_cset_choice_time_ms = 0.0;
_recorded_non_young_cset_choice_time_ms = 0.0;
_recorded_sample_collection_set_candidates_time_ms = 0.0;
@ -286,7 +289,7 @@ size_t G1GCPhaseTimes::get_thread_work_item(GCParPhases phase, uint worker_id, u
}
// return the average time for a phase in milliseconds
double G1GCPhaseTimes::average_time_ms(GCParPhases phase) {
double G1GCPhaseTimes::average_time_ms(GCParPhases phase) const {
if (_gc_par_phases[phase] == NULL) {
return 0.0;
}
@ -375,6 +378,10 @@ void G1GCPhaseTimes::trace_count(const char* name, size_t value) const {
}
double G1GCPhaseTimes::print_pre_evacuate_collection_set() const {
const double pre_concurrent_start_ms = average_time_ms(CLDClearClaimedMarks) +
average_time_ms(ResetMarkingState) +
average_time_ms(NoteStartOfMark);
const double sum_ms = _root_region_scan_wait_time_ms +
_cur_prepare_tlab_time_ms +
_cur_concatenate_dirty_card_logs_time_ms +
@ -382,7 +389,7 @@ double G1GCPhaseTimes::print_pre_evacuate_collection_set() const {
_recorded_non_young_cset_choice_time_ms +
_cur_region_register_time +
_recorded_prepare_heap_roots_time_ms +
_recorded_clear_claimed_marks_time_ms;
pre_concurrent_start_ms;
info_time("Pre Evacuate Collection Set", sum_ms);
@ -395,9 +402,13 @@ double G1GCPhaseTimes::print_pre_evacuate_collection_set() const {
debug_time("Region Register", _cur_region_register_time);
debug_time("Prepare Heap Roots", _recorded_prepare_heap_roots_time_ms);
if (_recorded_clear_claimed_marks_time_ms > 0.0) {
debug_time("Clear Claimed Marks", _recorded_clear_claimed_marks_time_ms);
if (pre_concurrent_start_ms > 0.0) {
debug_phase(_gc_par_phases[CLDClearClaimedMarks]);
debug_phase(_gc_par_phases[ResetMarkingState]);
debug_phase(_gc_par_phases[NoteStartOfMark]);
}
return sum_ms;
}

@ -87,6 +87,9 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
#endif
EagerlyReclaimHumongousObjects,
RestorePreservedMarks,
CLDClearClaimedMarks,
ResetMarkingState,
NoteStartOfMark,
GCParPhasesSentinel
};
@ -180,8 +183,6 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
double _recorded_prepare_heap_roots_time_ms;
double _recorded_clear_claimed_marks_time_ms;
double _recorded_young_cset_choice_time_ms;
double _recorded_non_young_cset_choice_time_ms;
@ -255,7 +256,7 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
size_t get_thread_work_item(GCParPhases phase, uint worker_id, uint index = 0);
// return the average time for a phase in milliseconds
double average_time_ms(GCParPhases phase);
double average_time_ms(GCParPhases phase) const;
size_t sum_thread_work_items(GCParPhases phase, uint index = 0);
@ -375,10 +376,6 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
_recorded_prepare_heap_roots_time_ms = recorded_prepare_heap_roots_time_ms;
}
// Stores the given clear-claimed-marks duration in
// _recorded_clear_claimed_marks_time_ms, replacing any previous value.
void record_clear_claimed_marks_time_ms(double recorded_clear_claimed_marks_time_ms) {
_recorded_clear_claimed_marks_time_ms = recorded_clear_claimed_marks_time_ms;
}
// Returns the stored value of _cur_collection_start_sec.
double cur_collection_start_sec() {
return _cur_collection_start_sec;
}