8159979: During initial mark, preparing all regions for marking may take a significant amount of time

Reviewed-by: tschatzl, ayang
2021-08-31 12:30:14 +00:00 · 2021-08-31 12:30:14 +00:00 · 841e3943c4
commit 841e3943c4
parent 98fa53357a
5 changed files with 101 additions and 34 deletions
--- a/src/hotspot/share/gc/g1/g1CollectedHeap.cpp
+++ b/src/hotspot/share/gc/g1/g1CollectedHeap.cpp
@ -3567,16 +3567,8 @@ void G1CollectedHeap::pre_evacuate_collection_set(G1EvacuationInfo* evacuation_i
  DerivedPointerTable::clear();
 #endif

-  // Concurrent start needs claim bits to keep track of the marked-through CLDs.
  if (collector_state()->in_concurrent_start_gc()) {
    concurrent_mark()->pre_concurrent_start(gc_cause());
-
-    double start_clear_claimed_marks = os::elapsedTime();
-
-    ClassLoaderDataGraph::clear_claimed_marks();
-
-    double recorded_clear_claimed_marks_time_ms = (os::elapsedTime() - start_clear_claimed_marks) * 1000.0;
-    phase_times()->record_clear_claimed_marks_time_ms(recorded_clear_claimed_marks_time_ms);
  }

  // Should G1EvacuationFailureALot be in effect for this GC?
--- a/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp
+++ b/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp
@ -27,6 +27,7 @@
 #include "classfile/systemDictionary.hpp"
 #include "code/codeCache.hpp"
 #include "gc/g1/g1BarrierSet.hpp"
+#include "gc/g1/g1BatchedGangTask.hpp"
 #include "gc/g1/g1CardSetMemory.hpp"
 #include "gc/g1/g1CollectedHeap.inline.hpp"
 #include "gc/g1/g1CollectorState.hpp"
@ -40,6 +41,7 @@
 #include "gc/g1/g1ThreadLocalData.hpp"
 #include "gc/g1/g1Trace.hpp"
 #include "gc/g1/heapRegion.inline.hpp"
+#include "gc/g1/heapRegionManager.hpp"
 #include "gc/g1/heapRegionRemSet.inline.hpp"
 #include "gc/g1/heapRegionSet.inline.hpp"
 #include "gc/shared/gcId.hpp"
@ -471,6 +473,8 @@ void G1ConcurrentMark::reset() {
    _top_at_rebuild_starts[i] = NULL;
    _region_mark_stats[i].clear();
  }
+
+  _root_regions.reset();
 }

 void G1ConcurrentMark::clear_statistics_in_region(uint region_idx) {
@ -729,25 +733,89 @@ void G1ConcurrentMark::clear_next_bitmap(WorkGang* workers) {
  clear_next_bitmap(workers, false);
 }

+class G1PreConcurrentStartTask : public G1BatchedGangTask { // Batch of sub-tasks run by G1ConcurrentMark::pre_concurrent_start().
+  // Serial sub-task: concurrent start needs claim bits to keep track of the marked-through CLDs, so clear them.
+  class CLDClearClaimedMarksTask;
+  // Serial sub-task: reset marking state by calling G1ConcurrentMark::reset().
+  class ResetMarkingStateTask;
+  // Parallel sub-task: for each region note start of marking.
+  class NoteStartOfMarkTask;
+
+public:
+  G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm); // Registers the three sub-tasks above; cause is not used by the constructor body.
+};
+
+class G1PreConcurrentStartTask::CLDClearClaimedMarksTask : public G1AbstractSubTask { // Clears the CLD claimed marks.
+public:
+  CLDClearClaimedMarksTask() : G1AbstractSubTask(G1GCPhaseTimes::CLDClearClaimedMarks) { }
+
+  double worker_cost() const override { return 1.0; } // Constant cost estimate of 1.0.
+  void do_work(uint worker_id) override; // Calls ClassLoaderDataGraph::clear_claimed_marks(); worker_id is unused.
+};
+
+class G1PreConcurrentStartTask::ResetMarkingStateTask : public G1AbstractSubTask { // Resets the marking state of _cm.
+  G1ConcurrentMark* _cm; // Concurrent mark instance to reset; supplied through the constructor.
+public:
+  ResetMarkingStateTask(G1ConcurrentMark* cm) : G1AbstractSubTask(G1GCPhaseTimes::ResetMarkingState), _cm(cm) { }
+
+  double worker_cost() const override { return 1.0; } // Constant cost estimate of 1.0.
+  void do_work(uint worker_id) override; // Calls _cm->reset(); worker_id is unused.
+};
+
+class G1PreConcurrentStartTask::NoteStartOfMarkTask : public G1AbstractSubTask { // Notes the start of marking on heap regions.
+  HeapRegionClaimer _claimer; // Constructed with 0 workers; the real count is set in set_max_workers().
+public:
+  NoteStartOfMarkTask() : G1AbstractSubTask(G1GCPhaseTimes::NoteStartOfMark), _claimer(0) { }
+
+  double worker_cost() const override {
+    // The work done per region is very small, therefore we choose this magic number to cap the number
+    // of threads used when there are few regions. Kept as a double so the division is not truncated.
+    const double regions_per_thread = 1000;
+    return _claimer.n_regions() / regions_per_thread;
+  }
+
+  void set_max_workers(uint max_workers) override; // Forwards the worker count to _claimer.
+  void do_work(uint worker_id) override;           // Iterates regions via _claimer, calling note_start_of_marking() on each.
+};
+
+void G1PreConcurrentStartTask::CLDClearClaimedMarksTask::do_work(uint worker_id) { // worker_id is unused.
+  ClassLoaderDataGraph::clear_claimed_marks(); // Concurrent start needs claim bits to keep track of the marked-through CLDs.
+}
+
+void G1PreConcurrentStartTask::ResetMarkingStateTask::do_work(uint worker_id) { // worker_id is unused.
+  // Reset the global and task-local marking data structures; reset() also resets the root regions.
+  _cm->reset();
+}
+
 class NoteStartOfMarkHRClosure : public HeapRegionClosure {
 public:
-  bool do_heap_region(HeapRegion* r) {
+  bool do_heap_region(HeapRegion* r) override {
    r->note_start_of_marking();
    return false;
  }
 };

+void G1PreConcurrentStartTask::NoteStartOfMarkTask::do_work(uint worker_id) {
+  NoteStartOfMarkHRClosure start_cl; // Calls note_start_of_marking() on every region it is applied to.
+  G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&start_cl, &_claimer, worker_id); // Parallel region iteration using the shared _claimer and this worker's id.
+}
+
+void G1PreConcurrentStartTask::NoteStartOfMarkTask::set_max_workers(uint max_workers) {
+  _claimer.set_n_workers(max_workers); // _claimer was constructed with 0 workers; give it the real count here.
+}
+
+G1PreConcurrentStartTask::G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm) :
+  G1BatchedGangTask("Pre Concurrent Start", G1CollectedHeap::heap()->phase_times()) {
+  add_serial_task(new CLDClearClaimedMarksTask());  // Clear the CLD claimed marks.
+  add_serial_task(new ResetMarkingStateTask(cm));   // Reset the marking state of cm.
+  add_parallel_task(new NoteStartOfMarkTask());     // For each region note start of marking.
+}
+
 void G1ConcurrentMark::pre_concurrent_start(GCCause::Cause cause) {
  assert_at_safepoint_on_vm_thread();

-  // Reset marking state.
-  reset();
-
-  // For each region note start of marking.
-  NoteStartOfMarkHRClosure startcl;
-  _g1h->heap_region_iterate(&startcl);
-
-  _root_regions.reset();
+  G1PreConcurrentStartTask cl(cause, this);
+  G1CollectedHeap::heap()->run_batch_task(&cl);

  _gc_tracer_cm->set_gc_cause(cause);
 }
--- a/src/hotspot/share/gc/g1/g1ConcurrentMark.hpp
+++ b/src/hotspot/share/gc/g1/g1ConcurrentMark.hpp
@ -375,10 +375,6 @@ class G1ConcurrentMark : public CHeapObj<mtGC> {
  // it has been reclaimed.
  void clear_statistics(HeapRegion* r);

-  // Resets the global marking data structures, as well as the
-  // task local ones; should be called during concurrent start.
-  void reset();
-
  // Resets all the marking data structures. Called when we have to restart
  // marking or when marking completes (via set_non_marking_state below).
  void reset_marking_for_restart();
@ -526,6 +522,10 @@ public:
  // Calculates the number of concurrent GC threads to be used in the marking phase.
  uint calc_active_marking_workers();

+  // Resets the global marking data structures, as well as the
+  // task local ones; should be called during concurrent start.
+  void reset();
+
  // Moves all per-task cached data into global state.
  void flush_all_task_caches();
  // Prepare internal data structures for the next mark cycle. This includes clearing
@ -855,5 +855,4 @@ public:
  virtual bool do_heap_region(HeapRegion* r);
  ~G1PrintRegionLivenessInfoClosure();
 };
-
 #endif // SHARE_GC_G1_G1CONCURRENTMARK_HPP
--- a/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp
+++ b/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp
@ -148,6 +148,10 @@ G1GCPhaseTimes::G1GCPhaseTimes(STWGCTimer* gc_timer, uint max_gc_threads) :
  _gc_par_phases[NonYoungFreeCSet] = new WorkerDataArray<double>("NonYoungFreeCSet", "Non-Young Free Collection Set (ms):", max_gc_threads);
  _gc_par_phases[RebuildFreeList] = new WorkerDataArray<double>("RebuildFreeList", "Parallel Rebuild Free List (ms):", max_gc_threads);

+  _gc_par_phases[CLDClearClaimedMarks] = new WorkerDataArray<double>("CLDClearClaimedMarks", "Clear Claimed Marks (ms):", max_gc_threads);
+  _gc_par_phases[ResetMarkingState] = new WorkerDataArray<double>("ResetMarkingState", "Reset Marking State (ms):", max_gc_threads);
+  _gc_par_phases[NoteStartOfMark] = new WorkerDataArray<double>("NoteStartOfMark", "Note Start Of Mark (ms):", max_gc_threads);
+
  reset();
 }

@ -169,7 +173,6 @@ void G1GCPhaseTimes::reset() {
  _root_region_scan_wait_time_ms = 0.0;
  _external_accounted_time_ms = 0.0;
  _recorded_prepare_heap_roots_time_ms = 0.0;
-  _recorded_clear_claimed_marks_time_ms = 0.0;
  _recorded_young_cset_choice_time_ms = 0.0;
  _recorded_non_young_cset_choice_time_ms = 0.0;
  _recorded_sample_collection_set_candidates_time_ms = 0.0;
@ -286,7 +289,7 @@ size_t G1GCPhaseTimes::get_thread_work_item(GCParPhases phase, uint worker_id, u
 }

 // return the average time for a phase in milliseconds
-double G1GCPhaseTimes::average_time_ms(GCParPhases phase) {
+double G1GCPhaseTimes::average_time_ms(GCParPhases phase) const {
  if (_gc_par_phases[phase] == NULL) {
    return 0.0;
  }
@ -375,6 +378,10 @@ void G1GCPhaseTimes::trace_count(const char* name, size_t value) const {
 }

 double G1GCPhaseTimes::print_pre_evacuate_collection_set() const {
+  const double pre_concurrent_start_ms = average_time_ms(CLDClearClaimedMarks) +
+                                         average_time_ms(ResetMarkingState) +
+                                         average_time_ms(NoteStartOfMark);
+
  const double sum_ms = _root_region_scan_wait_time_ms +
                        _cur_prepare_tlab_time_ms +
                        _cur_concatenate_dirty_card_logs_time_ms +
@ -382,7 +389,7 @@ double G1GCPhaseTimes::print_pre_evacuate_collection_set() const {
                        _recorded_non_young_cset_choice_time_ms +
                        _cur_region_register_time +
                        _recorded_prepare_heap_roots_time_ms +
-                        _recorded_clear_claimed_marks_time_ms;
+                        pre_concurrent_start_ms;

  info_time("Pre Evacuate Collection Set", sum_ms);

@ -395,9 +402,13 @@ double G1GCPhaseTimes::print_pre_evacuate_collection_set() const {
  debug_time("Region Register", _cur_region_register_time);

  debug_time("Prepare Heap Roots", _recorded_prepare_heap_roots_time_ms);
-  if (_recorded_clear_claimed_marks_time_ms > 0.0) {
-    debug_time("Clear Claimed Marks", _recorded_clear_claimed_marks_time_ms);
+
+  if (pre_concurrent_start_ms > 0.0) {
+    debug_phase(_gc_par_phases[CLDClearClaimedMarks]);
+    debug_phase(_gc_par_phases[ResetMarkingState]);
+    debug_phase(_gc_par_phases[NoteStartOfMark]);
  }
+
  return sum_ms;
 }

--- a/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp
+++ b/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp
@ -87,6 +87,9 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
 #endif
    EagerlyReclaimHumongousObjects,
    RestorePreservedMarks,
+    CLDClearClaimedMarks,
+    ResetMarkingState,
+    NoteStartOfMark,
    GCParPhasesSentinel
  };

@ -180,8 +183,6 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {

  double _recorded_prepare_heap_roots_time_ms;

-  double _recorded_clear_claimed_marks_time_ms;
-
  double _recorded_young_cset_choice_time_ms;
  double _recorded_non_young_cset_choice_time_ms;

@ -255,7 +256,7 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
  size_t get_thread_work_item(GCParPhases phase, uint worker_id, uint index = 0);

  // return the average time for a phase in milliseconds
-  double average_time_ms(GCParPhases phase);
+  double average_time_ms(GCParPhases phase) const;

  size_t sum_thread_work_items(GCParPhases phase, uint index = 0);

@ -375,10 +376,6 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
    _recorded_prepare_heap_roots_time_ms = recorded_prepare_heap_roots_time_ms;
  }

-  void record_clear_claimed_marks_time_ms(double recorded_clear_claimed_marks_time_ms) {
-    _recorded_clear_claimed_marks_time_ms = recorded_clear_claimed_marks_time_ms;
-  }
-
  double cur_collection_start_sec() {
    return _cur_collection_start_sec;
  }