8294850: Make rs length/pending card predictors dependent on gc phase

Reviewed-by: kbarrett, iwalulya, ayang
2022-10-14 12:46:48 +00:00 · 2022-10-14 12:46:48 +00:00 · 64813f48c8
commit 64813f48c8
parent 21e4f06ada
3 changed files with 65 additions and 41 deletions
--- a/src/hotspot/share/gc/g1/g1Analytics.cpp
+++ b/src/hotspot/share/gc/g1/g1Analytics.cpp
@ -78,7 +78,8 @@ G1Analytics::G1Analytics(const G1Predictions* predictor) :
    _concurrent_mark_cleanup_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
    _alloc_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
    _prev_collection_pause_end_ms(0.0),
-    _rs_length_diff_seq(new TruncatedSeq(TruncatedSeqLength)),
+    _young_rs_length_diff_seq(new TruncatedSeq(TruncatedSeqLength)),
+    _mixed_rs_length_diff_seq(new TruncatedSeq(TruncatedSeqLength)),
    _concurrent_refine_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
    _dirtied_cards_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
    _young_card_merge_to_scan_ratio_seq(new TruncatedSeq(TruncatedSeqLength)),
@ -91,8 +92,10 @@ G1Analytics::G1Analytics(const G1Predictions* predictor) :
    _constant_other_time_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
    _young_other_cost_per_region_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
    _non_young_other_cost_per_region_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
-    _pending_cards_seq(new TruncatedSeq(TruncatedSeqLength)),
-    _rs_length_seq(new TruncatedSeq(TruncatedSeqLength)),
+    _young_pending_cards_seq(new TruncatedSeq(TruncatedSeqLength)),
+    _mixed_pending_cards_seq(new TruncatedSeq(TruncatedSeqLength)),
+    _young_rs_length_seq(new TruncatedSeq(TruncatedSeqLength)),
+    _mixed_rs_length_seq(new TruncatedSeq(TruncatedSeqLength)),
    _cost_per_byte_ms_during_cm_seq(new TruncatedSeq(TruncatedSeqLength)),
    _recent_prev_end_times_for_all_gcs_sec(new TruncatedSeq(NumPrevPausesForHeuristics)),
    _long_term_pause_time_ratio(0.0),
@ -104,7 +107,7 @@ G1Analytics::G1Analytics(const G1Predictions* predictor) :

  int index = MIN2(ParallelGCThreads - 1, 7u);

-  _rs_length_diff_seq->add(rs_length_diff_defaults[index]);
+  _young_rs_length_diff_seq->add(rs_length_diff_defaults[index]);
  // Start with inverse of maximum STW cost.
  _concurrent_refine_rate_ms_seq->add(1/cost_per_logged_card_ms_defaults[0]);
  // Some applications have very low rates for logging cards.
@ -193,8 +196,12 @@ void G1Analytics::report_card_merge_to_scan_ratio(double merge_to_scan_ratio, bo
  }
 }

-void G1Analytics::report_rs_length_diff(double rs_length_diff) {
-  _rs_length_diff_seq->add(rs_length_diff);
+void G1Analytics::report_rs_length_diff(double rs_length_diff, bool for_young_gc) {
+  if (for_young_gc) {
+    _young_rs_length_diff_seq->add(rs_length_diff);
+  } else {
+    _mixed_rs_length_diff_seq->add(rs_length_diff);
+  }
 }

 void G1Analytics::report_cost_per_byte_ms(double cost_per_byte_ms, bool mark_or_rebuild_in_progress) {
@ -217,12 +224,20 @@ void G1Analytics::report_constant_other_time_ms(double constant_other_time_ms) {
  _constant_other_time_ms_seq->add(constant_other_time_ms);
 }

-void G1Analytics::report_pending_cards(double pending_cards) {
-  _pending_cards_seq->add(pending_cards);
+void G1Analytics::report_pending_cards(double pending_cards, bool for_young_gc) {
+  if (for_young_gc) {
+    _young_pending_cards_seq->add(pending_cards);
+  } else {
+    _mixed_pending_cards_seq->add(pending_cards);
+  }
 }

-void G1Analytics::report_rs_length(double rs_length) {
-  _rs_length_seq->add(rs_length);
+void G1Analytics::report_rs_length(double rs_length, bool for_young_gc) {
+  if (for_young_gc) {
+    _young_rs_length_seq->add(rs_length);
+  } else {
+    _mixed_rs_length_seq->add(rs_length);
+  }
 }

 double G1Analytics::predict_alloc_rate_ms() const {
@ -301,12 +316,20 @@ double G1Analytics::predict_cleanup_time_ms() const {
  return predict_zero_bounded(_concurrent_mark_cleanup_times_ms);
 }

-size_t G1Analytics::predict_rs_length() const {
-  return predict_size(_rs_length_seq) + predict_size(_rs_length_diff_seq);
+size_t G1Analytics::predict_rs_length(bool for_young_gc) const {
+  if (for_young_gc || !enough_samples_available(_mixed_rs_length_seq)) {
+    return predict_size(_young_rs_length_seq) + predict_size(_young_rs_length_diff_seq);
+  } else {
+    return predict_size(_mixed_rs_length_seq) + predict_size(_mixed_rs_length_diff_seq);
+  }
 }

-size_t G1Analytics::predict_pending_cards() const {
-  return predict_size(_pending_cards_seq);
+size_t G1Analytics::predict_pending_cards(bool for_young_gc) const {
+  if (for_young_gc || !enough_samples_available(_mixed_pending_cards_seq)) {
+    return predict_size(_young_pending_cards_seq);
+  } else {
+    return predict_size(_mixed_pending_cards_seq);
+  }
 }

 double G1Analytics::oldest_known_gc_end_time_sec() const {
--- a/src/hotspot/share/gc/g1/g1Analytics.hpp
+++ b/src/hotspot/share/gc/g1/g1Analytics.hpp
@ -45,7 +45,8 @@ class G1Analytics: public CHeapObj<mtGC> {
  TruncatedSeq* _alloc_rate_ms_seq;
  double        _prev_collection_pause_end_ms;

-  TruncatedSeq* _rs_length_diff_seq;
+  TruncatedSeq* _young_rs_length_diff_seq;
+  TruncatedSeq* _mixed_rs_length_diff_seq;
  TruncatedSeq* _concurrent_refine_rate_ms_seq;
  TruncatedSeq* _dirtied_cards_rate_ms_seq;
  // The ratio between the number of merged cards and actually scanned cards, for
@ -67,8 +68,10 @@ class G1Analytics: public CHeapObj<mtGC> {
  TruncatedSeq* _young_other_cost_per_region_ms_seq;
  TruncatedSeq* _non_young_other_cost_per_region_ms_seq;

-  TruncatedSeq* _pending_cards_seq;
-  TruncatedSeq* _rs_length_seq;
+  TruncatedSeq* _young_pending_cards_seq;
+  TruncatedSeq* _mixed_pending_cards_seq;
+  TruncatedSeq* _young_rs_length_seq;
+  TruncatedSeq* _mixed_rs_length_seq;

  TruncatedSeq* _cost_per_byte_ms_during_cm_seq;

@ -126,13 +129,13 @@ public:
  void report_cost_per_card_scan_ms(double cost_per_remset_card_ms, bool for_young_gc);
  void report_cost_per_card_merge_ms(double cost_per_card_ms, bool for_young_gc);
  void report_card_merge_to_scan_ratio(double cards_per_entry_ratio, bool for_young_gc);
-  void report_rs_length_diff(double rs_length_diff);
+  void report_rs_length_diff(double rs_length_diff, bool for_young_gc);
  void report_cost_per_byte_ms(double cost_per_byte_ms, bool mark_or_rebuild_in_progress);
  void report_young_other_cost_per_region_ms(double other_cost_per_region_ms);
  void report_non_young_other_cost_per_region_ms(double other_cost_per_region_ms);
  void report_constant_other_time_ms(double constant_other_time_ms);
-  void report_pending_cards(double pending_cards);
-  void report_rs_length(double rs_length);
+  void report_pending_cards(double pending_cards, bool for_young_gc);
+  void report_rs_length(double rs_length, bool for_young_gc);

  double predict_alloc_rate_ms() const;
  int num_alloc_rate_ms() const;
@ -161,8 +164,8 @@ public:

  double predict_cleanup_time_ms() const;

-  size_t predict_rs_length() const;
-  size_t predict_pending_cards() const;
+  size_t predict_rs_length(bool for_young_gc) const;
+  size_t predict_pending_cards(bool for_young_gc) const;

  // Add a new GC of the given duration and end time to the record.
  void update_recent_gc_times(double end_time_sec, double elapsed_ms);
--- a/src/hotspot/share/gc/g1/g1Policy.cpp
+++ b/src/hotspot/share/gc/g1/g1Policy.cpp
@ -194,8 +194,9 @@ uint G1Policy::calculate_desired_eden_length_by_mmu() const {
 }

 void G1Policy::update_young_length_bounds() {
-  update_young_length_bounds(_analytics->predict_pending_cards(),
-                             _analytics->predict_rs_length());
+  bool for_young_gc = collector_state()->in_young_only_phase();
+  update_young_length_bounds(_analytics->predict_pending_cards(for_young_gc),
+                             _analytics->predict_rs_length(for_young_gc));
 }

 void G1Policy::update_young_length_bounds(size_t pending_cards, size_t rs_length) {
@ -525,14 +526,15 @@ void G1Policy::revise_young_list_target_length_if_necessary(size_t rs_length) {
    update_rs_length_prediction(rs_length_prediction);

    G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set();
-    // We have no measure of the number of cards in the thread log buffers, assume
-    // these are very few compared to the sum of the two other sources.
+    // We have no measure of the number of cards in the thread buffers, assume
+    // these are very few compared to the ones in the DCQS.
    update_young_length_bounds(dcqs.num_cards(), rs_length_prediction);
  }
 }

 void G1Policy::update_rs_length_prediction() {
-  update_rs_length_prediction(_analytics->predict_rs_length());
+  bool for_young_gc = collector_state()->in_young_only_phase();
+  update_rs_length_prediction(_analytics->predict_rs_length(for_young_gc));
 }

 void G1Policy::update_rs_length_prediction(size_t prediction) {
@ -753,6 +755,7 @@ void G1Policy::record_young_collection_end(bool concurrent_operation_is_full_mar
  double pause_time_ms = (end_time_sec - start_time_sec) * 1000.0;

  G1GCPauseType this_pause = collector_state()->young_gc_pause_type(concurrent_operation_is_full_mark);
+  bool is_young_only_pause = G1GCPauseTypeHelper::is_young_only_pause(this_pause);

  if (G1GCPauseTypeHelper::is_concurrent_start_pause(this_pause)) {
    record_concurrent_mark_init_end();
@ -806,7 +809,7 @@ void G1Policy::record_young_collection_end(bool concurrent_operation_is_full_mar
      maybe_start_marking();
    }
  } else {
-    assert(G1GCPauseTypeHelper::is_young_only_pause(this_pause), "must be");
+    assert(is_young_only_pause, "must be");
  }

  _eden_surv_rate_group->start_adding_regions();
@ -830,8 +833,7 @@ void G1Policy::record_young_collection_end(bool concurrent_operation_is_full_mar
                                    average_time_ms(G1GCPhaseTimes::MergeHCC) +
                                    average_time_ms(G1GCPhaseTimes::MergeLB) +
                                    average_time_ms(G1GCPhaseTimes::OptMergeRS);
-      _analytics->report_cost_per_card_merge_ms(avg_time_merge_cards / total_cards_merged,
-                                                G1GCPauseTypeHelper::is_young_only_pause(this_pause));
+      _analytics->report_cost_per_card_merge_ms(avg_time_merge_cards / total_cards_merged, is_young_only_pause);
    }

    // Update prediction for card scan
@ -842,8 +844,7 @@ void G1Policy::record_young_collection_end(bool concurrent_operation_is_full_mar
      double avg_time_dirty_card_scan = average_time_ms(G1GCPhaseTimes::ScanHR) +
                                        average_time_ms(G1GCPhaseTimes::OptScanHR);

-      _analytics->report_cost_per_card_scan_ms(avg_time_dirty_card_scan / total_cards_scanned,
-                                               G1GCPauseTypeHelper::is_young_only_pause(this_pause));
+      _analytics->report_cost_per_card_scan_ms(avg_time_dirty_card_scan / total_cards_scanned, is_young_only_pause);
    }

    // Update prediction for the ratio between cards from the remembered
@ -857,12 +858,11 @@ void G1Policy::record_young_collection_end(bool concurrent_operation_is_full_mar
    if (total_cards_scanned > 0) {
      merge_to_scan_ratio = (double) from_rs_length_cards / total_cards_scanned;
    }
-    _analytics->report_card_merge_to_scan_ratio(merge_to_scan_ratio,
-                                                G1GCPauseTypeHelper::is_young_only_pause(this_pause));
+    _analytics->report_card_merge_to_scan_ratio(merge_to_scan_ratio, is_young_only_pause);

    const size_t recorded_rs_length = _collection_set->recorded_rs_length();
    const size_t rs_length_diff = _rs_length > recorded_rs_length ? _rs_length - recorded_rs_length : 0;
-    _analytics->report_rs_length_diff(rs_length_diff);
+    _analytics->report_rs_length_diff(rs_length_diff, is_young_only_pause);

    // Update prediction for copy cost per byte
    size_t copied_bytes = p->sum_thread_work_items(G1GCPhaseTimes::MergePSS, G1GCPhaseTimes::MergePSSCopiedBytes);
@ -887,11 +887,8 @@ void G1Policy::record_young_collection_end(bool concurrent_operation_is_full_mar
    // Record RS lengths and the number of pending cards separately for young-only and mixed gc:
    // these are wildly different between the two, and feeding mixed gc values into the young-only
    // predictors would mess up young gen sizing right after the mixed gc phase.
-    // During mixed gc we do not use them for young gen sizing.
-    if (G1GCPauseTypeHelper::is_young_only_pause(this_pause)) {
-      _analytics->report_pending_cards((double) _pending_cards_at_gc_start);
-      _analytics->report_rs_length((double) _rs_length);
-    }
+    _analytics->report_pending_cards((double) _pending_cards_at_gc_start, is_young_only_pause);
+    _analytics->report_rs_length((double) _rs_length, is_young_only_pause);
  }

  assert(!(G1GCPauseTypeHelper::is_concurrent_start_pause(this_pause) && collector_state()->mark_or_rebuild_in_progress()),
@ -1036,7 +1033,8 @@ double G1Policy::predict_base_time_ms(size_t pending_cards,
 }

 double G1Policy::predict_base_time_ms(size_t pending_cards) const {
-  size_t rs_length = _analytics->predict_rs_length();
+  bool for_young_gc = collector_state()->in_young_only_phase();
+  size_t rs_length = _analytics->predict_rs_length(for_young_gc);
  return predict_base_time_ms(pending_cards, rs_length);
 }