8030815: Code roots are not accounted for in region prediction

Reviewed-by: iwalulya, ayang
This commit is contained in:
Thomas Schatzl 2023-09-19 08:23:57 +00:00
parent 138542de78
commit d038571213
6 changed files with 93 additions and 27 deletions

View File

@ -80,9 +80,11 @@ G1Analytics::G1Analytics(const G1Predictions* predictor) :
_card_scan_to_merge_ratio_seq(TruncatedSeqLength),
_cost_per_card_scan_ms_seq(TruncatedSeqLength),
_cost_per_card_merge_ms_seq(TruncatedSeqLength),
_cost_per_code_root_ms_seq(TruncatedSeqLength),
_cost_per_byte_copied_ms_seq(TruncatedSeqLength),
_pending_cards_seq(TruncatedSeqLength),
_rs_length_seq(TruncatedSeqLength),
_code_root_rs_length_seq(TruncatedSeqLength),
_constant_other_time_ms_seq(TruncatedSeqLength),
_young_other_cost_per_region_ms_seq(TruncatedSeqLength),
_non_young_other_cost_per_region_ms_seq(TruncatedSeqLength),
@ -104,6 +106,7 @@ G1Analytics::G1Analytics(const G1Predictions* predictor) :
_card_scan_to_merge_ratio_seq.set_initial(young_card_scan_to_merge_ratio_defaults[index]);
_cost_per_card_scan_ms_seq.set_initial(young_only_cost_per_card_scan_ms_defaults[index]);
_rs_length_seq.set_initial(0);
_code_root_rs_length_seq.set_initial(0);
_cost_per_byte_copied_ms_seq.set_initial(cost_per_byte_ms_defaults[index]);
_constant_other_time_ms_seq.add(constant_other_time_ms_defaults[index]);
@ -186,6 +189,10 @@ void G1Analytics::report_cost_per_card_merge_ms(double cost_per_card_ms, bool fo
_cost_per_card_merge_ms_seq.add(cost_per_card_ms, for_young_only_phase);
}
// Adds one sample of the per-code-root scan cost (in ms) to the code root cost
// sequence, tagged with whether it was measured in the young-only phase.
void G1Analytics::report_cost_per_code_root_scan_ms(double cost_per_code_root_ms,
                                                    bool for_young_only_phase) {
  _cost_per_code_root_ms_seq.add(cost_per_code_root_ms, for_young_only_phase);
}
// Adds one sample to the card scan-to-merge ratio sequence, tagged with whether
// it was measured in the young-only phase.
// NOTE(review): the parameter is spelled merge_to_scan, but the value is stored
// in the scan_to_merge sequence - confirm the name is merely historical.
void G1Analytics::report_card_scan_to_merge_ratio(double merge_to_scan_ratio,
                                                  bool for_young_only_phase) {
  _card_scan_to_merge_ratio_seq.add(merge_to_scan_ratio, for_young_only_phase);
}
@ -214,6 +221,10 @@ void G1Analytics::report_rs_length(double rs_length, bool for_young_only_phase)
_rs_length_seq.add(rs_length, for_young_only_phase);
}
// Adds one sample of the code root remembered set length to its sequence,
// tagged with whether it was measured in the young-only phase.
void G1Analytics::report_code_root_rs_length(double code_root_rs_length,
                                             bool for_young_only_phase) {
  _code_root_rs_length_seq.add(code_root_rs_length, for_young_only_phase);
}
double G1Analytics::predict_alloc_rate_ms() const {
if (enough_samples_available(&_alloc_rate_ms_seq)) {
return predict_zero_bounded(&_alloc_rate_ms_seq);
@ -242,6 +253,10 @@ double G1Analytics::predict_card_merge_time_ms(size_t card_num, bool for_young_o
return card_num * predict_zero_bounded(&_cost_per_card_merge_ms_seq, for_young_only_phase);
}
// Predicted time (in ms) to scan code_root_num code roots: the zero-bounded
// prediction of the per-code-root cost for the given phase, scaled by the count.
double G1Analytics::predict_code_root_scan_time_ms(size_t code_root_num,
                                                   bool for_young_only_phase) const {
  double const cost_per_code_root_ms =
      predict_zero_bounded(&_cost_per_code_root_ms_seq, for_young_only_phase);
  return code_root_num * cost_per_code_root_ms;
}
// Predicted time (in ms) to scan card_num cards: the zero-bounded prediction of
// the per-card scan cost for the given phase, scaled by the count.
double G1Analytics::predict_card_scan_time_ms(size_t card_num,
                                              bool for_young_only_phase) const {
  double const cost_per_card_ms =
      predict_zero_bounded(&_cost_per_card_scan_ms_seq, for_young_only_phase);
  return card_num * cost_per_card_ms;
}
@ -274,6 +289,10 @@ size_t G1Analytics::predict_rs_length(bool for_young_only_phase) const {
return predict_size(&_rs_length_seq, for_young_only_phase);
}
// Predicted code root remembered set length for the given phase, derived from
// the samples recorded in the code root remembered set length sequence.
size_t G1Analytics::predict_code_root_rs_length(bool for_young_only_phase) const {
  size_t const predicted_length = predict_size(&_code_root_rs_length_seq, for_young_only_phase);
  return predicted_length;
}
// Predicted number of pending cards for the given phase, derived from the
// samples recorded in the pending cards sequence.
size_t G1Analytics::predict_pending_cards(bool for_young_only_phase) const {
  size_t const predicted_cards = predict_size(&_pending_cards_seq, for_young_only_phase);
  return predicted_cards;
}

View File

@ -57,11 +57,14 @@ class G1Analytics: public CHeapObj<mtGC> {
G1PhaseDependentSeq _cost_per_card_scan_ms_seq;
// The cost to merge a card during young-only and mixed gcs in ms.
G1PhaseDependentSeq _cost_per_card_merge_ms_seq;
// The cost to scan entries in the code root remembered set in ms.
G1PhaseDependentSeq _cost_per_code_root_ms_seq;
// The cost to copy a byte in ms.
G1PhaseDependentSeq _cost_per_byte_copied_ms_seq;
G1PhaseDependentSeq _pending_cards_seq;
G1PhaseDependentSeq _rs_length_seq;
G1PhaseDependentSeq _code_root_rs_length_seq;
TruncatedSeq _constant_other_time_ms_seq;
TruncatedSeq _young_other_cost_per_region_ms_seq;
@ -127,6 +130,7 @@ public:
void report_dirtied_cards_in_thread_buffers(size_t num_cards);
void report_cost_per_card_scan_ms(double cost_per_remset_card_ms, bool for_young_only_phase);
void report_cost_per_card_merge_ms(double cost_per_card_ms, bool for_young_only_phase);
void report_cost_per_code_root_scan_ms(double cost_per_code_root_ms, bool for_young_only_phase);
void report_card_scan_to_merge_ratio(double cards_per_entry_ratio, bool for_young_only_phase);
void report_rs_length_diff(double rs_length_diff, bool for_young_only_phase);
void report_cost_per_byte_ms(double cost_per_byte_ms, bool for_young_only_phase);
@ -135,6 +139,7 @@ public:
void report_constant_other_time_ms(double constant_other_time_ms);
void report_pending_cards(double pending_cards, bool for_young_only_phase);
void report_rs_length(double rs_length, bool for_young_only_phase);
void report_code_root_rs_length(double code_root_rs_length, bool for_young_only_phase);
double predict_alloc_rate_ms() const;
int num_alloc_rate_ms() const;
@ -150,6 +155,8 @@ public:
double predict_card_merge_time_ms(size_t card_num, bool for_young_only_phase) const;
double predict_card_scan_time_ms(size_t card_num, bool for_young_only_phase) const;
double predict_code_root_scan_time_ms(size_t code_root_num, bool for_young_only_phase) const;
double predict_object_copy_time_ms(size_t bytes_to_copy, bool for_young_only_phase) const;
double predict_constant_other_time_ms() const;
@ -163,6 +170,7 @@ public:
double predict_cleanup_time_ms() const;
size_t predict_rs_length(bool for_young_only_phase) const;
size_t predict_code_root_rs_length(bool for_young_only_phase) const;
size_t predict_pending_cards(bool for_young_only_phase) const;
// Add a new GC of the given duration and end time to the record.

View File

@ -280,18 +280,21 @@ uint64_t G1ConcurrentRefine::adjust_threads_wait_ms() const {
// Region closure that totals, over every region it is applied to, the
// remembered set occupancy and the length of the code roots list. Both totals
// start at zero and are read back through the accessors at the bottom.
class G1ConcurrentRefine::RemSetSamplingClosure : public HeapRegionClosure {
// Collection set handed in by the creator; stored, but not read inside this class.
G1CollectionSet* _cset;
// Running total of remembered set occupancy.
size_t _sampled_rs_length;
// Running total of code root list lengths.
size_t _sampled_code_root_rs_length;
public:
explicit RemSetSamplingClosure(G1CollectionSet* cset) :
_cset(cset), _sampled_rs_length(0) {}
_cset(cset), _sampled_rs_length(0), _sampled_code_root_rs_length(0) {}
// Adds the given region's remembered set figures to the running totals.
// NOTE(review): returning false presumably tells the iteration to continue
// with the next region - confirm against HeapRegionClosure.
bool do_heap_region(HeapRegion* r) override {
size_t rs_length = r->rem_set()->occupied();
_sampled_rs_length += rs_length;
HeapRegionRemSet* rem_set = r->rem_set();
_sampled_rs_length += rem_set->occupied();
_sampled_code_root_rs_length += rem_set->code_roots_list_length();
return false;
}
// Totals accumulated so far.
size_t sampled_rs_length() const { return _sampled_rs_length; }
size_t sampled_code_root_rs_length() const { return _sampled_code_root_rs_length; }
};
// Adjust the target length (in regions) of the young gen, based on the
@ -311,7 +314,7 @@ void G1ConcurrentRefine::adjust_young_list_target_length() {
G1CollectionSet* cset = G1CollectedHeap::heap()->collection_set();
RemSetSamplingClosure cl{cset};
cset->iterate(&cl);
_policy->revise_young_list_target_length(cl.sampled_rs_length());
_policy->revise_young_list_target_length(cl.sampled_rs_length(), cl.sampled_code_root_rs_length());
}
}

View File

@ -189,13 +189,14 @@ void G1Policy::update_young_length_bounds() {
assert(!Universe::is_fully_initialized() || SafepointSynchronize::is_at_safepoint(), "must be");
bool for_young_only_phase = collector_state()->in_young_only_phase();
update_young_length_bounds(_analytics->predict_pending_cards(for_young_only_phase),
_analytics->predict_rs_length(for_young_only_phase));
_analytics->predict_rs_length(for_young_only_phase),
_analytics->predict_code_root_rs_length(for_young_only_phase));
}
void G1Policy::update_young_length_bounds(size_t pending_cards, size_t rs_length) {
void G1Policy::update_young_length_bounds(size_t pending_cards, size_t rs_length, size_t code_root_rs_length) {
uint old_young_list_target_length = young_list_target_length();
uint new_young_list_desired_length = calculate_young_desired_length(pending_cards, rs_length);
uint new_young_list_desired_length = calculate_young_desired_length(pending_cards, rs_length, code_root_rs_length);
uint new_young_list_target_length = calculate_young_target_length(new_young_list_desired_length);
uint new_young_list_max_length = calculate_young_max_length(new_young_list_target_length);
@ -234,7 +235,9 @@ void G1Policy::update_young_length_bounds(size_t pending_cards, size_t rs_length
// value smaller than what is already allocated or what can actually be allocated.
// This return value is only an expectation.
//
uint G1Policy::calculate_young_desired_length(size_t pending_cards, size_t rs_length) const {
uint G1Policy::calculate_young_desired_length(size_t pending_cards,
size_t rs_length,
size_t code_root_rs_length) const {
uint min_young_length_by_sizer = _young_gen_sizer.min_desired_young_length();
uint max_young_length_by_sizer = _young_gen_sizer.max_desired_young_length();
@ -267,7 +270,7 @@ uint G1Policy::calculate_young_desired_length(size_t pending_cards, size_t rs_le
if (use_adaptive_young_list_length()) {
desired_eden_length_by_mmu = calculate_desired_eden_length_by_mmu();
double base_time_ms = predict_base_time_ms(pending_cards, rs_length);
double base_time_ms = predict_base_time_ms(pending_cards, rs_length, code_root_rs_length);
double retained_time_ms = predict_retained_regions_evac_time();
double total_time_ms = base_time_ms + retained_time_ms;
@ -550,13 +553,13 @@ G1GCPhaseTimes* G1Policy::phase_times() const {
return _phase_times;
}
// Recomputes the young length bounds from freshly sampled remembered set
// figures supplied by the caller. Only legal when the adaptive young list
// length is in use; the guarantee below enforces that.
void G1Policy::revise_young_list_target_length(size_t rs_length) {
void G1Policy::revise_young_list_target_length(size_t rs_length, size_t code_root_rs_length) {
guarantee(use_adaptive_young_list_length(), "should not call this otherwise" );
// Pending cards = cards currently held by the dirty card queue set plus the
// predicted number of dirtied cards still sitting in thread buffers.
size_t thread_buffer_cards = _analytics->predict_dirtied_cards_in_thread_buffers();
G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set();
size_t pending_cards = dcqs.num_cards() + thread_buffer_cards;
update_young_length_bounds(pending_cards, rs_length);
update_young_length_bounds(pending_cards, rs_length, code_root_rs_length);
}
void G1Policy::record_full_collection_start() {
@ -890,6 +893,17 @@ void G1Policy::record_young_collection_end(bool concurrent_operation_is_full_mar
}
_analytics->report_card_scan_to_merge_ratio(scan_to_merge_ratio, is_young_only_pause);
// Update prediction for code root scan
size_t const total_code_roots_scanned = p->sum_thread_work_items(G1GCPhaseTimes::CodeRoots, G1GCPhaseTimes::CodeRootsScannedNMethods) +
p->sum_thread_work_items(G1GCPhaseTimes::OptCodeRoots, G1GCPhaseTimes::CodeRootsScannedNMethods);
if (total_code_roots_scanned >= G1NumCodeRootsCostSampleThreshold) {
double avg_time_code_root_scan = average_time_ms(G1GCPhaseTimes::CodeRoots) +
average_time_ms(G1GCPhaseTimes::OptCodeRoots);
_analytics->report_cost_per_code_root_scan_ms(avg_time_code_root_scan / total_code_roots_scanned, is_young_only_pause);
}
// Update prediction for copy cost per byte
size_t copied_bytes = p->sum_thread_work_items(G1GCPhaseTimes::MergePSS, G1GCPhaseTimes::MergePSSCopiedBytes);
@ -912,6 +926,7 @@ void G1Policy::record_young_collection_end(bool concurrent_operation_is_full_mar
_analytics->report_pending_cards((double)pending_cards_at_gc_start(), is_young_only_pause);
_analytics->report_rs_length((double)_rs_length, is_young_only_pause);
_analytics->report_code_root_rs_length((double)total_code_roots_scanned, is_young_only_pause);
}
assert(!(G1GCPauseTypeHelper::is_concurrent_start_pause(this_pause) && collector_state()->mark_or_rebuild_in_progress()),
@ -1033,7 +1048,8 @@ void G1Policy::record_young_gc_pause_end(bool evacuation_failed) {
}
double G1Policy::predict_base_time_ms(size_t pending_cards,
size_t rs_length) const {
size_t rs_length,
size_t code_root_rs_length) const {
bool in_young_only_phase = collector_state()->in_young_only_phase();
size_t unique_cards_from_rs = _analytics->predict_scan_card_num(rs_length, in_young_only_phase);
@ -1043,22 +1059,26 @@ double G1Policy::predict_base_time_ms(size_t pending_cards,
double card_merge_time = _analytics->predict_card_merge_time_ms(pending_cards + rs_length, in_young_only_phase);
double card_scan_time = _analytics->predict_card_scan_time_ms(effective_scanned_cards, in_young_only_phase);
double code_root_scan_time = _analytics->predict_code_root_scan_time_ms(code_root_rs_length, in_young_only_phase);
double constant_other_time = _analytics->predict_constant_other_time_ms();
double survivor_evac_time = predict_survivor_regions_evac_time();
double total_time = card_merge_time + card_scan_time + constant_other_time + survivor_evac_time;
double total_time = card_merge_time + card_scan_time + code_root_scan_time + constant_other_time + survivor_evac_time;
log_trace(gc, ergo, heap)("Predicted base time: total %f lb_cards %zu rs_length %zu effective_scanned_cards %zu "
"card_merge_time %f card_scan_time %f constant_other_time %f survivor_evac_time %f",
"card_merge_time %f card_scan_time %f code_root_rs_length %zu code_root_scan_time %f "
"constant_other_time %f survivor_evac_time %f",
total_time, pending_cards, rs_length, effective_scanned_cards,
card_merge_time, card_scan_time, constant_other_time, survivor_evac_time);
card_merge_time, card_scan_time, code_root_rs_length, code_root_scan_time,
constant_other_time, survivor_evac_time);
return total_time;
}
// Convenience overload: the caller supplies only the pending card count; the
// remembered set lengths are taken from the analytics predictions for the
// current phase and everything is forwarded to the overload taking explicit
// lengths.
double G1Policy::predict_base_time_ms(size_t pending_cards) const {
// Predictions are kept per phase, so ask the collector state which one applies.
bool for_young_only_phase = collector_state()->in_young_only_phase();
size_t rs_length = _analytics->predict_rs_length(for_young_only_phase);
return predict_base_time_ms(pending_cards, rs_length);
size_t code_root_rs_length = _analytics->predict_code_root_rs_length(for_young_only_phase);
return predict_base_time_ms(pending_cards, rs_length, code_root_rs_length);
}
size_t G1Policy::predict_bytes_to_copy(HeapRegion* hr) const {
@ -1100,10 +1120,18 @@ double G1Policy::predict_region_merge_scan_time(HeapRegion* hr, bool for_young_o
_analytics->predict_card_scan_time_ms(scan_card_num, for_young_only_phase);
}
// Predicted time to scan the code roots of a single region: the length of the
// region's code roots list is fed into the analytics' code root scan time
// prediction for the given phase.
double G1Policy::predict_region_code_root_scan_time(HeapRegion* hr,
                                                    bool for_young_only_phase) const {
  return _analytics->predict_code_root_scan_time_ms(hr->rem_set()->code_roots_list_length(),
                                                    for_young_only_phase);
}
double G1Policy::predict_region_non_copy_time_ms(HeapRegion* hr,
bool for_young_only_phase) const {
double region_elapsed_time_ms = predict_region_merge_scan_time(hr, for_young_only_phase);
double region_elapsed_time_ms = predict_region_merge_scan_time(hr, for_young_only_phase) +
predict_region_code_root_scan_time(hr, for_young_only_phase);
// The prediction of the "other" time for this region is based
// upon the region type and NOT the GC type.
if (hr->is_young()) {

View File

@ -142,12 +142,15 @@ private:
// Base time contains handling remembered sets and constant other time of the
// whole young gen, refinement buffers, and copying survivors.
// Basically everything but copying eden regions.
double predict_base_time_ms(size_t pending_cards, size_t rs_length) const;
double predict_base_time_ms(size_t pending_cards, size_t rs_length, size_t code_root_length) const;
// Copy time for a region is copying live data.
double predict_region_copy_time_ms(HeapRegion* hr, bool for_young_only_phase) const;
// Merge-scan time for a region is handling remembered sets of that region (as a single unit).
// Merge-scan time for a region is handling card-based remembered sets of that region
// (as a single unit).
double predict_region_merge_scan_time(HeapRegion* hr, bool for_young_only_phase) const;
// Code root scan time prediction for the given region.
double predict_region_code_root_scan_time(HeapRegion* hr, bool for_young_only_phase) const;
// Non-copy time for a region is handling remembered sets and other time.
double predict_region_non_copy_time_ms(HeapRegion* hr, bool for_young_only_phase) const;
@ -207,10 +210,10 @@ private:
double _mark_cleanup_start_sec;
// Updates the internal young gen maximum and target and desired lengths.
// If no parameters are passed, predict pending cards and the RS length using
// the prediction model.
// If no parameters are passed, predict pending cards, card set remset length and
// code root remset length using the prediction model.
void update_young_length_bounds();
void update_young_length_bounds(size_t pending_cards, size_t rs_length);
void update_young_length_bounds(size_t pending_cards, size_t rs_length, size_t code_root_rs_length);
// Calculate and return the minimum desired eden length based on the MMU target.
uint calculate_desired_eden_length_by_mmu() const;
@ -238,7 +241,7 @@ private:
// Calculate desired young length based on current situation without taking actually
// available free regions into account.
uint calculate_young_desired_length(size_t pending_cards, size_t rs_length) const;
uint calculate_young_desired_length(size_t pending_cards, size_t rs_length, size_t code_root_rs_length) const;
// Limit the given desired young length to available free regions.
uint calculate_young_target_length(uint desired_young_length) const;
// The GCLocker might cause us to need more regions than the target. Calculate
@ -301,7 +304,7 @@ public:
// Check the current value of the young list RSet length and
// compare it against the last prediction. If the current value is
// higher, recalculate the young list target length prediction.
void revise_young_list_target_length(size_t rs_length);
void revise_young_list_target_length(size_t rs_length, size_t code_root_rs_length);
// This should be called after the heap is resized.
void record_new_heap_size(uint new_number_of_regions);

View File

@ -325,9 +325,14 @@
range(1, 256) \
\
product(uint, G1NumCardsCostSampleThreshold, 1000, DIAGNOSTIC, \
"Threshold for the number of cards when reporting card cost " \
"related prediction sample. That sample must involve the same or "\
"more than that number of cards to be used.") \
"Threshold for the number of cards when reporting remembered set "\
"card cost related prediction samples. A sample must involve " \
"the same or more than that number of cards to be used.") \
\
product(uint, G1NumCodeRootsCostSampleThreshold, 100, DIAGNOSTIC, \
"Threshold for the number of code roots when reporting code root "\
"scan cost related prediction samples. A sample must involve " \
"the same or more than this number of code roots to be used.") \
\
GC_G1_EVACUATION_FAILURE_FLAGS(develop, \
develop_pd, \