8288966: Better handle very spiky promotion in G1

Reviewed-by: iwalulya, kbarrett
This commit is contained in:
Thomas Schatzl 2022-08-22 09:07:34 +00:00
parent 07c797720d
commit 7b5f9edb59
8 changed files with 90 additions and 18 deletions

View File

@ -296,6 +296,24 @@ HeapWord* G1Allocator::old_attempt_allocation(size_t min_word_size,
G1PLABAllocator::G1PLABAllocator(G1Allocator* allocator) : G1PLABAllocator::G1PLABAllocator(G1Allocator* allocator) :
_g1h(G1CollectedHeap::heap()), _g1h(G1CollectedHeap::heap()),
_allocator(allocator) { _allocator(allocator) {
if (ResizePLAB) {
// See G1EvacStats::compute_desired_plab_size for the reasoning why this is the
// expected number of refills.
double const ExpectedNumberOfRefills = G1LastPLABAverageOccupancy / TargetPLABWastePct;
// Add some padding to the threshold so that we do not boost exactly when the
// targeted number of refills is reached.
// E.g. because the PLAB size is limited to non-humongous object sizes and by
// region boundaries, a thread may experience more refills than expected. Keeping
// the PLAB waste low is the main goal, so being a bit conservative is better.
double const PadFactor = 1.5;
_tolerated_refills = MAX2(ExpectedNumberOfRefills, 1.0) * PadFactor;
} else {
// Make the tolerated refills a huge number; -1 because we add one to this
// value later and it would overflow otherwise.
_tolerated_refills = SIZE_MAX - 1;
}
for (region_type_t state = 0; state < G1HeapRegionAttr::Num; state++) { for (region_type_t state = 0; state < G1HeapRegionAttr::Num; state++) {
_direct_allocated[state] = 0; _direct_allocated[state] = 0;
uint length = alloc_buffers_length(state); uint length = alloc_buffers_length(state);
@ -305,7 +323,10 @@ G1PLABAllocator::G1PLABAllocator(G1Allocator* allocator) :
_alloc_buffers[state][node_index] = new PLAB(word_sz); _alloc_buffers[state][node_index] = new PLAB(word_sz);
} }
_num_plab_fills[state] = 0; _num_plab_fills[state] = 0;
// The initial PLAB refill should not count, hence the +1 for the first boost.
_plab_fill_counter[state] = _tolerated_refills + 1;
_num_direct_allocations[state] = 0; _num_direct_allocations[state] = 0;
_cur_desired_plab_size[state] = word_sz;
} }
} }
@ -327,18 +348,35 @@ HeapWord* G1PLABAllocator::allocate_direct_or_new_plab(G1HeapRegionAttr dest,
size_t word_sz, size_t word_sz,
bool* plab_refill_failed, bool* plab_refill_failed,
uint node_index) { uint node_index) {
size_t plab_word_size = _g1h->desired_plab_sz(dest); size_t plab_word_size = plab_size(dest.type());
size_t next_plab_word_size = plab_word_size;
bool const should_boost_plab = _plab_fill_counter[dest.type()] == 0;
if (should_boost_plab) {
next_plab_word_size = _g1h->clamp_plab_size(next_plab_word_size * 2);
}
size_t required_in_plab = PLAB::size_required_for_allocation(word_sz); size_t required_in_plab = PLAB::size_required_for_allocation(word_sz);
// Only get a new PLAB if the allocation fits and it would not waste more than // Only get a new PLAB if the allocation fits into the to-be-allocated PLAB and
// ParallelGCBufferWastePct in the existing buffer. // it would not waste more than ParallelGCBufferWastePct in the current PLAB.
if ((required_in_plab <= plab_word_size) && // Boosting the PLAB also increasingly allows more waste to occur.
if ((required_in_plab <= next_plab_word_size) &&
may_throw_away_buffer(required_in_plab, plab_word_size)) { may_throw_away_buffer(required_in_plab, plab_word_size)) {
PLAB* alloc_buf = alloc_buffer(dest, node_index); PLAB* alloc_buf = alloc_buffer(dest, node_index);
alloc_buf->retire(); guarantee(alloc_buf->words_remaining() <= required_in_plab, "must be");
_num_plab_fills[dest.type()]++; _num_plab_fills[dest.type()]++;
alloc_buf->retire();
if (should_boost_plab) {
_plab_fill_counter[dest.type()] = _tolerated_refills;
} else {
_plab_fill_counter[dest.type()]--;
}
plab_word_size = next_plab_word_size;
_cur_desired_plab_size[dest.type()] = plab_word_size;
size_t actual_plab_size = 0; size_t actual_plab_size = 0;
HeapWord* buf = _allocator->par_allocate_during_gc(dest, HeapWord* buf = _allocator->par_allocate_during_gc(dest,
@ -376,7 +414,7 @@ void G1PLABAllocator::undo_allocation(G1HeapRegionAttr dest, HeapWord* obj, size
alloc_buffer(dest, node_index)->undo_allocation(obj, word_sz); alloc_buffer(dest, node_index)->undo_allocation(obj, word_sz);
} }
void G1PLABAllocator::flush_and_retire_stats() { void G1PLABAllocator::flush_and_retire_stats(uint num_workers) {
for (region_type_t state = 0; state < G1HeapRegionAttr::Num; state++) { for (region_type_t state = 0; state < G1HeapRegionAttr::Num; state++) {
G1EvacStats* stats = _g1h->alloc_buffer_stats(state); G1EvacStats* stats = _g1h->alloc_buffer_stats(state);
for (uint node_index = 0; node_index < alloc_buffers_length(state); node_index++) { for (uint node_index = 0; node_index < alloc_buffers_length(state); node_index++) {
@ -389,6 +427,16 @@ void G1PLABAllocator::flush_and_retire_stats() {
stats->add_direct_allocated(_direct_allocated[state]); stats->add_direct_allocated(_direct_allocated[state]);
stats->add_num_direct_allocated(_num_direct_allocations[state]); stats->add_num_direct_allocated(_num_direct_allocations[state]);
} }
log_trace(gc, plab)("PLAB boost: Young %zu -> %zu refills %zu (tolerated %zu) Old %zu -> %zu refills %zu (tolerated %zu)",
_g1h->alloc_buffer_stats(G1HeapRegionAttr::Young)->desired_plab_size(num_workers),
plab_size(G1HeapRegionAttr::Young),
_num_plab_fills[G1HeapRegionAttr::Young],
_tolerated_refills,
_g1h->alloc_buffer_stats(G1HeapRegionAttr::Old)->desired_plab_size(num_workers),
plab_size(G1HeapRegionAttr::Old),
_num_plab_fills[G1HeapRegionAttr::Old],
_tolerated_refills);
} }
size_t G1PLABAllocator::waste() const { size_t G1PLABAllocator::waste() const {
@ -404,6 +452,10 @@ size_t G1PLABAllocator::waste() const {
return result; return result;
} }
// Returns the PLAB size currently recorded in _cur_desired_plab_size for the
// region type of the given destination attribute.
size_t G1PLABAllocator::plab_size(G1HeapRegionAttr which) const {
  const auto type_index = which.type();
  return _cur_desired_plab_size[type_index];
}
size_t G1PLABAllocator::undo_waste() const { size_t G1PLABAllocator::undo_waste() const {
size_t result = 0; size_t result = 0;
for (region_type_t state = 0; state < G1HeapRegionAttr::Num; state++) { for (region_type_t state = 0; state < G1HeapRegionAttr::Num; state++) {

View File

@ -165,7 +165,16 @@ private:
size_t _num_plab_fills[G1HeapRegionAttr::Num]; size_t _num_plab_fills[G1HeapRegionAttr::Num];
size_t _num_direct_allocations[G1HeapRegionAttr::Num]; size_t _num_direct_allocations[G1HeapRegionAttr::Num];
void flush_and_retire_stats(); size_t _plab_fill_counter[G1HeapRegionAttr::Num];
// Current desired PLAB size, including any boosting applied so far.
size_t _cur_desired_plab_size[G1HeapRegionAttr::Num];
// The number of PLAB refills tolerated before boosting the PLAB size.
// This value is the same for all generations because they all use the same
// resizing logic.
size_t _tolerated_refills;
void flush_and_retire_stats(uint num_workers);
inline PLAB* alloc_buffer(G1HeapRegionAttr dest, uint node_index) const; inline PLAB* alloc_buffer(G1HeapRegionAttr dest, uint node_index) const;
inline PLAB* alloc_buffer(region_type_t dest, uint node_index) const; inline PLAB* alloc_buffer(region_type_t dest, uint node_index) const;
@ -181,6 +190,7 @@ public:
size_t waste() const; size_t waste() const;
size_t undo_waste() const; size_t undo_waste() const;
size_t plab_size(G1HeapRegionAttr which) const;
// Allocate word_sz words in dest, either directly into the regions or by // Allocate word_sz words in dest, either directly into the regions or by
// allocating a new PLAB. Returns the address of the allocated memory, NULL if // allocating a new PLAB. Returns the address of the allocated memory, NULL if

View File

@ -560,6 +560,12 @@ public:
// Determines PLAB size for a given destination. // Determines PLAB size for a given destination.
inline size_t desired_plab_sz(G1HeapRegionAttr dest); inline size_t desired_plab_sz(G1HeapRegionAttr dest);
// Clamp the given PLAB word size to allowed values. Prevents humongous PLAB sizes
// for two reasons:
// * PLABs are allocated using similar paths as oops, but should
// never be in a humongous region
// * Allowing humongous PLABs needlessly churns the region free lists
inline size_t clamp_plab_size(size_t value) const;
// Do anything common to GC's. // Do anything common to GC's.
void gc_prologue(bool full); void gc_prologue(bool full);

View File

@ -67,11 +67,11 @@ G1EvacStats* G1CollectedHeap::alloc_buffer_stats(G1HeapRegionAttr dest) {
size_t G1CollectedHeap::desired_plab_sz(G1HeapRegionAttr dest) { size_t G1CollectedHeap::desired_plab_sz(G1HeapRegionAttr dest) {
size_t gclab_word_size = alloc_buffer_stats(dest)->desired_plab_size(workers()->active_workers()); size_t gclab_word_size = alloc_buffer_stats(dest)->desired_plab_size(workers()->active_workers());
// Prevent humongous PLAB sizes for two reasons: return clamp_plab_size(gclab_word_size);
// * PLABs are allocated using a similar paths as oops, but should }
// never be in a humongous region
// * Allowing humongous PLABs needlessly churns the region free lists inline size_t G1CollectedHeap::clamp_plab_size(size_t value) const {
return MIN2(_humongous_object_threshold_in_words, gclab_word_size); return clamp(value, PLAB::min_size(), _humongous_object_threshold_in_words);
} }
// Inline functions for G1CollectedHeap // Inline functions for G1CollectedHeap

View File

@ -82,8 +82,11 @@ size_t G1EvacStats::compute_desired_plab_size() const {
// TargetPLABWastePct of 10 had been set. // TargetPLABWastePct of 10 had been set.
// //
// So we could waste up to 10 words to meet that percentage. Given that we // So we could waste up to 10 words to meet that percentage. Given that we
// also assume that that buffer is typically half-full, the new desired PLAB // also assume that that buffer is typically half-full (G1LastPLABAverageOccupancy),
// size is set to 20 words. // the new desired PLAB size is set to 20 words.
//
// (This also implies that we expect G1LastPLABAverageOccupancy / TargetPLABWastePct
// refills during allocation.)
// //
// The amount of allocation performed should be independent of the number of // The amount of allocation performed should be independent of the number of
// threads, so should the maximum waste we can spend in total. So if // threads, so should the maximum waste we can spend in total. So if

View File

@ -140,6 +140,7 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
enum GCMergePSSWorkItems { enum GCMergePSSWorkItems {
MergePSSCopiedBytes, MergePSSCopiedBytes,
MergePSSLABSize,
MergePSSLABWasteBytes, MergePSSLABWasteBytes,
MergePSSLABUndoWasteBytes MergePSSLABUndoWasteBytes
}; };

View File

@ -111,11 +111,11 @@ G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h,
initialize_numa_stats(); initialize_numa_stats();
} }
size_t G1ParScanThreadState::flush(size_t* surviving_young_words) { size_t G1ParScanThreadState::flush(size_t* surviving_young_words, uint num_workers) {
_rdc_local_qset.flush(); _rdc_local_qset.flush();
flush_numa_stats(); flush_numa_stats();
// Update allocation statistics. // Update allocation statistics.
_plab_allocator->flush_and_retire_stats(); _plab_allocator->flush_and_retire_stats(num_workers);
_g1h->policy()->record_age_table(&_age_table); _g1h->policy()->record_age_table(&_age_table);
if (_evacuation_failed_info.has_failed()) { if (_evacuation_failed_info.has_failed()) {
@ -592,7 +592,7 @@ void G1ParScanThreadStateSet::flush() {
// because it resets the PLAB allocator where we get this info from. // because it resets the PLAB allocator where we get this info from.
size_t lab_waste_bytes = pss->lab_waste_words() * HeapWordSize; size_t lab_waste_bytes = pss->lab_waste_words() * HeapWordSize;
size_t lab_undo_waste_bytes = pss->lab_undo_waste_words() * HeapWordSize; size_t lab_undo_waste_bytes = pss->lab_undo_waste_words() * HeapWordSize;
size_t copied_bytes = pss->flush(_surviving_young_words_total) * HeapWordSize; size_t copied_bytes = pss->flush(_surviving_young_words_total, _n_workers) * HeapWordSize;
p->record_or_add_thread_work_item(G1GCPhaseTimes::MergePSS, worker_id, copied_bytes, G1GCPhaseTimes::MergePSSCopiedBytes); p->record_or_add_thread_work_item(G1GCPhaseTimes::MergePSS, worker_id, copied_bytes, G1GCPhaseTimes::MergePSSCopiedBytes);
p->record_or_add_thread_work_item(G1GCPhaseTimes::MergePSS, worker_id, lab_waste_bytes, G1GCPhaseTimes::MergePSSLABWasteBytes); p->record_or_add_thread_work_item(G1GCPhaseTimes::MergePSS, worker_id, lab_waste_bytes, G1GCPhaseTimes::MergePSSLABWasteBytes);

View File

@ -152,7 +152,7 @@ public:
// Pass locally gathered statistics to global state. Returns the total number of // Pass locally gathered statistics to global state. Returns the total number of
// HeapWords copied. // HeapWords copied.
size_t flush(size_t* surviving_young_words); size_t flush(size_t* surviving_young_words, uint num_workers);
private: private:
void do_partial_array(PartialArrayScanTask task); void do_partial_array(PartialArrayScanTask task);