8288966: Better handle very spiky promotion in G1
Reviewed-by: iwalulya, kbarrett
This commit is contained in:
parent
07c797720d
commit
7b5f9edb59
@ -296,6 +296,24 @@ HeapWord* G1Allocator::old_attempt_allocation(size_t min_word_size,
|
||||
G1PLABAllocator::G1PLABAllocator(G1Allocator* allocator) :
|
||||
_g1h(G1CollectedHeap::heap()),
|
||||
_allocator(allocator) {
|
||||
|
||||
if (ResizePLAB) {
|
||||
// See G1EvacStats::compute_desired_plab_size for the reasoning why this is the
|
||||
// expected number of refills.
|
||||
double const ExpectedNumberOfRefills = G1LastPLABAverageOccupancy / TargetPLABWastePct;
|
||||
// Add some padding to the threshold to not boost exactly when the targeted refills
|
||||
// were reached.
|
||||
// E.g. due to limitation of PLAB size to non-humongous objects and region boundaries
|
||||
// a thread may experience more refills than expected. Keeping the PLAB waste low
|
||||
// is the main goal, so being a bit conservative is better.
|
||||
double const PadFactor = 1.5;
|
||||
_tolerated_refills = MAX2(ExpectedNumberOfRefills, 1.0) * PadFactor;
|
||||
} else {
|
||||
// Make the tolerated refills a huge number; -1 because we add one to this
|
||||
// value later and it would overflow otherwise.
|
||||
_tolerated_refills = SIZE_MAX - 1;
|
||||
}
|
||||
|
||||
for (region_type_t state = 0; state < G1HeapRegionAttr::Num; state++) {
|
||||
_direct_allocated[state] = 0;
|
||||
uint length = alloc_buffers_length(state);
|
||||
@ -305,7 +323,10 @@ G1PLABAllocator::G1PLABAllocator(G1Allocator* allocator) :
|
||||
_alloc_buffers[state][node_index] = new PLAB(word_sz);
|
||||
}
|
||||
_num_plab_fills[state] = 0;
|
||||
// The initial PLAB refill should not count, hence the +1 for the first boost.
|
||||
_plab_fill_counter[state] = _tolerated_refills + 1;
|
||||
_num_direct_allocations[state] = 0;
|
||||
_cur_desired_plab_size[state] = word_sz;
|
||||
}
|
||||
}
|
||||
|
||||
@ -327,18 +348,35 @@ HeapWord* G1PLABAllocator::allocate_direct_or_new_plab(G1HeapRegionAttr dest,
|
||||
size_t word_sz,
|
||||
bool* plab_refill_failed,
|
||||
uint node_index) {
|
||||
size_t plab_word_size = _g1h->desired_plab_sz(dest);
|
||||
size_t plab_word_size = plab_size(dest.type());
|
||||
size_t next_plab_word_size = plab_word_size;
|
||||
|
||||
bool const should_boost_plab = _plab_fill_counter[dest.type()] == 0;
|
||||
if (should_boost_plab) {
|
||||
next_plab_word_size = _g1h->clamp_plab_size(next_plab_word_size * 2);
|
||||
}
|
||||
|
||||
size_t required_in_plab = PLAB::size_required_for_allocation(word_sz);
|
||||
|
||||
// Only get a new PLAB if the allocation fits and it would not waste more than
|
||||
// ParallelGCBufferWastePct in the existing buffer.
|
||||
if ((required_in_plab <= plab_word_size) &&
|
||||
// Only get a new PLAB if the allocation fits into the to-be-allocated PLAB and
|
||||
// it would not waste more than ParallelGCBufferWastePct in the current PLAB.
|
||||
// Boosting the PLAB also increasingly allows more waste to occur.
|
||||
if ((required_in_plab <= next_plab_word_size) &&
|
||||
may_throw_away_buffer(required_in_plab, plab_word_size)) {
|
||||
|
||||
PLAB* alloc_buf = alloc_buffer(dest, node_index);
|
||||
alloc_buf->retire();
|
||||
guarantee(alloc_buf->words_remaining() <= required_in_plab, "must be");
|
||||
|
||||
_num_plab_fills[dest.type()]++;
|
||||
alloc_buf->retire();
|
||||
|
||||
if (should_boost_plab) {
|
||||
_plab_fill_counter[dest.type()] = _tolerated_refills;
|
||||
} else {
|
||||
_plab_fill_counter[dest.type()]--;
|
||||
}
|
||||
plab_word_size = next_plab_word_size;
|
||||
_cur_desired_plab_size[dest.type()] = plab_word_size;
|
||||
|
||||
size_t actual_plab_size = 0;
|
||||
HeapWord* buf = _allocator->par_allocate_during_gc(dest,
|
||||
@ -376,7 +414,7 @@ void G1PLABAllocator::undo_allocation(G1HeapRegionAttr dest, HeapWord* obj, size
|
||||
alloc_buffer(dest, node_index)->undo_allocation(obj, word_sz);
|
||||
}
|
||||
|
||||
void G1PLABAllocator::flush_and_retire_stats() {
|
||||
void G1PLABAllocator::flush_and_retire_stats(uint num_workers) {
|
||||
for (region_type_t state = 0; state < G1HeapRegionAttr::Num; state++) {
|
||||
G1EvacStats* stats = _g1h->alloc_buffer_stats(state);
|
||||
for (uint node_index = 0; node_index < alloc_buffers_length(state); node_index++) {
|
||||
@ -389,6 +427,16 @@ void G1PLABAllocator::flush_and_retire_stats() {
|
||||
stats->add_direct_allocated(_direct_allocated[state]);
|
||||
stats->add_num_direct_allocated(_num_direct_allocations[state]);
|
||||
}
|
||||
|
||||
log_trace(gc, plab)("PLAB boost: Young %zu -> %zu refills %zu (tolerated %zu) Old %zu -> %zu refills %zu (tolerated %zu)",
|
||||
_g1h->alloc_buffer_stats(G1HeapRegionAttr::Young)->desired_plab_size(num_workers),
|
||||
plab_size(G1HeapRegionAttr::Young),
|
||||
_num_plab_fills[G1HeapRegionAttr::Young],
|
||||
_tolerated_refills,
|
||||
_g1h->alloc_buffer_stats(G1HeapRegionAttr::Old)->desired_plab_size(num_workers),
|
||||
plab_size(G1HeapRegionAttr::Old),
|
||||
_num_plab_fills[G1HeapRegionAttr::Old],
|
||||
_tolerated_refills);
|
||||
}
|
||||
|
||||
size_t G1PLABAllocator::waste() const {
|
||||
@ -404,6 +452,10 @@ size_t G1PLABAllocator::waste() const {
|
||||
return result;
|
||||
}
|
||||
|
||||
// Returns the current desired PLAB size recorded for the region type of
// the given attribute, i.e. the entry of _cur_desired_plab_size selected by
// which.type().
size_t G1PLABAllocator::plab_size(G1HeapRegionAttr which) const {
  const auto type_index = which.type();
  return _cur_desired_plab_size[type_index];
}
|
||||
|
||||
size_t G1PLABAllocator::undo_waste() const {
|
||||
size_t result = 0;
|
||||
for (region_type_t state = 0; state < G1HeapRegionAttr::Num; state++) {
|
||||
|
@ -165,7 +165,16 @@ private:
|
||||
size_t _num_plab_fills[G1HeapRegionAttr::Num];
|
||||
size_t _num_direct_allocations[G1HeapRegionAttr::Num];
|
||||
|
||||
void flush_and_retire_stats();
|
||||
size_t _plab_fill_counter[G1HeapRegionAttr::Num];
|
||||
// Current desired PLAB size, including any boosting applied so far.
|
||||
size_t _cur_desired_plab_size[G1HeapRegionAttr::Num];
|
||||
|
||||
// The amount of PLAB refills tolerated until boosting PLAB size.
|
||||
// This value is the same for all generations because they all use the same
|
||||
// resizing logic.
|
||||
size_t _tolerated_refills;
|
||||
|
||||
void flush_and_retire_stats(uint num_workers);
|
||||
inline PLAB* alloc_buffer(G1HeapRegionAttr dest, uint node_index) const;
|
||||
inline PLAB* alloc_buffer(region_type_t dest, uint node_index) const;
|
||||
|
||||
@ -181,6 +190,7 @@ public:
|
||||
|
||||
size_t waste() const;
|
||||
size_t undo_waste() const;
|
||||
size_t plab_size(G1HeapRegionAttr which) const;
|
||||
|
||||
// Allocate word_sz words in dest, either directly into the regions or by
|
||||
// allocating a new PLAB. Returns the address of the allocated memory, NULL if
|
||||
|
@ -560,6 +560,12 @@ public:
|
||||
|
||||
// Determines PLAB size for a given destination.
|
||||
inline size_t desired_plab_sz(G1HeapRegionAttr dest);
|
||||
// Clamp the given PLAB word size to allowed values. Prevents humongous PLAB sizes
|
||||
// for two reasons:
|
||||
// * PLABs are allocated using similar paths as oops, but should
|
||||
// never be in a humongous region
|
||||
// * Allowing humongous PLABs needlessly churns the region free lists
|
||||
inline size_t clamp_plab_size(size_t value) const;
|
||||
|
||||
// Do anything common to GC's.
|
||||
void gc_prologue(bool full);
|
||||
|
@ -67,11 +67,11 @@ G1EvacStats* G1CollectedHeap::alloc_buffer_stats(G1HeapRegionAttr dest) {
|
||||
|
||||
size_t G1CollectedHeap::desired_plab_sz(G1HeapRegionAttr dest) {
|
||||
size_t gclab_word_size = alloc_buffer_stats(dest)->desired_plab_size(workers()->active_workers());
|
||||
// Prevent humongous PLAB sizes for two reasons:
|
||||
// * PLABs are allocated using a similar paths as oops, but should
|
||||
// never be in a humongous region
|
||||
// * Allowing humongous PLABs needlessly churns the region free lists
|
||||
return MIN2(_humongous_object_threshold_in_words, gclab_word_size);
|
||||
return clamp_plab_size(gclab_word_size);
|
||||
}
|
||||
|
||||
inline size_t G1CollectedHeap::clamp_plab_size(size_t value) const {
|
||||
return clamp(value, PLAB::min_size(), _humongous_object_threshold_in_words);
|
||||
}
|
||||
|
||||
// Inline functions for G1CollectedHeap
|
||||
|
@ -82,8 +82,11 @@ size_t G1EvacStats::compute_desired_plab_size() const {
|
||||
// TargetPLABWastePct of 10 had been set.
|
||||
//
|
||||
// So we could waste up to 10 words to meet that percentage. Given that we
|
||||
// also assume that that buffer is typically half-full, the new desired PLAB
|
||||
// size is set to 20 words.
|
||||
// also assume that that buffer is typically half-full (G1LastPLABAverageOccupancy),
|
||||
// the new desired PLAB size is set to 20 words.
|
||||
//
|
||||
// (This also implies that we expect G1LastPLABAverageOccupancy/TargetPLABWastePct
|
||||
// number of refills during allocation).
|
||||
//
|
||||
// The amount of allocation performed should be independent of the number of
|
||||
// threads, so should the maximum waste we can spend in total. So if
|
||||
|
@ -140,6 +140,7 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
|
||||
|
||||
// Indices of the per-worker work items recorded for the MergePSS phase; a
// value of this enum is passed as the last argument of
// record_or_add_thread_work_item().
enum GCMergePSSWorkItems {
|
||||
// Slot for the number of bytes copied, as reported by the per-thread flush.
MergePSSCopiedBytes,
|
||||
// NOTE(review): presumably the PLAB size in use by the worker; the place
// where this item is recorded is not visible here - confirm.
MergePSSLABSize,
|
||||
// Slot for the PLAB waste, in bytes.
MergePSSLABWasteBytes,
|
||||
// NOTE(review): presumably the PLAB undo waste in bytes; the place where
// this item is recorded is not visible here - confirm.
MergePSSLABUndoWasteBytes
|
||||
};
|
||||
|
@ -111,11 +111,11 @@ G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h,
|
||||
initialize_numa_stats();
|
||||
}
|
||||
|
||||
size_t G1ParScanThreadState::flush(size_t* surviving_young_words) {
|
||||
size_t G1ParScanThreadState::flush(size_t* surviving_young_words, uint num_workers) {
|
||||
_rdc_local_qset.flush();
|
||||
flush_numa_stats();
|
||||
// Update allocation statistics.
|
||||
_plab_allocator->flush_and_retire_stats();
|
||||
_plab_allocator->flush_and_retire_stats(num_workers);
|
||||
_g1h->policy()->record_age_table(&_age_table);
|
||||
|
||||
if (_evacuation_failed_info.has_failed()) {
|
||||
@ -592,7 +592,7 @@ void G1ParScanThreadStateSet::flush() {
|
||||
// because it resets the PLAB allocator where we get this info from.
|
||||
size_t lab_waste_bytes = pss->lab_waste_words() * HeapWordSize;
|
||||
size_t lab_undo_waste_bytes = pss->lab_undo_waste_words() * HeapWordSize;
|
||||
size_t copied_bytes = pss->flush(_surviving_young_words_total) * HeapWordSize;
|
||||
size_t copied_bytes = pss->flush(_surviving_young_words_total, _n_workers) * HeapWordSize;
|
||||
|
||||
p->record_or_add_thread_work_item(G1GCPhaseTimes::MergePSS, worker_id, copied_bytes, G1GCPhaseTimes::MergePSSCopiedBytes);
|
||||
p->record_or_add_thread_work_item(G1GCPhaseTimes::MergePSS, worker_id, lab_waste_bytes, G1GCPhaseTimes::MergePSSLABWasteBytes);
|
||||
|
@ -152,7 +152,7 @@ public:
|
||||
|
||||
// Pass locally gathered statistics to global state. Returns the total number of
|
||||
// HeapWords copied.
|
||||
size_t flush(size_t* surviving_young_words);
|
||||
size_t flush(size_t* surviving_young_words, uint num_workers);
|
||||
|
||||
private:
|
||||
void do_partial_array(PartialArrayScanTask task);
|
||||
|
Loading…
Reference in New Issue
Block a user