diff --git a/src/hotspot/share/gc/g1/g1Allocator.cpp b/src/hotspot/share/gc/g1/g1Allocator.cpp index 1f9a3b862b1..815d0ec988e 100644 --- a/src/hotspot/share/gc/g1/g1Allocator.cpp +++ b/src/hotspot/share/gc/g1/g1Allocator.cpp @@ -296,6 +296,24 @@ HeapWord* G1Allocator::old_attempt_allocation(size_t min_word_size, G1PLABAllocator::G1PLABAllocator(G1Allocator* allocator) : _g1h(G1CollectedHeap::heap()), _allocator(allocator) { + + if (ResizePLAB) { + // Expected refills = (100 - G1LastPLABAverageOccupancy) / TargetPLABWastePct; see + // G1EvacStats::compute_desired_plab_size for the reasoning behind this value. + double const ExpectedNumberOfRefills = (100 - G1LastPLABAverageOccupancy) / TargetPLABWastePct; + // Add some padding to the threshold to not boost exactly when the targeted refills + // were reached. + // E.g. due to limitation of PLAB size to non-humongous objects and region boundaries + // a thread may experience more refills than expected. Keeping the PLAB waste low + // is the main goal, so being a bit conservative is better. + double const PadFactor = 1.5; + _tolerated_refills = MAX2(ExpectedNumberOfRefills, 1.0) * PadFactor; + } else { + // Make the tolerated refills a huge number; -1 because we add one to this + // value later and it would overflow otherwise. + _tolerated_refills = SIZE_MAX - 1; + } + for (region_type_t state = 0; state < G1HeapRegionAttr::Num; state++) { _direct_allocated[state] = 0; uint length = alloc_buffers_length(state); @@ -305,7 +323,10 @@ G1PLABAllocator::G1PLABAllocator(G1Allocator* allocator) : _alloc_buffers[state][node_index] = new PLAB(word_sz); } _num_plab_fills[state] = 0; + // The initial PLAB refill should not count, hence the +1 for the first boost. 
+ _plab_fill_counter[state] = _tolerated_refills + 1; _num_direct_allocations[state] = 0; + _cur_desired_plab_size[state] = word_sz; } } @@ -327,18 +348,35 @@ HeapWord* G1PLABAllocator::allocate_direct_or_new_plab(G1HeapRegionAttr dest, size_t word_sz, bool* plab_refill_failed, uint node_index) { - size_t plab_word_size = _g1h->desired_plab_sz(dest); + size_t plab_word_size = plab_size(dest.type()); + size_t next_plab_word_size = plab_word_size; + + bool const should_boost_plab = _plab_fill_counter[dest.type()] == 0; + if (should_boost_plab) { + next_plab_word_size = _g1h->clamp_plab_size(next_plab_word_size * 2); + } + size_t required_in_plab = PLAB::size_required_for_allocation(word_sz); - // Only get a new PLAB if the allocation fits and it would not waste more than - // ParallelGCBufferWastePct in the existing buffer. - if ((required_in_plab <= plab_word_size) && + // Only get a new PLAB if the allocation fits into the to-be-allocated PLAB and + // it would not waste more than ParallelGCBufferWastePct in the current PLAB. + // Boosting the PLAB also increasingly allows more waste to occur. 
+ if ((required_in_plab <= next_plab_word_size) && may_throw_away_buffer(required_in_plab, plab_word_size)) { PLAB* alloc_buf = alloc_buffer(dest, node_index); - alloc_buf->retire(); + guarantee(alloc_buf->words_remaining() <= required_in_plab, "must be"); _num_plab_fills[dest.type()]++; + alloc_buf->retire(); + + if (should_boost_plab) { + _plab_fill_counter[dest.type()] = _tolerated_refills; + } else { + _plab_fill_counter[dest.type()]--; + } + plab_word_size = next_plab_word_size; + _cur_desired_plab_size[dest.type()] = plab_word_size; size_t actual_plab_size = 0; HeapWord* buf = _allocator->par_allocate_during_gc(dest, @@ -376,7 +414,7 @@ void G1PLABAllocator::undo_allocation(G1HeapRegionAttr dest, HeapWord* obj, size alloc_buffer(dest, node_index)->undo_allocation(obj, word_sz); } -void G1PLABAllocator::flush_and_retire_stats() { +void G1PLABAllocator::flush_and_retire_stats(uint num_workers) { for (region_type_t state = 0; state < G1HeapRegionAttr::Num; state++) { G1EvacStats* stats = _g1h->alloc_buffer_stats(state); for (uint node_index = 0; node_index < alloc_buffers_length(state); node_index++) { @@ -389,6 +427,16 @@ void G1PLABAllocator::flush_and_retire_stats() { stats->add_direct_allocated(_direct_allocated[state]); stats->add_num_direct_allocated(_num_direct_allocations[state]); } + + log_trace(gc, plab)("PLAB boost: Young %zu -> %zu refills %zu (tolerated %zu) Old %zu -> %zu refills %zu (tolerated %zu)", + _g1h->alloc_buffer_stats(G1HeapRegionAttr::Young)->desired_plab_size(num_workers), + plab_size(G1HeapRegionAttr::Young), + _num_plab_fills[G1HeapRegionAttr::Young], + _tolerated_refills, + _g1h->alloc_buffer_stats(G1HeapRegionAttr::Old)->desired_plab_size(num_workers), + plab_size(G1HeapRegionAttr::Old), + _num_plab_fills[G1HeapRegionAttr::Old], + _tolerated_refills); } size_t G1PLABAllocator::waste() const { @@ -404,6 +452,10 @@ size_t G1PLABAllocator::waste() const { return result; } +size_t G1PLABAllocator::plab_size(G1HeapRegionAttr which) const 
{ + return _cur_desired_plab_size[which.type()]; +} + size_t G1PLABAllocator::undo_waste() const { size_t result = 0; for (region_type_t state = 0; state < G1HeapRegionAttr::Num; state++) { diff --git a/src/hotspot/share/gc/g1/g1Allocator.hpp b/src/hotspot/share/gc/g1/g1Allocator.hpp index 9c4e007c5fe..2e6672f8037 100644 --- a/src/hotspot/share/gc/g1/g1Allocator.hpp +++ b/src/hotspot/share/gc/g1/g1Allocator.hpp @@ -165,7 +165,16 @@ private: size_t _num_plab_fills[G1HeapRegionAttr::Num]; size_t _num_direct_allocations[G1HeapRegionAttr::Num]; - void flush_and_retire_stats(); + size_t _plab_fill_counter[G1HeapRegionAttr::Num]; + // Current desired PLAB size incorporating eventual boosting. + size_t _cur_desired_plab_size[G1HeapRegionAttr::Num]; + + // The amount of PLAB refills tolerated until boosting PLAB size. + // This value is the same for all generations because they all use the same + // resizing logic. + size_t _tolerated_refills; + + void flush_and_retire_stats(uint num_workers); inline PLAB* alloc_buffer(G1HeapRegionAttr dest, uint node_index) const; inline PLAB* alloc_buffer(region_type_t dest, uint node_index) const; @@ -181,6 +190,7 @@ public: size_t waste() const; size_t undo_waste() const; + size_t plab_size(G1HeapRegionAttr which) const; // Allocate word_sz words in dest, either directly into the regions or by // allocating a new PLAB. Returns the address of the allocated memory, NULL if diff --git a/src/hotspot/share/gc/g1/g1CollectedHeap.hpp b/src/hotspot/share/gc/g1/g1CollectedHeap.hpp index 6d50d137466..7ca576ab6ee 100644 --- a/src/hotspot/share/gc/g1/g1CollectedHeap.hpp +++ b/src/hotspot/share/gc/g1/g1CollectedHeap.hpp @@ -560,6 +560,12 @@ public: // Determines PLAB size for a given destination. inline size_t desired_plab_sz(G1HeapRegionAttr dest); + // Clamp the given PLAB word size to allowed values. 
Prevents humongous PLAB sizes + // for two reasons: + // * PLABs are allocated using similar paths as oops, but should + // never be in a humongous region + // * Allowing humongous PLABs needlessly churns the region free lists + inline size_t clamp_plab_size(size_t value) const; // Do anything common to GC's. void gc_prologue(bool full); diff --git a/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp b/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp index 0108492a8c2..6482fbd077f 100644 --- a/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp +++ b/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp @@ -67,11 +67,11 @@ G1EvacStats* G1CollectedHeap::alloc_buffer_stats(G1HeapRegionAttr dest) { size_t G1CollectedHeap::desired_plab_sz(G1HeapRegionAttr dest) { size_t gclab_word_size = alloc_buffer_stats(dest)->desired_plab_size(workers()->active_workers()); - // Prevent humongous PLAB sizes for two reasons: - // * PLABs are allocated using a similar paths as oops, but should - // never be in a humongous region - // * Allowing humongous PLABs needlessly churns the region free lists - return MIN2(_humongous_object_threshold_in_words, gclab_word_size); + return clamp_plab_size(gclab_word_size); +} + +inline size_t G1CollectedHeap::clamp_plab_size(size_t value) const { + return clamp(value, PLAB::min_size(), _humongous_object_threshold_in_words); } // Inline functions for G1CollectedHeap diff --git a/src/hotspot/share/gc/g1/g1EvacStats.cpp b/src/hotspot/share/gc/g1/g1EvacStats.cpp index 5522a015339..85ec939e79e 100644 --- a/src/hotspot/share/gc/g1/g1EvacStats.cpp +++ b/src/hotspot/share/gc/g1/g1EvacStats.cpp @@ -82,8 +82,11 @@ size_t G1EvacStats::compute_desired_plab_size() const { // TargetPLABWastePct of 10 had been set. // // So we could waste up to 10 words to meet that percentage. Given that we - // also assume that that buffer is typically half-full, the new desired PLAB - // size is set to 20 words. 
+ // also assume that that buffer is typically half-full (G1LastPLABAverageOccupancy), + // the new desired PLAB size is set to 20 words. + // + // (This also implies that we expect (100 - G1LastPLABAverageOccupancy) / TargetPLABWastePct + // number of refills during allocation). // // The amount of allocation performed should be independent of the number of // threads, so should the maximum waste we can spend in total. So if diff --git a/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp b/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp index 0cd6aebc8cc..4b17ec314d7 100644 --- a/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp +++ b/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp @@ -140,6 +140,7 @@ class G1GCPhaseTimes : public CHeapObj { enum GCMergePSSWorkItems { MergePSSCopiedBytes, + MergePSSLABSize, MergePSSLABWasteBytes, MergePSSLABUndoWasteBytes }; diff --git a/src/hotspot/share/gc/g1/g1ParScanThreadState.cpp b/src/hotspot/share/gc/g1/g1ParScanThreadState.cpp index 9c43f231df8..afa1b99f684 100644 --- a/src/hotspot/share/gc/g1/g1ParScanThreadState.cpp +++ b/src/hotspot/share/gc/g1/g1ParScanThreadState.cpp @@ -111,11 +111,11 @@ G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, initialize_numa_stats(); } -size_t G1ParScanThreadState::flush(size_t* surviving_young_words) { +size_t G1ParScanThreadState::flush(size_t* surviving_young_words, uint num_workers) { _rdc_local_qset.flush(); flush_numa_stats(); // Update allocation statistics. - _plab_allocator->flush_and_retire_stats(); + _plab_allocator->flush_and_retire_stats(num_workers); _g1h->policy()->record_age_table(&_age_table); if (_evacuation_failed_info.has_failed()) { @@ -592,7 +592,7 @@ void G1ParScanThreadStateSet::flush() { // because it resets the PLAB allocator where we get this info from. 
size_t lab_waste_bytes = pss->lab_waste_words() * HeapWordSize; size_t lab_undo_waste_bytes = pss->lab_undo_waste_words() * HeapWordSize; - size_t copied_bytes = pss->flush(_surviving_young_words_total) * HeapWordSize; + size_t copied_bytes = pss->flush(_surviving_young_words_total, _n_workers) * HeapWordSize; p->record_or_add_thread_work_item(G1GCPhaseTimes::MergePSS, worker_id, copied_bytes, G1GCPhaseTimes::MergePSSCopiedBytes); p->record_or_add_thread_work_item(G1GCPhaseTimes::MergePSS, worker_id, lab_waste_bytes, G1GCPhaseTimes::MergePSSLABWasteBytes); diff --git a/src/hotspot/share/gc/g1/g1ParScanThreadState.hpp b/src/hotspot/share/gc/g1/g1ParScanThreadState.hpp index dab2d718b7c..a57e1bbc26c 100644 --- a/src/hotspot/share/gc/g1/g1ParScanThreadState.hpp +++ b/src/hotspot/share/gc/g1/g1ParScanThreadState.hpp @@ -152,7 +152,7 @@ public: // Pass locally gathered statistics to global state. Returns the total number of // HeapWords copied. - size_t flush(size_t* surviving_young_words); + size_t flush(size_t* surviving_young_words, uint num_workers); private: void do_partial_array(PartialArrayScanTask task);