8255984: Shenandoah: "adaptive" heuristic is prone to missing load spikes

Reviewed-by: shade
William Kemper 2020-11-23 18:53:59 +00:00 committed by Aleksey Shipilev
parent fa75ad695c
commit aac5c2a862
13 changed files with 339 additions and 34 deletions

shenandoahAdaptiveHeuristics.cpp

@@ -27,14 +27,36 @@
#include "gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.hpp"
#include "gc/shenandoah/shenandoahCollectionSet.hpp"
#include "gc/shenandoah/shenandoahFreeSet.hpp"
#include "gc/shenandoah/shenandoahHeap.inline.hpp"
#include "gc/shenandoah/shenandoahHeapRegion.inline.hpp"
#include "logging/log.hpp"
#include "logging/logTag.hpp"
#include "utilities/quickSort.hpp"
// These constants are used to adjust the margin of error for the moving
// average of the allocation rate and cycle time. The units are standard
// deviations.
const double ShenandoahAdaptiveHeuristics::FULL_PENALTY_SD = 0.2;
const double ShenandoahAdaptiveHeuristics::DEGENERATE_PENALTY_SD = 0.1;
// These are used to decide if we want to make any adjustments at all
// at the end of a successful concurrent cycle.
const double ShenandoahAdaptiveHeuristics::LOWEST_EXPECTED_AVAILABLE_AT_END = -0.5;
const double ShenandoahAdaptiveHeuristics::HIGHEST_EXPECTED_AVAILABLE_AT_END = 0.5;
// These values are the confidence interval expressed as standard deviations.
// At the minimum confidence level, there is a 25% chance that the true value of
// the estimate (average cycle time or allocation rate) is not more than
// MINIMUM_CONFIDENCE standard deviations away from our estimate. Similarly, the
// MAXIMUM_CONFIDENCE interval here means there is a one in a thousand chance
// that the true value of our estimate is outside the interval. These are used
// as bounds on the adjustments applied at the outcome of a GC cycle.
const double ShenandoahAdaptiveHeuristics::MINIMUM_CONFIDENCE = 0.319; // 25%
const double ShenandoahAdaptiveHeuristics::MAXIMUM_CONFIDENCE = 3.291; // 99.9%
ShenandoahAdaptiveHeuristics::ShenandoahAdaptiveHeuristics() :
ShenandoahHeuristics(),
_margin_of_error_sd(ShenandoahAdaptiveInitialConfidence),
_spike_threshold_sd(ShenandoahAdaptiveInitialSpikeThreshold),
_last_trigger(OTHER) { }
ShenandoahAdaptiveHeuristics::~ShenandoahAdaptiveHeuristics() {}
@@ -98,20 +120,94 @@ void ShenandoahAdaptiveHeuristics::choose_collection_set_from_regiondata(Shenand
void ShenandoahAdaptiveHeuristics::record_cycle_start() {
ShenandoahHeuristics::record_cycle_start();
_allocation_rate.allocation_counter_reset();
}
void ShenandoahAdaptiveHeuristics::record_success_concurrent() {
ShenandoahHeuristics::record_success_concurrent();
size_t available = ShenandoahHeap::heap()->free_set()->available();
_available.add(available);
double z_score = 0.0;
if (_available.sd() > 0) {
z_score = (available - _available.avg()) / _available.sd();
}
log_debug(gc, ergo)("Available: " SIZE_FORMAT " %sB, z-score=%.3f. Average available: %.1f %sB +/- %.1f %sB.",
byte_size_in_proper_unit(available), proper_unit_for_byte_size(available),
z_score,
byte_size_in_proper_unit(_available.avg()), proper_unit_for_byte_size(_available.avg()),
byte_size_in_proper_unit(_available.sd()), proper_unit_for_byte_size(_available.sd()));
// In the case when a concurrent GC cycle completes successfully but with an
// unusually small amount of available memory, we will adjust our trigger
// parameters so that they are more likely to initiate a new cycle.
// Conversely, when a GC cycle results in an above average amount of available
// memory, we will adjust the trigger parameters to be less likely to initiate
// a GC cycle.
//
// The z-score we've computed is in no way statistically related to the
// trigger parameters, but it has the nice property that worse z-scores for
// available memory call for larger adjustments to the trigger
// parameters. It also results in fewer adjustments as the application
// stabilizes.
//
// In order to avoid making endless and likely unnecessary adjustments to the
// trigger parameters, the change in available memory (with respect to the
// average) at the end of a cycle must be beyond these threshold values.
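//
// As an illustration (numbers are hypothetical, not from this change):
// if available memory averages 200 MB with a standard deviation of
// 40 MB, a cycle ending with 120 MB available has
// z = (120 - 200) / 40 = -2.0. That is below
// LOWEST_EXPECTED_AVAILABLE_AT_END, so the last trigger's parameter is
// adjusted by -(-2.0) / 100 = +0.02 standard deviations, nudging that
// trigger to fire slightly earlier next time.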
if (z_score < LOWEST_EXPECTED_AVAILABLE_AT_END ||
z_score > HIGHEST_EXPECTED_AVAILABLE_AT_END) {
// The sign is flipped because a negative z-score indicates that the
// available memory at the end of the cycle is below average. Positive
// adjustments make the triggers more sensitive (i.e., more likely to fire).
// The z-score also gives us a measure of just how far below normal. This
// property allows us to adjust the trigger parameters proportionally.
//
// The `100` here is used to attenuate the size of our adjustments. This
// number was chosen empirically. It also means the adjustments at the end of
// a concurrent cycle are an order of magnitude smaller than the adjustments
// made for a degenerated or full GC cycle (which themselves were also
// chosen empirically).
adjust_last_trigger_parameters(z_score / -100);
}
}
void ShenandoahAdaptiveHeuristics::record_success_degenerated() {
ShenandoahHeuristics::record_success_degenerated();
// Adjust both triggers' parameters in the case of a degenerated GC because
// either of them should have triggered earlier to avoid this case.
adjust_margin_of_error(DEGENERATE_PENALTY_SD);
adjust_spike_threshold(DEGENERATE_PENALTY_SD);
}
void ShenandoahAdaptiveHeuristics::record_success_full() {
ShenandoahHeuristics::record_success_full();
// Adjust both triggers' parameters in the case of a full GC because
// either of them should have triggered earlier to avoid this case.
adjust_margin_of_error(FULL_PENALTY_SD);
adjust_spike_threshold(FULL_PENALTY_SD);
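// For example, with FULL_PENALTY_SD = 0.2 this raises the margin of
// error by 0.2 standard deviations and lowers the spike threshold by
// the same amount (adjust_spike_threshold subtracts its argument), so
// both triggers become more eager after a full GC.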
}
static double saturate(double value, double min, double max) {
return MAX2(MIN2(value, max), min);
}
bool ShenandoahAdaptiveHeuristics::should_start_gc() {
ShenandoahHeap* heap = ShenandoahHeap::heap();
size_t max_capacity = heap->max_capacity();
size_t capacity = heap->soft_max_capacity();
size_t available = heap->free_set()->available();
size_t allocated = heap->bytes_allocated_since_gc_start();
// Make sure the code below treats available without the soft tail.
size_t soft_tail = max_capacity - capacity;
available = (available > soft_tail) ? (available - soft_tail) : 0;
// Track allocation rate even if we decide to start a cycle for other reasons.
double rate = _allocation_rate.sample(allocated);
_last_trigger = OTHER;
size_t min_threshold = capacity / 100 * ShenandoahMinFreeThreshold;
if (available < min_threshold) {
log_info(gc)("Trigger: Free (" SIZE_FORMAT "%s) is below minimum threshold (" SIZE_FORMAT "%s)",
@@ -120,7 +216,6 @@ bool ShenandoahAdaptiveHeuristics::should_start_gc() const {
return true;
}
// Check if we need to learn a bit about the application
const size_t max_learn = ShenandoahLearningSteps;
if (_gc_times_learned < max_learn) {
size_t init_threshold = capacity / 100 * ShenandoahInitFreeThreshold;
@@ -136,7 +231,6 @@ bool ShenandoahAdaptiveHeuristics::should_start_gc() const {
// Check if allocation headroom is still okay. This also factors in:
// 1. Some space to absorb allocation spikes
// 2. Accumulated penalties from Degenerated and Full GC
size_t allocation_headroom = available;
size_t spike_headroom = capacity / 100 * ShenandoahAllocSpikeFactor;
@@ -145,24 +239,127 @@
allocation_headroom -= MIN2(allocation_headroom, spike_headroom);
allocation_headroom -= MIN2(allocation_headroom, penalties);
double avg_cycle_time = _gc_time_history->davg() + (_margin_of_error_sd * _gc_time_history->dsd());
double avg_alloc_rate = _allocation_rate.upper_bound(_margin_of_error_sd);
if (avg_cycle_time > allocation_headroom / avg_alloc_rate) {
log_info(gc)("Trigger: Average GC time (%.2f ms) is above the time for average allocation rate (%.0f %sB/s) to deplete free headroom (" SIZE_FORMAT "%s) (margin of error = %.2f)",
avg_cycle_time * 1000,
byte_size_in_proper_unit(avg_alloc_rate), proper_unit_for_byte_size(avg_alloc_rate),
byte_size_in_proper_unit(allocation_headroom), proper_unit_for_byte_size(allocation_headroom),
_margin_of_error_sd);
log_info(gc, ergo)("Free headroom: " SIZE_FORMAT "%s (free) - " SIZE_FORMAT "%s (spike) - " SIZE_FORMAT "%s (penalties) = " SIZE_FORMAT "%s",
byte_size_in_proper_unit(available), proper_unit_for_byte_size(available),
byte_size_in_proper_unit(spike_headroom), proper_unit_for_byte_size(spike_headroom),
byte_size_in_proper_unit(penalties), proper_unit_for_byte_size(penalties),
byte_size_in_proper_unit(allocation_headroom), proper_unit_for_byte_size(allocation_headroom));
_last_trigger = RATE;
return true;
}
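// As an illustration (numbers are hypothetical): with 1 GB of
// allocation headroom and an upper-bound allocation rate of 500 MB/s,
// the headroom is depleted in about 2 seconds, so any padded average
// cycle time above 2 seconds fires the RATE trigger above.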
bool is_spiking = _allocation_rate.is_spiking(rate, _spike_threshold_sd);
if (is_spiking && avg_cycle_time > allocation_headroom / rate) {
log_info(gc)("Trigger: Average GC time (%.2f ms) is above the time for instantaneous allocation rate (%.0f %sB/s) to deplete free headroom (" SIZE_FORMAT "%s) (spike threshold = %.2f)",
avg_cycle_time * 1000,
byte_size_in_proper_unit(rate), proper_unit_for_byte_size(rate),
byte_size_in_proper_unit(allocation_headroom), proper_unit_for_byte_size(allocation_headroom),
_spike_threshold_sd);
_last_trigger = SPIKE;
return true;
}
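// As an illustration (numbers are hypothetical): if sampled rates
// average 100 MB/s with a standard deviation of 30 MB/s, a sample of
// 160 MB/s has z = (160 - 100) / 30 = 2.0, which exceeds the default
// ShenandoahAdaptiveInitialSpikeThreshold of 1.8 and is treated as a
// spike.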
return ShenandoahHeuristics::should_start_gc();
}
void ShenandoahAdaptiveHeuristics::adjust_last_trigger_parameters(double amount) {
switch (_last_trigger) {
case RATE:
adjust_margin_of_error(amount);
break;
case SPIKE:
adjust_spike_threshold(amount);
break;
case OTHER:
// nothing to adjust here.
break;
default:
ShouldNotReachHere();
}
}
void ShenandoahAdaptiveHeuristics::adjust_margin_of_error(double amount) {
_margin_of_error_sd = saturate(_margin_of_error_sd + amount, MINIMUM_CONFIDENCE, MAXIMUM_CONFIDENCE);
log_debug(gc, ergo)("Margin of error now %.2f", _margin_of_error_sd);
}
void ShenandoahAdaptiveHeuristics::adjust_spike_threshold(double amount) {
_spike_threshold_sd = saturate(_spike_threshold_sd - amount, MINIMUM_CONFIDENCE, MAXIMUM_CONFIDENCE);
log_debug(gc, ergo)("Spike threshold now: %.2f", _spike_threshold_sd);
}
ShenandoahAllocationRate::ShenandoahAllocationRate() :
_last_sample_time(os::elapsedTime()),
_last_sample_value(0),
_interval_sec(1.0 / ShenandoahAdaptiveSampleFrequencyHz),
_rate(int(ShenandoahAdaptiveSampleSizeSeconds * ShenandoahAdaptiveSampleFrequencyHz), ShenandoahAdaptiveDecayFactor),
_rate_avg(int(ShenandoahAdaptiveSampleSizeSeconds * ShenandoahAdaptiveSampleFrequencyHz), ShenandoahAdaptiveDecayFactor) {
}
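// With the default ShenandoahAdaptiveSampleSizeSeconds (10) and
// ShenandoahAdaptiveSampleFrequencyHz (10), each TruncatedSeq above
// holds int(10 * 10) = 100 samples, i.e. a ten-second moving window.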
double ShenandoahAllocationRate::sample(size_t allocated) {
double now = os::elapsedTime();
double rate = 0.0;
if (now - _last_sample_time > _interval_sec) {
if (allocated >= _last_sample_value) {
rate = instantaneous_rate(now, allocated);
_rate.add(rate);
_rate_avg.add(_rate.avg());
}
_last_sample_time = now;
_last_sample_value = allocated;
}
return rate;
}
double ShenandoahAllocationRate::upper_bound(double sds) const {
// Here we are using the standard deviation of the computed running
// average, rather than the standard deviation of the samples that went
// into the moving average. This is a much more stable value and is tied
// to the actual statistic in use (moving average over samples of averages).
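// As an illustration (numbers are hypothetical): with davg() at
// 300 MB/s and the dsd() of the averaged series at 20 MB/s,
// upper_bound(1.8) returns 300 + 1.8 * 20 = 336 MB/s.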
return _rate.davg() + (sds * _rate_avg.dsd());
}
void ShenandoahAllocationRate::allocation_counter_reset() {
_last_sample_time = os::elapsedTime();
_last_sample_value = 0;
}
bool ShenandoahAllocationRate::is_spiking(double rate, double threshold) const {
if (rate <= 0.0) {
return false;
}
double sd = _rate.sd();
if (sd > 0) {
// There is a small chance that the rate has already been sampled, but it
// seems not to matter in practice.
double z_score = (rate - _rate.avg()) / sd;
if (z_score > threshold) {
return true;
}
}
return false;
}
double ShenandoahAllocationRate::instantaneous_rate(size_t allocated) const {
return instantaneous_rate(os::elapsedTime(), allocated);
}
double ShenandoahAllocationRate::instantaneous_rate(double time, size_t allocated) const {
size_t last_value = _last_sample_value;
double last_time = _last_sample_time;
size_t allocation_delta = (allocated > last_value) ? (allocated - last_value) : 0;
double time_delta_sec = time - last_time;
return (time_delta_sec > 0) ? (allocation_delta / time_delta_sec) : 0;
}

shenandoahAdaptiveHeuristics.hpp

@@ -29,6 +29,28 @@
#include "gc/shenandoah/shenandoahPhaseTimings.hpp"
#include "utilities/numberSeq.hpp"
class ShenandoahAllocationRate : public CHeapObj<mtGC> {
public:
explicit ShenandoahAllocationRate();
void allocation_counter_reset();
double sample(size_t allocated);
double instantaneous_rate(size_t allocated) const;
double upper_bound(double sds) const;
bool is_spiking(double rate, double threshold) const;
private:
double instantaneous_rate(double time, size_t allocated) const;
double _last_sample_time;
size_t _last_sample_value;
double _interval_sec;
TruncatedSeq _rate;
TruncatedSeq _rate_avg;
};
class ShenandoahAdaptiveHeuristics : public ShenandoahHeuristics {
public:
ShenandoahAdaptiveHeuristics();
@@ -40,12 +62,70 @@ public:
size_t actual_free);
void record_cycle_start();
void record_success_concurrent();
void record_success_degenerated();
void record_success_full();
virtual bool should_start_gc() const;
virtual bool should_start_gc();
virtual const char* name() { return "Adaptive"; }
virtual bool is_diagnostic() { return false; }
virtual bool is_experimental() { return false; }
private:
// These are used to adjust the margin of error and the spike threshold
// in response to GC cycle outcomes. These values are shared, but the
// margin of error and spike threshold trend in opposite directions.
const static double FULL_PENALTY_SD;
const static double DEGENERATE_PENALTY_SD;
const static double MINIMUM_CONFIDENCE;
const static double MAXIMUM_CONFIDENCE;
const static double LOWEST_EXPECTED_AVAILABLE_AT_END;
const static double HIGHEST_EXPECTED_AVAILABLE_AT_END;
friend class ShenandoahAllocationRate;
// Used to record the last trigger that signaled to start a GC.
// This is used to decide whether to adjust the margin of error for the
// average cycle time and allocation rate, or the allocation spike
// detection threshold.
enum Trigger {
SPIKE, RATE, OTHER
};
void adjust_last_trigger_parameters(double amount);
void adjust_margin_of_error(double amount);
void adjust_spike_threshold(double amount);
ShenandoahAllocationRate _allocation_rate;
// The margin of error expressed in standard deviations to add to our
// average cycle time and allocation rate. As this value increases, we
// tend to overestimate the rate at which mutators will deplete the
// heap. In other words, erring on the side of caution will trigger more
// concurrent GCs.
double _margin_of_error_sd;
// The allocation spike threshold is expressed in standard deviations.
// If the most recent sample of the allocation rate exceeds the moving
// average by more than this many standard deviations, a GC cycle is
// started. As this value decreases, the sensitivity to allocation
// spikes increases. In other words, lowering the spike threshold will
// tend to increase the number of concurrent GCs.
double _spike_threshold_sd;
// Remember which trigger is responsible for the last GC cycle. When the
// outcome of the cycle is evaluated we will adjust the parameters for the
// corresponding triggers. Note that successful outcomes will raise
// the spike threshold and lower the margin of error.
Trigger _last_trigger;
// Keep track of the available memory at the end of a GC cycle. This
// establishes what is 'normal' for the application and is used as a
// source of feedback to adjust trigger parameters.
TruncatedSeq _available;
};
#endif // SHARE_GC_SHENANDOAH_HEURISTICS_SHENANDOAHADAPTIVEHEURISTICS_HPP

shenandoahAggressiveHeuristics.cpp

@@ -56,7 +56,7 @@ void ShenandoahAggressiveHeuristics::choose_collection_set_from_regiondata(Shena
}
}
bool ShenandoahAggressiveHeuristics::should_start_gc() const {
bool ShenandoahAggressiveHeuristics::should_start_gc() {
log_info(gc)("Trigger: Start next cycle immediately");
return true;
}

shenandoahAggressiveHeuristics.hpp

@@ -35,7 +35,7 @@ public:
RegionData* data, size_t size,
size_t free);
virtual bool should_start_gc() const;
virtual bool should_start_gc();
virtual bool should_unload_classes();

shenandoahCompactHeuristics.cpp

@@ -44,7 +44,7 @@ ShenandoahCompactHeuristics::ShenandoahCompactHeuristics() : ShenandoahHeuristic
SHENANDOAH_ERGO_OVERRIDE_DEFAULT(ShenandoahGarbageThreshold, 10);
}
bool ShenandoahCompactHeuristics::should_start_gc() const {
bool ShenandoahCompactHeuristics::should_start_gc() {
ShenandoahHeap* heap = ShenandoahHeap::heap();
size_t max_capacity = heap->max_capacity();

shenandoahCompactHeuristics.hpp

@@ -31,7 +31,7 @@ class ShenandoahCompactHeuristics : public ShenandoahHeuristics {
public:
ShenandoahCompactHeuristics();
virtual bool should_start_gc() const;
virtual bool should_start_gc();
virtual void choose_collection_set_from_regiondata(ShenandoahCollectionSet* cset,
RegionData* data, size_t size,

shenandoahHeuristics.cpp

@@ -50,7 +50,7 @@ ShenandoahHeuristics::ShenandoahHeuristics() :
_last_cycle_end(0),
_gc_times_learned(0),
_gc_time_penalties(0),
_gc_time_history(new TruncatedSeq(5)),
_gc_time_history(new TruncatedSeq(10, ShenandoahAdaptiveDecayFactor)),
_metaspace_oom()
{
// No unloading during concurrent mark? Communicate that to heuristics
@@ -182,7 +182,7 @@ void ShenandoahHeuristics::record_cycle_end() {
_last_cycle_end = os::elapsedTime();
}
bool ShenandoahHeuristics::should_start_gc() const {
bool ShenandoahHeuristics::should_start_gc() {
// Perform GC to cleanup metaspace
if (has_metaspace_oom()) {
// Some of vmTestbase/metaspace tests depend on following line to count GC cycles

shenandoahHeuristics.hpp

@@ -104,7 +104,7 @@ public:
virtual void record_cycle_end();
virtual bool should_start_gc() const;
virtual bool should_start_gc();
virtual bool should_degenerate_cycle();

shenandoahPassiveHeuristics.cpp

@@ -31,7 +31,7 @@
#include "logging/log.hpp"
#include "logging/logTag.hpp"
bool ShenandoahPassiveHeuristics::should_start_gc() const {
bool ShenandoahPassiveHeuristics::should_start_gc() {
// Never do concurrent GCs.
return false;
}

shenandoahPassiveHeuristics.hpp

@@ -29,7 +29,7 @@
class ShenandoahPassiveHeuristics : public ShenandoahHeuristics {
public:
virtual bool should_start_gc() const;
virtual bool should_start_gc();
virtual bool should_unload_classes();

shenandoahStaticHeuristics.cpp

@@ -39,7 +39,7 @@ ShenandoahStaticHeuristics::ShenandoahStaticHeuristics() : ShenandoahHeuristics(
ShenandoahStaticHeuristics::~ShenandoahStaticHeuristics() {}
bool ShenandoahStaticHeuristics::should_start_gc() const {
bool ShenandoahStaticHeuristics::should_start_gc() {
ShenandoahHeap* heap = ShenandoahHeap::heap();
size_t max_capacity = heap->max_capacity();

shenandoahStaticHeuristics.hpp

@@ -33,7 +33,7 @@ public:
virtual ~ShenandoahStaticHeuristics();
virtual bool should_start_gc() const;
virtual bool should_start_gc();
virtual void choose_collection_set_from_regiondata(ShenandoahCollectionSet* cset,
RegionData* data, size_t size,

shenandoah_globals.hpp

@@ -127,6 +127,34 @@
"to 100 effectively disables the shortcut.") \
range(0,100) \
\
product(uintx, ShenandoahAdaptiveSampleFrequencyHz, 10, EXPERIMENTAL, \
"The number of times per second to update the allocation rate " \
"moving average.") \
\
product(uintx, ShenandoahAdaptiveSampleSizeSeconds, 10, EXPERIMENTAL, \
"The size of the moving window over which the average " \
"allocation rate is maintained. The total number of samples " \
"is the product of this number and the sample frequency.") \
\
product(double, ShenandoahAdaptiveInitialConfidence, 1.8, EXPERIMENTAL, \
"The number of standard deviations used to determine an initial " \
"margin of error for the average cycle time and average " \
"allocation rate. Increasing this value will cause the " \
"heuristic to initiate more concurrent cycles." ) \
\
product(double, ShenandoahAdaptiveInitialSpikeThreshold, 1.8, EXPERIMENTAL, \
"If the most recently sampled allocation rate is more than " \
"this many standard deviations away from the moving average, " \
"then a cycle is initiated. This value controls how sensitive " \
"the heuristic is to allocation spikes. Decreasing this number " \
"increases the sensitivity. ") \
\
product(double, ShenandoahAdaptiveDecayFactor, 0.5, EXPERIMENTAL, \
"The decay factor (alpha) used for values in the weighted " \
"moving average of cycle time and allocation rate. " \
"Larger values give more weight to recent values.") \
range(0,1.0) \
\
product(uintx, ShenandoahGuaranteedGCInterval, 5*60*1000, EXPERIMENTAL, \
"Many heuristics would guarantee a concurrent GC cycle at " \
"least with this interval. This is useful when large idle " \