8335126: Shenandoah: Improve OOM handling

Reviewed-by: shade, ysr, wkemper, rkennke
This commit is contained in:
Kelvin Nilsen 2024-07-08 18:03:19 +00:00
parent 284671a1e4
commit 3a87eb5c46
3 changed files with 31 additions and 19 deletions

View File

@ -318,7 +318,8 @@ void ShenandoahControlThread::service_concurrent_normal_cycle(GCCause::Cause cau
ShenandoahConcurrentGC gc;
if (gc.collect(cause)) {
// Cycle is complete
// Cycle is complete. There were no failed allocation requests and no degeneration, so count this as good progress.
heap->notify_gc_progress();
heap->heuristics()->record_success_concurrent();
heap->shenandoah_policy()->record_success_concurrent(gc.abbreviated());
} else {

View File

@ -228,7 +228,6 @@ void ShenandoahDegenGC::op_degenerated() {
// Check for futility and fail. There is no reason to do several back-to-back Degenerated cycles,
// because that probably means the heap is overloaded and/or fragmented.
if (!metrics.is_good_progress()) {
heap->notify_gc_no_progress();
heap->cancel_gc(GCCause::_shenandoah_upgrade_to_full_gc);
op_degenerated_futile();
} else {

View File

@ -945,24 +945,36 @@ HeapWord* ShenandoahHeap::allocate_memory(ShenandoahAllocRequest& req) {
return nullptr;
}
// Block until control thread reacted, then retry allocation.
//
// It might happen that one of the threads requesting allocation would unblock
// way later after GC happened, only to fail the second allocation, because
// other threads have already depleted the free storage. In this case, a better
// strategy is to try again, as long as GC makes progress (or until at least
// one full GC has completed).
size_t original_count = shenandoah_policy()->full_gc_count();
while (result == nullptr
&& (get_gc_no_progress_count() == 0 || original_count == shenandoah_policy()->full_gc_count())) {
control_thread()->handle_alloc_failure(req, true);
result = allocate_memory_under_lock(req, in_new_region);
}
if (result == nullptr) {
// Block until control thread reacted, then retry allocation.
//
// It might happen that one of the threads requesting allocation would unblock
// way later after GC happened, only to fail the second allocation, because
// other threads have already depleted the free storage. In this case, a better
// strategy is to try again, until at least one full GC has completed.
//
// Stop retrying and return nullptr to cause OOMError exception if our allocation failed even after:
// a) We experienced a GC that had good progress, or
// b) We experienced at least one Full GC (whether or not it had good progress)
//
// TODO: Consider GLOBAL GC rather than Full GC to remediate OOM condition: https://bugs.openjdk.org/browse/JDK-8335910
if (log_is_enabled(Debug, gc, alloc)) {
ResourceMark rm;
log_debug(gc, alloc)("Thread: %s, Result: " PTR_FORMAT ", Request: %s, Size: " SIZE_FORMAT ", Original: " SIZE_FORMAT ", Latest: " SIZE_FORMAT,
Thread::current()->name(), p2i(result), req.type_string(), req.size(), original_count, get_gc_no_progress_count());
size_t original_count = shenandoah_policy()->full_gc_count();
while ((result == nullptr) && (original_count == shenandoah_policy()->full_gc_count())) {
control_thread()->handle_alloc_failure(req, true);
result = allocate_memory_under_lock(req, in_new_region);
}
if (result != nullptr) {
// If our allocation request has been satisifed after it initially failed, we count this as good gc progress
notify_gc_progress();
}
if (log_is_enabled(Debug, gc, alloc)) {
ResourceMark rm;
log_debug(gc, alloc)("Thread: %s, Result: " PTR_FORMAT ", Request: %s, Size: " SIZE_FORMAT
", Original: " SIZE_FORMAT ", Latest: " SIZE_FORMAT,
Thread::current()->name(), p2i(result), req.type_string(), req.size(),
original_count, get_gc_no_progress_count());
}
}
} else {
assert(req.is_gc_alloc(), "Can only accept GC allocs here");