From 3a87eb5c4606ce39970962895315567e8606eba7 Mon Sep 17 00:00:00 2001 From: Kelvin Nilsen Date: Mon, 8 Jul 2024 18:03:19 +0000 Subject: [PATCH] 8335126: Shenandoah: Improve OOM handling Reviewed-by: shade, ysr, wkemper, rkennke --- .../gc/shenandoah/shenandoahControlThread.cpp | 3 +- .../gc/shenandoah/shenandoahDegeneratedGC.cpp | 1 - .../share/gc/shenandoah/shenandoahHeap.cpp | 46 ++++++++++++------- 3 files changed, 31 insertions(+), 19 deletions(-) diff --git a/src/hotspot/share/gc/shenandoah/shenandoahControlThread.cpp b/src/hotspot/share/gc/shenandoah/shenandoahControlThread.cpp index e538ca02467..95a70de5790 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahControlThread.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahControlThread.cpp @@ -318,7 +318,8 @@ void ShenandoahControlThread::service_concurrent_normal_cycle(GCCause::Cause cau ShenandoahConcurrentGC gc; if (gc.collect(cause)) { - // Cycle is complete + // Cycle is complete. There were no failed allocation requests and no degeneration, so count this as good progress. + heap->notify_gc_progress(); heap->heuristics()->record_success_concurrent(); heap->shenandoah_policy()->record_success_concurrent(gc.abbreviated()); } else { diff --git a/src/hotspot/share/gc/shenandoah/shenandoahDegeneratedGC.cpp b/src/hotspot/share/gc/shenandoah/shenandoahDegeneratedGC.cpp index fb5283bb1d8..6b597d9b2d7 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahDegeneratedGC.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahDegeneratedGC.cpp @@ -228,7 +228,6 @@ void ShenandoahDegenGC::op_degenerated() { // Check for futility and fail. There is no reason to do several back-to-back Degenerated cycles, // because that probably means the heap is overloaded and/or fragmented. if (!metrics.is_good_progress()) { - heap->notify_gc_no_progress(); heap->cancel_gc(GCCause::_shenandoah_upgrade_to_full_gc); op_degenerated_futile(); } else { diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp b/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp index ee3f4e7d8eb..5c5b6f7ebe5 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp @@ -945,24 +945,36 @@ HeapWord* ShenandoahHeap::allocate_memory(ShenandoahAllocRequest& req) { return nullptr; } - // Block until control thread reacted, then retry allocation. - // - // It might happen that one of the threads requesting allocation would unblock - // way later after GC happened, only to fail the second allocation, because - // other threads have already depleted the free storage. In this case, a better - // strategy is to try again, as long as GC makes progress (or until at least - // one full GC has completed). - size_t original_count = shenandoah_policy()->full_gc_count(); - while (result == nullptr - && (get_gc_no_progress_count() == 0 || original_count == shenandoah_policy()->full_gc_count())) { - control_thread()->handle_alloc_failure(req, true); - result = allocate_memory_under_lock(req, in_new_region); - } + if (result == nullptr) { + // Block until control thread reacted, then retry allocation. + // + // It might happen that one of the threads requesting allocation would unblock + // way later after GC happened, only to fail the second allocation, because + // other threads have already depleted the free storage. In this case, a better + // strategy is to try again, until at least one full GC has completed. + // + // Stop retrying and return nullptr to cause OOMError exception if our allocation failed even after: + // a) We experienced a GC that had good progress, or + // b) We experienced at least one Full GC (whether or not it had good progress) + // + // TODO: Consider GLOBAL GC rather than Full GC to remediate OOM condition: https://bugs.openjdk.org/browse/JDK-8335910 - if (log_is_enabled(Debug, gc, alloc)) { - ResourceMark rm; - log_debug(gc, alloc)("Thread: %s, Result: " PTR_FORMAT ", Request: %s, Size: " SIZE_FORMAT ", Original: " SIZE_FORMAT ", Latest: " SIZE_FORMAT, - Thread::current()->name(), p2i(result), req.type_string(), req.size(), original_count, get_gc_no_progress_count()); + size_t original_count = shenandoah_policy()->full_gc_count(); + while ((result == nullptr) && (original_count == shenandoah_policy()->full_gc_count())) { + control_thread()->handle_alloc_failure(req, true); + result = allocate_memory_under_lock(req, in_new_region); + } + if (result != nullptr) { + // If our allocation request has been satisifed after it initially failed, we count this as good gc progress + notify_gc_progress(); + } + if (log_is_enabled(Debug, gc, alloc)) { + ResourceMark rm; + log_debug(gc, alloc)("Thread: %s, Result: " PTR_FORMAT ", Request: %s, Size: " SIZE_FORMAT + ", Original: " SIZE_FORMAT ", Latest: " SIZE_FORMAT, + Thread::current()->name(), p2i(result), req.type_string(), req.size(), + original_count, get_gc_no_progress_count()); + } } } else { assert(req.is_gc_alloc(), "Can only accept GC allocs here");