From 3844685be03b121e75095b45a76c0b99f2a41912 Mon Sep 17 00:00:00 2001 From: Stefan Karlsson Date: Fri, 26 Aug 2022 13:44:28 +0000 Subject: [PATCH] 8292697: ZGC: Hangs when almost out of metaspace memory Reviewed-by: eosterlund, coleenp --- .../share/classfile/classLoaderDataGraph.cpp | 4 +- src/hotspot/share/memory/metaspace.cpp | 23 ++++-- src/hotspot/share/memory/metaspace.hpp | 2 +- .../memory/metaspaceCriticalAllocation.cpp | 74 ++++++++++++++----- .../memory/metaspaceCriticalAllocation.hpp | 4 +- .../ShrinkGrowMultiJVM.java | 4 +- .../ShrinkGrowTest/ShrinkGrowTest.java | 1 + 7 files changed, 79 insertions(+), 33 deletions(-) diff --git a/src/hotspot/share/classfile/classLoaderDataGraph.cpp b/src/hotspot/share/classfile/classLoaderDataGraph.cpp index 3a1cc8894fe..95c8b805b5f 100644 --- a/src/hotspot/share/classfile/classLoaderDataGraph.cpp +++ b/src/hotspot/share/classfile/classLoaderDataGraph.cpp @@ -550,10 +550,12 @@ void ClassLoaderDataGraph::purge(bool at_safepoint) { delete purge_me; classes_unloaded = true; } + + Metaspace::purge(classes_unloaded); if (classes_unloaded) { - Metaspace::purge(); set_metaspace_oom(false); } + DependencyContext::purge_dependency_contexts(); // If we're purging metadata at a safepoint, clean remaining diff --git a/src/hotspot/share/memory/metaspace.cpp b/src/hotspot/share/memory/metaspace.cpp index b82b7e913a3..cc809ab5f5f 100644 --- a/src/hotspot/share/memory/metaspace.cpp +++ b/src/hotspot/share/memory/metaspace.cpp @@ -995,23 +995,30 @@ const char* Metaspace::metadata_type_name(Metaspace::MetadataType mdtype) { } } -void Metaspace::purge() { +void Metaspace::purge(bool classes_unloaded) { // The MetaspaceCritical_lock is used by a concurrent GC to block out concurrent metaspace // allocations, that would starve critical metaspace allocations, that are about to throw // OOM if they fail; they need precedence for correctness. MutexLocker ml(MetaspaceCritical_lock, Mutex::_no_safepoint_check_flag); - ChunkManager* cm = ChunkManager::chunkmanager_nonclass(); - if (cm != NULL) { - cm->purge(); - } - if (using_class_space()) { - cm = ChunkManager::chunkmanager_class(); + if (classes_unloaded) { + ChunkManager* cm = ChunkManager::chunkmanager_nonclass(); if (cm != NULL) { cm->purge(); } + if (using_class_space()) { + cm = ChunkManager::chunkmanager_class(); + if (cm != NULL) { + cm->purge(); + } + } } - MetaspaceCriticalAllocation::satisfy(); + // Try to satisfy queued metaspace allocation requests. + // + // It might seem unnecessary to try to process allocation requests if no + // classes have been unloaded. However, this call is required for the code + // in MetaspaceCriticalAllocation::try_allocate_critical to work. + MetaspaceCriticalAllocation::process(); } bool Metaspace::contains(const void* ptr) { diff --git a/src/hotspot/share/memory/metaspace.hpp b/src/hotspot/share/memory/metaspace.hpp index c9cf6bd5f88..d3f9a3721ac 100644 --- a/src/hotspot/share/memory/metaspace.hpp +++ b/src/hotspot/share/memory/metaspace.hpp @@ -121,7 +121,7 @@ public: static bool contains_non_shared(const void* ptr); // Free empty virtualspaces - static void purge(); + static void purge(bool classes_unloaded); static void report_metadata_oome(ClassLoaderData* loader_data, size_t word_size, MetaspaceObj::Type type, MetadataType mdtype, TRAPS); diff --git a/src/hotspot/share/memory/metaspaceCriticalAllocation.cpp b/src/hotspot/share/memory/metaspaceCriticalAllocation.cpp index 08400e5b5d4..26e95238477 100644 --- a/src/hotspot/share/memory/metaspaceCriticalAllocation.cpp +++ b/src/hotspot/share/memory/metaspaceCriticalAllocation.cpp @@ -33,12 +33,12 @@ #include "runtime/mutexLocker.hpp" class MetadataAllocationRequest { - ClassLoaderData* _loader_data; - size_t _word_size; - Metaspace::MetadataType _type; - MetadataAllocationRequest* _next; - MetaWord* _result; - bool _has_result; + ClassLoaderData* const _loader_data; + const size_t _word_size; + const Metaspace::MetadataType _type; + MetadataAllocationRequest* _next; + MetaWord* _result; + bool _is_processed; public: MetadataAllocationRequest(ClassLoaderData* loader_data, @@ -49,7 +49,7 @@ public: _type(type), _next(NULL), _result(NULL), - _has_result(false) { + _is_processed(false) { MetaspaceCriticalAllocation::add(this); } @@ -57,17 +57,17 @@ public: MetaspaceCriticalAllocation::remove(this); } - ClassLoaderData* loader_data() const { return _loader_data; } - size_t word_size() const { return _word_size; } - Metaspace::MetadataType type() const { return _type; } - MetadataAllocationRequest* next() const { return _next; } - MetaWord* result() const { return _result; } - bool has_result() const { return _has_result; } + ClassLoaderData* loader_data() const { return _loader_data; } + size_t word_size() const { return _word_size; } + Metaspace::MetadataType type() const { return _type; } + MetadataAllocationRequest* next() const { return _next; } + MetaWord* result() const { return _result; } + bool is_processed() const { return _is_processed; } void set_next(MetadataAllocationRequest* next) { _next = next; } void set_result(MetaWord* result) { _result = result; - _has_result = true; + _is_processed = true; } }; @@ -113,13 +113,47 @@ void MetaspaceCriticalAllocation::remove(MetadataAllocationRequest* request) { } bool MetaspaceCriticalAllocation::try_allocate_critical(MetadataAllocationRequest* request) { + // This function uses an optimized scheme to limit the number of triggered + // GCs. The idea is that only one request in the list is responsible for + // triggering a GC, and later requests will try to piggy-back on that + // request. + // + // For this to work, it is important that we can tell which requests were + // seen by the GC's call to process(), and which requests were added after + // last proccess() call. The property '_is_processed' tells this. Because the + // logic below relies on that property, it is important that the GC calls + // process() even when the GC didn't unload any classes. + // + // Note that process() leaves the requests in the queue, so that threads + // in wait_for_purge, which had their requests processed, but didn't get any + // memory can exit that function and trigger a new GC as a last effort to get + // memory before throwing an OOME. + // + // Requests that have been processed once, will not trigger new GCs, we + // therefore filter them out when we determine if the current 'request' + // needs to trigger a GC, or if there are earlier requests that will + // trigger a GC. + { MutexLocker ml(MetaspaceCritical_lock, Mutex::_no_safepoint_check_flag); - if (_requests_head == request) { - // The first request can't opportunistically ride on a previous GC + auto is_first_unprocessed = [&]() { + for (MetadataAllocationRequest* curr = _requests_head; curr != NULL; curr = curr->next()) { + if (!curr->is_processed()) { + // curr is the first not satisfied request + return curr == request; + } + } + + return false; + }; + + if (is_first_unprocessed()) { + // The first non-processed request takes ownership of triggering the GC + // on behalf of itself, and all trailing requests in the list. return false; } } + // Try to ride on a previous GC and hope for early satisfaction wait_for_purge(request); return request->result() != NULL; @@ -129,7 +163,9 @@ void MetaspaceCriticalAllocation::wait_for_purge(MetadataAllocationRequest* requ ThreadBlockInVM tbivm(JavaThread::current()); MutexLocker ml(MetaspaceCritical_lock, Mutex::_no_safepoint_check_flag); for (;;) { - if (request->has_result()) { + if (request->is_processed()) { + // The GC has procesed this request during the purge. + // Return and check the result, and potentially call a last-effort GC. break; } MetaspaceCritical_lock->wait_without_safepoint_check(); @@ -144,12 +180,12 @@ void MetaspaceCriticalAllocation::block_if_concurrent_purge() { } } -void MetaspaceCriticalAllocation::satisfy() { +void MetaspaceCriticalAllocation::process() { assert_lock_strong(MetaspaceCritical_lock); bool all_satisfied = true; for (MetadataAllocationRequest* curr = _requests_head; curr != NULL; curr = curr->next()) { if (curr->result() != NULL) { - // Don't satisfy twice + // Don't satisfy twice (can still be processed twice) continue; } // Try to allocate metadata. diff --git a/src/hotspot/share/memory/metaspaceCriticalAllocation.hpp b/src/hotspot/share/memory/metaspaceCriticalAllocation.hpp index bdc9ad8cf2c..4ef0478d3f3 100644 --- a/src/hotspot/share/memory/metaspaceCriticalAllocation.hpp +++ b/src/hotspot/share/memory/metaspaceCriticalAllocation.hpp @@ -52,7 +52,7 @@ class ClassLoaderData; // survived that situation in theory. The motivation is that we are at this point so close // to being out of memory, and the VM is not having a good time, so the user really ought // to increase the amount of available metaspace anyway, instead of GC:ing around more -// to satisfy a very small number of additional allocations. But it does solve pathologial +// to satisfy a very small number of additional allocations. But it does solve pathological // unbounded starvation scenarios where OOM can get thrown even though most of metaspace // is full of dead metadata. // @@ -77,7 +77,7 @@ class MetaspaceCriticalAllocation : public AllStatic { public: static void block_if_concurrent_purge(); - static void satisfy(); + static void process(); static MetaWord* allocate(ClassLoaderData* loader_data, size_t word_size, Metaspace::MetadataType type); }; diff --git a/test/hotspot/jtreg/vmTestbase/metaspace/shrink_grow/ShrinkGrowMultiJVM/ShrinkGrowMultiJVM.java b/test/hotspot/jtreg/vmTestbase/metaspace/shrink_grow/ShrinkGrowMultiJVM/ShrinkGrowMultiJVM.java index d68f46e88ad..c59f8cd9e9e 100644 --- a/test/hotspot/jtreg/vmTestbase/metaspace/shrink_grow/ShrinkGrowMultiJVM/ShrinkGrowMultiJVM.java +++ b/test/hotspot/jtreg/vmTestbase/metaspace/shrink_grow/ShrinkGrowMultiJVM/ShrinkGrowMultiJVM.java @@ -58,7 +58,7 @@ public class ShrinkGrowMultiJVM { .resolve("java") .toAbsolutePath() .toString(), - "-Xlog:gc:gc_$i.log", // LOG_GC_ARG_INDEX + "UNSET_LOG_GC_ARG", // LOG_GC_ARG_INDEX "-XX:MetaspaceSize=10m", "-XX:MaxMetaspaceSize=20m", "-cp", @@ -81,7 +81,7 @@ public class ShrinkGrowMultiJVM { for (int i = 0; i < 5; i++) { // will be used as jvm id args[args.length - 1] = "jvm#" + i; - args[LOG_GC_ARG_INDEX] = "-Xlog:gc:gc_" + i + ".log"; + args[LOG_GC_ARG_INDEX] = "-Xlog:gc*:gc_" + i + ".log::filecount=0"; ProcessBuilder pb = new ProcessBuilder(args); try { Process p = pb.start(); diff --git a/test/hotspot/jtreg/vmTestbase/metaspace/shrink_grow/ShrinkGrowTest/ShrinkGrowTest.java b/test/hotspot/jtreg/vmTestbase/metaspace/shrink_grow/ShrinkGrowTest/ShrinkGrowTest.java index d8c90e277c9..f8a61c82d61 100644 --- a/test/hotspot/jtreg/vmTestbase/metaspace/shrink_grow/ShrinkGrowTest/ShrinkGrowTest.java +++ b/test/hotspot/jtreg/vmTestbase/metaspace/shrink_grow/ShrinkGrowTest/ShrinkGrowTest.java @@ -145,6 +145,7 @@ public class ShrinkGrowTest { // step 2: try to load one more class // it should be impossible try { + log("and finally, a wafer-thin mint"); eatALittleMemory(); throwFault("We haven't cleaned metaspace yet!"); } catch (OutOfMemoryError error) {