diff --git a/src/hotspot/share/gc/g1/g1Arguments.cpp b/src/hotspot/share/gc/g1/g1Arguments.cpp
index b8a7c677f7f..79b50cd0ea9 100644
--- a/src/hotspot/share/gc/g1/g1Arguments.cpp
+++ b/src/hotspot/share/gc/g1/g1Arguments.cpp
@@ -126,7 +126,6 @@ void G1Arguments::initialize_mark_stack_size() {
     FLAG_SET_ERGO(MarkStackSize, mark_stack_size);
   }
 
-  log_trace(gc)("MarkStackSize: %uk MarkStackSizeMax: %uk", (uint)(MarkStackSize / K), (uint)(MarkStackSizeMax / K));
 }
diff --git a/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp b/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp
index 78e1bec3438..533817f9724 100644
--- a/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp
+++ b/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp
@@ -75,6 +75,7 @@
 #include "utilities/align.hpp"
 #include "utilities/formatBuffer.hpp"
 #include "utilities/growableArray.hpp"
+#include "utilities/powerOfTwo.hpp"
 
 bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
   assert(addr < _cm->finger(), "invariant");
@@ -94,80 +95,173 @@ bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
 }
 
 G1CMMarkStack::G1CMMarkStack() :
-  _max_chunk_capacity(0),
-  _base(nullptr),
-  _chunk_capacity(0) {
+  _chunk_allocator() {
   set_empty();
 }
 
-bool G1CMMarkStack::resize(size_t new_capacity) {
-  assert(is_empty(), "Only resize when stack is empty.");
-  assert(new_capacity <= _max_chunk_capacity,
-         "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity);
-
-  TaskQueueEntryChunk* new_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(new_capacity, mtGC);
-
-  if (new_base == nullptr) {
-    log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
-    return false;
-  }
-  // Release old mapping.
-  if (_base != nullptr) {
-    MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity);
-  }
-
-  _base = new_base;
-  _chunk_capacity = new_capacity;
-  set_empty();
-
-  return true;
-}
-
 size_t G1CMMarkStack::capacity_alignment() {
   return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
 }
 
-bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) {
-  guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized.");
+bool G1CMMarkStack::initialize() {
+  guarantee(_chunk_allocator.capacity() == 0, "G1CMMarkStack already initialized.");
+
+  size_t initial_capacity = MarkStackSize;
+  size_t max_capacity = MarkStackSizeMax;
 
   size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);
 
-  _max_chunk_capacity = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
-  size_t initial_chunk_capacity = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
+  size_t max_num_chunks = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
+  size_t initial_num_chunks = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
 
-  guarantee(initial_chunk_capacity <= _max_chunk_capacity,
-            "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT,
-            _max_chunk_capacity,
-            initial_chunk_capacity);
+  initial_num_chunks = round_up_power_of_2(initial_num_chunks);
+  max_num_chunks = MAX2(initial_num_chunks, max_num_chunks);
+
+  size_t limit = (INT_MAX - 1);
+  max_capacity = MIN2((max_num_chunks * TaskEntryChunkSizeInVoidStar), limit);
+  initial_capacity = MIN2((initial_num_chunks * TaskEntryChunkSizeInVoidStar), limit);
+
+  FLAG_SET_ERGO(MarkStackSizeMax, max_capacity);
+  FLAG_SET_ERGO(MarkStackSize, initial_capacity);
+
+  log_trace(gc)("MarkStackSize: %uk MarkStackSizeMax: %uk", (uint)(MarkStackSize / K), (uint)(MarkStackSizeMax / K));
 
   log_debug(gc)("Initialize mark stack with " SIZE_FORMAT " chunks, maximum " SIZE_FORMAT,
-                initial_chunk_capacity, _max_chunk_capacity);
+                initial_num_chunks, max_capacity);
 
-  return resize(initial_chunk_capacity);
+  return _chunk_allocator.initialize(initial_num_chunks, max_num_chunks);
+}
+
+G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::ChunkAllocator::allocate_new_chunk() {
+  if (_size >= _max_capacity) {
+    return nullptr;
+  }
+
+  size_t cur_idx = Atomic::fetch_then_add(&_size, 1u);
+
+  if (cur_idx >= _max_capacity) {
+    return nullptr;
+  }
+
+  size_t bucket = get_bucket(cur_idx);
+  if (Atomic::load_acquire(&_buckets[bucket]) == nullptr) {
+    if (!_should_grow) {
+      // Prefer to restart the CM.
+      return nullptr;
+    }
+
+    MutexLocker x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
+    if (Atomic::load_acquire(&_buckets[bucket]) == nullptr) {
+      if (!expand()) {
+        return nullptr;
+      }
+    }
+  }
+
+  size_t bucket_idx = get_bucket_index(cur_idx);
+  TaskQueueEntryChunk* result = ::new (&_buckets[bucket][bucket_idx]) TaskQueueEntryChunk;
+  result->next = nullptr;
+  return result;
+}
+
+G1CMMarkStack::ChunkAllocator::ChunkAllocator() :
+  _min_capacity(0),
+  _max_capacity(0),
+  _capacity(0),
+  _num_buckets(0),
+  _should_grow(false),
+  _buckets(nullptr),
+  _size(0)
+{ }
+
+bool G1CMMarkStack::ChunkAllocator::initialize(size_t initial_capacity, size_t max_capacity) {
+  guarantee(is_power_of_2(initial_capacity), "Invalid initial_capacity");
+
+  _min_capacity = initial_capacity;
+  _max_capacity = max_capacity;
+  _num_buckets = get_bucket(_max_capacity) + 1;
+
+  _buckets = NEW_C_HEAP_ARRAY(TaskQueueEntryChunk*, _num_buckets, mtGC);
+
+  for (size_t i = 0; i < _num_buckets; i++) {
+    _buckets[i] = nullptr;
+  }
+
+  size_t new_capacity = bucket_size(0);
+
+  if (!reserve(new_capacity)) {
+    log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
+    return false;
+  }
+  return true;
+}
+
+bool G1CMMarkStack::ChunkAllocator::expand() {
+  if (_capacity == _max_capacity) {
+    log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " chunks.", _capacity);
+    return false;
+  }
+  size_t old_capacity = _capacity;
+  // Double capacity if possible.
+  size_t new_capacity = MIN2(old_capacity * 2, _max_capacity);
+
+  if (reserve(new_capacity)) {
+    log_debug(gc)("Expanded the mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
+                  old_capacity, new_capacity);
+    return true;
+  }
+  return false;
+}
+
+G1CMMarkStack::ChunkAllocator::~ChunkAllocator() {
+  if (_buckets == nullptr) {
+    return;
+  }
+
+  for (size_t i = 0; i < _num_buckets; i++) {
+    if (_buckets[i] != nullptr) {
+      MmapArrayAllocator<TaskQueueEntryChunk>::free(_buckets[i], bucket_size(i));
+      _buckets[i] = nullptr;
+    }
+  }
+
+  FREE_C_HEAP_ARRAY(TaskQueueEntryChunk*, _buckets);
+}
+
+bool G1CMMarkStack::ChunkAllocator::reserve(size_t new_capacity) {
+  assert(new_capacity <= _max_capacity, "Cannot expand overflow mark stack beyond the max_capacity " SIZE_FORMAT " chunks.", _max_capacity);
+
+  size_t highest_bucket = get_bucket(new_capacity - 1);
+  size_t i = get_bucket(_capacity);
+
+  for (; i <= highest_bucket; i++) {
+    if (Atomic::load_acquire(&_buckets[i]) != nullptr) {
+      continue; // Skip over already allocated buckets.
+    }
+
+    size_t bucket_capacity = bucket_size(i);
+
+    // Trim bucket size so that we do not exceed the _max_capacity.
+    bucket_capacity = (_capacity + bucket_capacity) <= _max_capacity ?
+                      bucket_capacity :
+                      _max_capacity - _capacity;
+
+    TaskQueueEntryChunk* bucket_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(bucket_capacity, mtGC);
+
+    if (bucket_base == nullptr) {
+      log_warning(gc)("Failed to reserve memory for increasing the overflow mark stack capacity with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.",
+                      bucket_capacity, bucket_capacity * sizeof(TaskQueueEntryChunk));
+      return false;
+    }
+    _capacity += bucket_capacity;
+    Atomic::release_store(&_buckets[i], bucket_base);
+  }
+  return true;
 }
 
 void G1CMMarkStack::expand() {
-  if (_chunk_capacity == _max_chunk_capacity) {
-    log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " chunks.", _chunk_capacity);
-    return;
-  }
-  size_t old_capacity = _chunk_capacity;
-  // Double capacity if possible
-  size_t new_capacity = MIN2(old_capacity * 2, _max_chunk_capacity);
-
-  if (resize(new_capacity)) {
-    log_debug(gc)("Expanded mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
-                  old_capacity, new_capacity);
-  } else {
-    log_warning(gc)("Failed to expand mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
-                    old_capacity, new_capacity);
-  }
-}
-
-G1CMMarkStack::~G1CMMarkStack() {
-  if (_base != nullptr) {
-    MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity);
-  }
+  _chunk_allocator.expand();
 }
 
 void G1CMMarkStack::add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem) {
@@ -208,31 +302,13 @@ G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list()
   return remove_chunk_from_list(&_free_list);
 }
 
-G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::allocate_new_chunk() {
-  // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code.
-  // Further this limits _hwm to a value of _chunk_capacity + #threads, avoiding
-  // wraparound of _hwm.
-  if (_hwm >= _chunk_capacity) {
-    return nullptr;
-  }
-
-  size_t cur_idx = Atomic::fetch_then_add(&_hwm, 1u);
-  if (cur_idx >= _chunk_capacity) {
-    return nullptr;
-  }
-
-  TaskQueueEntryChunk* result = ::new (&_base[cur_idx]) TaskQueueEntryChunk;
-  result->next = nullptr;
-  return result;
-}
-
 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
   // Get a new chunk.
   TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();
 
   if (new_chunk == nullptr) {
     // Did not get a chunk from the free list. Allocate from backing memory.
-    new_chunk = allocate_new_chunk();
+    new_chunk = _chunk_allocator.allocate_new_chunk();
 
     if (new_chunk == nullptr) {
       return false;
@@ -261,9 +337,9 @@ bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
 
 void G1CMMarkStack::set_empty() {
   _chunks_in_chunk_list = 0;
-  _hwm = 0;
   _chunk_list = nullptr;
   _free_list = nullptr;
+  _chunk_allocator.reset();
 }
 
 G1CMRootMemRegions::G1CMRootMemRegions(uint const max_regions) :
@@ -440,7 +516,7 @@ G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
   _concurrent_workers->initialize_workers();
   _num_concurrent_workers = _concurrent_workers->active_workers();
 
-  if (!_global_mark_stack.initialize(MarkStackSize, MarkStackSizeMax)) {
+  if (!_global_mark_stack.initialize()) {
     vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
   }
@@ -1635,6 +1711,9 @@ void G1ConcurrentMark::weak_refs_work() {
 
   assert(_global_mark_stack.is_empty(), "mark stack should be empty");
 
+  // Prefer to grow the stack up to the max capacity.
+  _global_mark_stack.set_should_grow();
+
   // We need at least one active thread. If reference processing
   // is not multi-threaded we use the current (VMThread) thread,
   // otherwise we use the workers from the G1CollectedHeap and
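Note on the concurrency in allocate_new_chunk() above: it combines a lock-free index claim (fetch_then_add on _size) with double-checked locking around bucket creation. The sketch below restates that pattern with standard C++11 atomics in place of HotSpot's Atomic:: wrappers, MutexLocker and MmapArrayAllocator; the fixed-size buckets, the names, and the use of plain operator new are illustrative assumptions, not the patch's code (the real allocator doubles bucket sizes and trims against _max_capacity, as shown above).

```cpp
#include <atomic>
#include <cstddef>
#include <mutex>
#include <new>

struct Chunk { Chunk* next; };

class BucketArraySketch {
  static const size_t kNumBuckets = 4;
  static const size_t kBucketSize = 8;  // Fixed-size buckets keep the sketch short.

  std::atomic<Chunk*> _buckets[kNumBuckets] = {};  // All null initially.
  std::atomic<size_t> _size{0};
  std::mutex _lock;

public:
  Chunk* allocate_new_chunk() {
    // Dirty pre-check, as in the patch: bounds _size even when callers retry.
    if (_size.load(std::memory_order_relaxed) >= kNumBuckets * kBucketSize) {
      return nullptr;
    }
    // Lock-free fast path: claim a unique global index, mirroring
    // Atomic::fetch_then_add(&_size, 1u).
    size_t cur_idx = _size.fetch_add(1, std::memory_order_relaxed);
    if (cur_idx >= kNumBuckets * kBucketSize) {
      return nullptr;  // Lost the race past maximum capacity.
    }

    size_t bucket = cur_idx / kBucketSize;
    // First check without the lock; pairs with the release store below.
    if (_buckets[bucket].load(std::memory_order_acquire) == nullptr) {
      std::lock_guard<std::mutex> guard(_lock);
      // Second check under the lock: another thread may have won the race.
      if (_buckets[bucket].load(std::memory_order_acquire) == nullptr) {
        Chunk* base = static_cast<Chunk*>(::operator new[](kBucketSize * sizeof(Chunk)));
        // Publish the mapping; subsequent acquire loads see initialized memory.
        _buckets[bucket].store(base, std::memory_order_release);
      }
    }

    Chunk* base = _buckets[bucket].load(std::memory_order_acquire);
    return ::new (&base[cur_idx % kBucketSize]) Chunk{nullptr};
  }
};

int main() {
  BucketArraySketch stack;
  int n = 0;
  while (stack.allocate_new_chunk() != nullptr) {
    n++;  // Drains all 32 chunks, then gets nullptr.
  }
  return n == 32 ? 0 : 1;  // Buckets are intentionally leaked in this sketch.
}
```

The acquire load in the fast path pairs with the release store that publishes a freshly mapped bucket, so a thread that never takes the mutex still observes fully initialized backing memory before constructing into it.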
diff --git a/src/hotspot/share/gc/g1/g1ConcurrentMark.hpp b/src/hotspot/share/gc/g1/g1ConcurrentMark.hpp
index 4799f32e0c8..efc6584bda0 100644
--- a/src/hotspot/share/gc/g1/g1ConcurrentMark.hpp
+++ b/src/hotspot/share/gc/g1/g1ConcurrentMark.hpp
@@ -136,10 +136,101 @@ private:
     G1TaskQueueEntry data[EntriesPerChunk];
   };
 
-  size_t _max_chunk_capacity;    // Maximum number of TaskQueueEntryChunk elements on the stack.
+  class ChunkAllocator {
+    // The chunk allocator relies on a growable array data structure that allows resizing without the
+    // need to copy existing items. The basic approach involves organizing the array into chunks,
+    // essentially creating an "array of arrays", referred to as buckets in this implementation. To
+    // facilitate efficient indexing, the size of the first bucket is set to a power of 2. This choice
+    // allows for quick conversion of an array index into a bucket index and the corresponding offset
+    // within the bucket. Additionally, each new bucket added to the growable array doubles the capacity
+    // of the growable array.
+    //
+    // Illustration of the Growable Array data structure.
+    //
+    //        +----+        +----+----+
+    //        |    |------->|    |    |
+    //        |    |        +----+----+
+    //        +----+        +----+----+
+    //        |    |------->|    |    |
+    //        |    |        +----+----+
+    //        +----+        +-----+-----+-----+-----+
+    //        |    |------->|     |     |     |     |
+    //        |    |        +-----+-----+-----+-----+
+    //        +----+        +-----+-----+-----+-----+-----+-----+-----+-----+
+    //        |    |------->|     |     |     |     |     |     |     |     |
+    //        |    |        +-----+-----+-----+-----+-----+-----+-----+-----+
+    //        +----+
+    //
+    size_t _min_capacity;
+    size_t _max_capacity;
+    size_t _capacity;
+    size_t _num_buckets;
+    bool _should_grow;
+    TaskQueueEntryChunk* volatile* _buckets;
+    char _pad0[DEFAULT_CACHE_LINE_SIZE];
+    volatile size_t _size;
+    char _pad4[DEFAULT_CACHE_LINE_SIZE - sizeof(size_t)];
 
-  TaskQueueEntryChunk* _base;    // Bottom address of allocated memory area.
-  size_t _chunk_capacity;        // Current maximum number of TaskQueueEntryChunk elements.
+    size_t bucket_size(size_t bucket) {
+      return (bucket == 0) ?
+             _min_capacity :
+             _min_capacity * (1ULL << (bucket - 1));
+    }
+
+    static unsigned int find_highest_bit(uintptr_t mask) {
+      return count_leading_zeros(mask) ^ (BitsPerWord - 1U);
+    }
+
+    size_t get_bucket(size_t array_idx) {
+      if (array_idx < _min_capacity) {
+        return 0;
+      }
+
+      return find_highest_bit(array_idx) - find_highest_bit(_min_capacity) + 1;
+    }
+
+    size_t get_bucket_index(size_t array_idx) {
+      if (array_idx < _min_capacity) {
+        return array_idx;
+      }
+      return array_idx - (1ULL << find_highest_bit(array_idx));
+    }
+
+    bool reserve(size_t new_capacity);
+
+  public:
+    ChunkAllocator();
+
+    ~ChunkAllocator();
+
+    bool initialize(size_t initial_capacity, size_t max_capacity);
+
+    void reset() {
+      _size = 0;
+      _should_grow = false;
+    }
+
+    // During G1CMConcurrentMarkingTask or finalize_marking phases, we prefer to restart the marking when
+    // the G1CMMarkStack overflows. Attempts to expand the G1CMMarkStack should be followed with a restart
+    // of the marking. On failure to allocate a new chunk, the caller just returns and forces a restart.
+    // This approach offers better memory utilization for the G1CMMarkStack, as each iteration of the
+    // marking potentially involves traversing fewer unmarked nodes in the graph.
+
+    // However, during the reference processing phase, instead of restarting the marking process, the
+    // G1CMMarkStack is expanded upon failure to allocate a new chunk. The decision between these two
+    // modes of expansion is determined by the _should_grow parameter.
+    void set_should_grow() {
+      _should_grow = true;
+    }
+
+    size_t capacity() const { return _capacity; }
+
+    bool expand();
+
+    TaskQueueEntryChunk* allocate_new_chunk();
+  };
+
+  ChunkAllocator _chunk_allocator;
 
   char _pad0[DEFAULT_CACHE_LINE_SIZE];
   TaskQueueEntryChunk* volatile _free_list;  // Linked list of free chunks that can be allocated by users.
@@ -148,13 +239,6 @@ private:
   volatile size_t _chunks_in_chunk_list;
   char _pad2[DEFAULT_CACHE_LINE_SIZE - sizeof(TaskQueueEntryChunk*) - sizeof(size_t)];
 
-  volatile size_t _hwm;          // High water mark within the reserved space.
-  char _pad4[DEFAULT_CACHE_LINE_SIZE - sizeof(size_t)];
-
-  // Allocate a new chunk from the reserved memory, using the high water mark. Returns
-  // null if out of memory.
-  TaskQueueEntryChunk* allocate_new_chunk();
-
   // Atomically add the given chunk to the list.
   void add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem);
   // Atomically remove and return a chunk from the given list. Returns null if the
@@ -167,19 +251,15 @@ private:
   TaskQueueEntryChunk* remove_chunk_from_chunk_list();
   TaskQueueEntryChunk* remove_chunk_from_free_list();
 
-  // Resizes the mark stack to the given new capacity. Releases any previous
-  // memory if successful.
-  bool resize(size_t new_capacity);
-
  public:
   G1CMMarkStack();
-  ~G1CMMarkStack();
+  ~G1CMMarkStack() = default;
 
   // Alignment and minimum capacity of this mark stack in number of oops.
   static size_t capacity_alignment();
 
-  // Allocate and initialize the mark stack with the given number of oops.
-  bool initialize(size_t initial_capacity, size_t max_capacity);
+  // Allocate and initialize the mark stack.
+  bool initialize();
 
   // Pushes the given buffer containing at most EntriesPerChunk elements on the mark
   // stack. If less than EntriesPerChunk elements are to be pushed, the array must
@@ -197,7 +277,11 @@ private:
   // _chunk_list.
   bool is_empty() const { return _chunk_list == nullptr; }
 
-  size_t capacity() const { return _chunk_capacity; }
+  size_t capacity() const { return _chunk_allocator.capacity(); }
+
+  void set_should_grow() {
+    _chunk_allocator.set_should_grow();
+  }
 
   // Expand the stack, typically in response to an overflow condition
   void expand();
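The index arithmetic above is easy to check in isolation. The following small self-contained program (an illustration, not JDK code) mirrors bucket_size(), get_bucket() and get_bucket_index() for an assumed minimum capacity of 8, substituting a portable loop for count_leading_zeros(); it verifies that consecutive global indices fill each bucket in order, with bucket sizes 8, 8, 16, 32 matching the doubling scheme in the class comment.

```cpp
#include <cassert>
#include <cstddef>
#include <cstdio>

static const size_t kMinCapacity = 8;  // Must be a power of 2.

// floor(log2(mask)), written with a loop instead of count_leading_zeros.
static size_t find_highest_bit(size_t mask) {
  size_t bit = 0;
  while (mask >>= 1) bit++;
  return bit;
}

// Bucket sizes are kMin, kMin, 2*kMin, 4*kMin, ... so bucket b (b >= 1)
// starts at global index kMin * 2^(b-1).
static size_t bucket_size(size_t bucket) {
  return (bucket == 0) ? kMinCapacity : kMinCapacity * ((size_t)1 << (bucket - 1));
}

static size_t get_bucket(size_t array_idx) {
  if (array_idx < kMinCapacity) return 0;
  return find_highest_bit(array_idx) - find_highest_bit(kMinCapacity) + 1;
}

static size_t get_bucket_index(size_t array_idx) {
  if (array_idx < kMinCapacity) return array_idx;
  return array_idx - ((size_t)1 << find_highest_bit(array_idx));
}

int main() {
  // Walk all indices and check they land in consecutive slots of each bucket.
  size_t expected_bucket = 0, expected_slot = 0;
  for (size_t idx = 0; idx < 64; idx++) {
    if (expected_slot == bucket_size(expected_bucket)) {
      expected_bucket++;
      expected_slot = 0;
    }
    assert(get_bucket(idx) == expected_bucket);
    assert(get_bucket_index(idx) == expected_slot);
    expected_slot++;
  }
  printf("index 8 -> bucket %zu slot %zu\n", get_bucket(8), get_bucket_index(8));
  printf("index 23 -> bucket %zu slot %zu\n", get_bucket(23), get_bucket_index(23));
  return 0;
}
```

For example, with a minimum capacity of 8, index 23 lands in bucket 2 at offset 7: find_highest_bit(23) = 4 gives bucket 4 - 3 + 1 = 2 and offset 23 - 16 = 7.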
diff --git a/test/hotspot/jtreg/gc/g1/TestMarkStackOverflow.java b/test/hotspot/jtreg/gc/g1/TestMarkStackOverflow.java
new file mode 100644
index 00000000000..754d811e664
--- /dev/null
+++ b/test/hotspot/jtreg/gc/g1/TestMarkStackOverflow.java
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package gc.g1;
+
+import java.util.LinkedHashMap;
+
+/* @test
+ * @bug 8313212
+ * @summary Finalizing objects may create new concurrent marking work during reference processing.
+ *          If the marking work overflows the global mark stack, we should resize the global mark
+ *          stack up to MarkStackSizeMax if possible.
+ * @requires vm.gc.G1
+ * @run main/othervm -XX:ActiveProcessorCount=2 -XX:MarkStackSize=1 -Xmx250m gc.g1.TestMarkStackOverflow
+ */
+
+public class TestMarkStackOverflow {
+    public static void main(String[] args) throws Exception {
+        for (int i = 0; i < 10; i++) {
+            Finalizable holder1 = new Finalizable();
+            System.out.printf("Used mem %.2f MB\n", getUsedMem());
+        }
+    }
+
+    private static double getUsedMem() {
+        return (Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory()) / (double) (1024 * 1024);
+    }
+
+    private static class Finalizable {
+        public static final int NUM_OBJECTS = 200_000;
+        private final LinkedHashMap<Object, Object> list = new LinkedHashMap<>();
+
+        public Finalizable() {
+            for (int i = 0; i < NUM_OBJECTS; i++) {
+                Object entry = new Object();
+                list.put(entry, entry);
+            }
+        }
+
+        @SuppressWarnings("removal")
+        protected void finalize() {
+            System.out.print("");
+        }
+    }
+}
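To watch the new expansion path fire, it should be enough to run the compiled test class by hand with debug-level GC logging, since ChunkAllocator::expand() logs through log_debug(gc). Something along these lines (a hand-run approximation of the jtreg @run line above; the exact log output depends on the build):

```
java -XX:+UseG1GC -XX:ActiveProcessorCount=2 -XX:MarkStackSize=1 -Xmx250m \
    -Xlog:gc=debug gc.g1.TestMarkStackOverflow
```

A successful run during reference processing should include lines like "Expanded the mark stack capacity from ... to ... chunks" rather than the repeated marking restarts seen before this patch.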
diff --git a/test/langtools/ProblemList.txt b/test/langtools/ProblemList.txt
index a0797b92bd2..363aa8ae9e7 100644
--- a/test/langtools/ProblemList.txt
+++ b/test/langtools/ProblemList.txt
@@ -66,8 +66,6 @@ tools/javac/annotations/typeAnnotations/referenceinfos/Lambda.java
 tools/javac/annotations/typeAnnotations/referenceinfos/NestedTypes.java        8057687    generic-all    emit correct byte code an attributes for type annotations
 tools/javac/warnings/suppress/TypeAnnotations.java                             8057683    generic-all    improve ordering of errors with type annotations
 tools/javac/modules/SourceInSymlinkTest.java                                   8180263    windows-all    fails when run on a subst drive
-tools/javac/lambda/bytecode/TestLambdaBytecodeTargetRelease14.java             8312534    linux-i586     fails with assert "g1ConcurrentMark.cpp: Overflow during reference processing"
-tools/javac/varargs/warning/Warn5.java                                         8312534    linux-i586     fails with assert "g1ConcurrentMark.cpp: Overflow during reference processing"
 
 ###########################################################################
 #