8267834: Refactor G1CardSetAllocator and BufferNode::Allocator to use a common base class
Reviewed-by: kbarrett, tschatzl
Parent:  fcce24c5b3
Commit:  341c8bd7f2
Changed files:
  src/hotspot/share/gc/g1/g1CardSetMemory.cpp
  src/hotspot/share/gc/g1/g1CardSetMemory.hpp
  src/hotspot/share/gc/g1/g1CardSetMemory.inline.hpp
  src/hotspot/share/gc/g1/g1SegmentedArray.hpp
  src/hotspot/share/gc/g1/g1SegmentedArray.inline.hpp
  src/hotspot/share/gc/shared/ (see hunks below)
  test/hotspot/gtest/gc/shared/ (see hunks below)
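The patch pulls the lock-free free-list machinery that was duplicated in G1CardSetAllocator and BufferNode::Allocator into a shared gc/shared/freeListAllocator.{hpp,cpp}. As a reading aid, here is a minimal sketch of the resulting shape (simplified; the real declarations are in the diff below): a FreeListConfig supplies raw memory and a transfer threshold, and FreeListAllocator layers the pending-list/free-list logic on top.

// Simplified sketch only; details and members are omitted.
class FreeListConfig {                       // shared base: memory source for the allocator
public:
  virtual void* allocate() = 0;              // fresh memory when the free list is empty
  virtual void deallocate(void* node) = 0;   // give memory back when the free list is shrunk
  size_t transfer_threshold();               // minimum batch size for pending -> free transfers
};

class FreeListAllocator {                    // common lock-free free list + pending list logic
public:
  FreeListAllocator(const char* name, FreeListConfig* config);
  void* allocate();                          // reuse a free node, or fall back to config->allocate()
  void release(void* node);                  // push onto the pending list; batch-transfer later
};

// The two configs after this patch:
//   template <class Slot, MEMFLAGS flag> class G1SegmentedArray : public FreeListConfig { ... };
//   class BufferNode::AllocatorConfig : public FreeListConfig { ... };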
src/hotspot/share/gc/g1/g1CardSetMemory.cpp

@@ -35,11 +35,7 @@ G1CardSetAllocator<Slot>::G1CardSetAllocator(const char* name,
                                             const G1CardSetAllocOptions* alloc_options,
                                             G1CardSetFreeList* free_segment_list) :
  _segmented_array(alloc_options, free_segment_list),
  _transfer_lock(false),
  _free_slots_list(),
  _pending_slots_list(),
  _num_pending_slots(0),
  _num_free_slots(0)
  _free_slots_list(name, &_segmented_array)
{
  uint slot_size = _segmented_array.slot_size();
  assert(slot_size >= sizeof(G1CardSetContainer), "Slot instance size %u for allocator %s too small", slot_size, name);

@@ -50,74 +46,16 @@ G1CardSetAllocator<Slot>::~G1CardSetAllocator() {
  drop_all();
}

template <class Slot>
bool G1CardSetAllocator<Slot>::try_transfer_pending() {
  // Attempt to claim the lock.
  if (Atomic::load_acquire(&_transfer_lock) || // Skip CAS if likely to fail.
      Atomic::cmpxchg(&_transfer_lock, false, true)) {
    return false;
  }
  // Have the lock; perform the transfer.

  // Claim all the pending slots.
  G1CardSetContainer* first = _pending_slots_list.pop_all();

  if (first != nullptr) {
    // Prepare to add the claimed slots, and update _num_pending_slots.
    G1CardSetContainer* last = first;
    Atomic::load_acquire(&_num_pending_slots);

    uint count = 1;
    for (G1CardSetContainer* next = first->next(); next != nullptr; next = next->next()) {
      last = next;
      ++count;
    }

    Atomic::sub(&_num_pending_slots, count);

    // Wait for any in-progress pops to avoid ABA for them.
    GlobalCounter::write_synchronize();
    // Add synchronized slots to _free_slots_list.
    // Update count first so there can be no underflow in allocate().
    Atomic::add(&_num_free_slots, count);
    _free_slots_list.prepend(*first, *last);
  }
  Atomic::release_store(&_transfer_lock, false);
  return true;
}

template <class Slot>
void G1CardSetAllocator<Slot>::free(Slot* slot) {
  assert(slot != nullptr, "precondition");
  // Desired minimum transfer batch size. There is relatively little
  // importance to the specific number. It shouldn't be too big, else
  // we're wasting space when the release rate is low. If the release
  // rate is high, we might accumulate more than this before being
  // able to start a new transfer, but that's okay. Also note that
  // the allocation rate and the release rate are going to be fairly
  // similar, due to how the slots are used. - kbarret
  uint const trigger_transfer = 10;

  uint pending_count = Atomic::add(&_num_pending_slots, 1u, memory_order_relaxed);

  G1CardSetContainer* container = reinterpret_cast<G1CardSetContainer*>(reinterpret_cast<char*>(slot));

  container->set_next(nullptr);
  assert(container->next() == nullptr, "precondition");

  _pending_slots_list.push(*container);

  if (pending_count > trigger_transfer) {
    try_transfer_pending();
  }
  slot->~Slot();
  _free_slots_list.release(slot);
}

template <class Slot>
void G1CardSetAllocator<Slot>::drop_all() {
  _free_slots_list.pop_all();
  _pending_slots_list.pop_all();
  _num_pending_slots = 0;
  _num_free_slots = 0;
  _free_slots_list.reset();
  _segmented_array.drop_all();
}

@@ -129,12 +67,13 @@ void G1CardSetAllocator<Slot>::print(outputStream* os) {
    ? _segmented_array.first_array_segment()->num_slots()
    : 0;
  uint num_segments = _segmented_array.num_segments();
  uint num_pending_slots = (uint)_free_slots_list.pending_count();
  os->print("MA " PTR_FORMAT ": %u slots pending (allocated %u available %u) used %.3f highest %u segments %u size %zu ",
            p2i(this),
            _num_pending_slots,
            num_pending_slots,
            num_allocated_slots,
            num_available_slots,
            percent_of(num_allocated_slots - _num_pending_slots, num_available_slots),
            percent_of(num_allocated_slots - num_pending_slots, num_available_slots),
            highest,
            num_segments,
            mem_size());
src/hotspot/share/gc/g1/g1CardSetMemory.hpp

@@ -29,9 +29,9 @@
#include "gc/g1/g1CardSetContainers.hpp"
#include "gc/g1/g1SegmentedArray.hpp"
#include "gc/g1/g1SegmentedArrayFreePool.hpp"
#include "gc/shared/freeListAllocator.hpp"
#include "memory/allocation.hpp"
#include "utilities/growableArray.hpp"
#include "utilities/lockFreeStack.hpp"

class G1CardSetConfiguration;
class outputStream;

@@ -91,23 +91,9 @@ class G1CardSetAllocator {
  typedef G1SegmentedArray<Slot, mtGCCardSet> SegmentedArray;
  // G1CardSetContainer slot management within the G1CardSetSegments allocated
  // by this allocator.
  static G1CardSetContainer* volatile* next_ptr(G1CardSetContainer& slot);
  typedef LockFreeStack<G1CardSetContainer, &G1CardSetAllocator::next_ptr> SlotStack;

  SegmentedArray _segmented_array;
  volatile bool _transfer_lock;
  SlotStack _free_slots_list;
  SlotStack _pending_slots_list;

  volatile uint _num_pending_slots; // Number of slots in the pending list.
  volatile uint _num_free_slots;    // Number of slots in the free list.

  // Try to transfer slots from _pending_slots_list to _free_slots_list, with a
  // synchronization delay for any in-progress pops from the _free_slots_list
  // to solve ABA here.
  bool try_transfer_pending();

  uint num_free_slots() const;
  FreeListAllocator _free_slots_list;

public:
  G1CardSetAllocator(const char* name,

@@ -124,13 +110,15 @@ public:

  size_t mem_size() const {
    return sizeof(*this) +
      _segmented_array.num_segments() * sizeof(G1CardSetSegment) + _segmented_array.num_available_slots() *
      _segmented_array.slot_size();
      _segmented_array.num_segments() * sizeof(G1CardSetSegment) +
      _segmented_array.num_available_slots() * _segmented_array.slot_size();
  }

  size_t wasted_mem_size() const {
    return (_segmented_array.num_available_slots() - (_segmented_array.num_allocated_slots() - _num_pending_slots)) *
      _segmented_array.slot_size();
    uint num_wasted_slots = _segmented_array.num_available_slots() -
                            _segmented_array.num_allocated_slots() -
                            (uint)_free_slots_list.pending_count();
    return num_wasted_slots * _segmented_array.slot_size();
  }

  inline uint num_segments() { return _segmented_array.num_segments(); }
src/hotspot/share/gc/g1/g1CardSetMemory.inline.hpp

@@ -33,30 +33,9 @@
#include "gc/g1/g1CardSetContainers.inline.hpp"
#include "utilities/globalCounter.inline.hpp"

template <class Slot>
G1CardSetContainer* volatile* G1CardSetAllocator<Slot>::next_ptr(G1CardSetContainer& slot) {
  return slot.next_addr();
}

template <class Slot>
Slot* G1CardSetAllocator<Slot>::allocate() {
  assert(_segmented_array.slot_size() > 0, "instance size not set.");

  if (num_free_slots() > 0) {
    // Pop under critical section to deal with ABA problem
    // Other solutions to the same problem are more complicated (ref counting, HP)
    GlobalCounter::CriticalSection cs(Thread::current());

    G1CardSetContainer* container = _free_slots_list.pop();
    if (container != nullptr) {
      Slot* slot = reinterpret_cast<Slot*>(reinterpret_cast<char*>(container));
      Atomic::sub(&_num_free_slots, 1u);
      guarantee(is_aligned(slot, 8), "result " PTR_FORMAT " not aligned", p2i(slot));
      return slot;
    }
  }

  Slot* slot = _segmented_array.allocate();
  Slot* slot = ::new (_free_slots_list.allocate()) Slot();
  assert(slot != nullptr, "must be");
  return slot;
}

@@ -74,9 +53,4 @@ inline void G1CardSetMemoryManager::free_node(void* value) {
  free(0, value);
}

template <class Slot>
inline uint G1CardSetAllocator<Slot>::num_free_slots() const {
  return Atomic::load(&_num_free_slots);
}

#endif // SHARE_GC_G1_G1CARDSETMEMORY_INLINE_HPP
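In the new allocate() above, the FreeListAllocator hands back raw memory and the Slot is constructed into it with placement new; the matching free() in g1CardSetMemory.cpp runs the destructor and releases the memory. A minimal stand-alone illustration of that pattern in plain C++ (raw_allocate/raw_release are stand-ins for the free-list calls, not HotSpot code):

#include <cstdlib>
#include <new>

struct Slot { int value = 0; };

void* raw_allocate() { return std::malloc(sizeof(Slot)); }  // stands in for _free_slots_list.allocate()
void raw_release(void* p) { std::free(p); }                 // stands in for _free_slots_list.release(p)

int main() {
  Slot* slot = ::new (raw_allocate()) Slot();  // construct in memory handed out by the free list
  slot->~Slot();                               // destroy before the memory is recycled
  raw_release(slot);                           // memory goes back onto the (pending) free list
  return 0;
}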
src/hotspot/share/gc/g1/g1SegmentedArray.hpp

@@ -26,6 +26,7 @@
#ifndef SHARE_GC_G1_G1SEGMENTEDARRAY_HPP
#define SHARE_GC_G1_G1SEGMENTEDARRAY_HPP

#include "gc/shared/freeListAllocator.hpp"
#include "memory/allocation.hpp"
#include "utilities/lockFreeStack.hpp"

@@ -181,7 +182,7 @@ public:
// Their values are only consistent within each other with extra global
// synchronization.
template <class Slot, MEMFLAGS flag>
class G1SegmentedArray {
class G1SegmentedArray : public FreeListConfig {
  // G1SegmentedArrayAllocOptions provides parameters for allocation segment
  // sizing and expansion.
  const G1SegmentedArrayAllocOptions* _alloc_options;

@@ -222,7 +223,10 @@ public:
  // be called in a globally synchronized area.
  void drop_all();

  inline Slot* allocate();
  inline void* allocate() override;

  // We do not deallocate individual slots
  inline void deallocate(void* node) override { ShouldNotReachHere(); }

  inline uint num_segments() const;
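G1SegmentedArray now doubles as the FreeListConfig for the card set allocator: allocate() carves a slot out of a segment, and deallocate() is unreachable because memory is only reclaimed wholesale via drop_all(). A hypothetical config with the same shape, shown only to illustrate the override pattern (BumpSlotConfig is not part of the patch; it assumes the HotSpot build environment and a slot_size that is a multiple of sizeof(void*)):

// Hypothetical FreeListConfig whose memory is reclaimed in bulk rather than per node.
class BumpSlotConfig : public FreeListConfig {
  char* _block;            // one big allocation, released wholesale in the destructor
  size_t _capacity;
  size_t _used;
  const size_t _slot_size;
public:
  BumpSlotConfig(size_t slot_size, size_t max_slots)
    : _block(NEW_C_HEAP_ARRAY(char, slot_size * max_slots, mtGC)),
      _capacity(slot_size * max_slots), _used(0), _slot_size(slot_size) {}
  ~BumpSlotConfig() { FREE_C_HEAP_ARRAY(char, _block); }

  void* allocate() override {
    assert(_used + _slot_size <= _capacity, "out of slots");
    void* result = _block + _used;   // bump-pointer allocation out of the block
    _used += _slot_size;
    return result;
  }

  // Individual slots are never handed back; the whole block is freed at once.
  void deallocate(void* node) override { ShouldNotReachHere(); }
};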
src/hotspot/share/gc/g1/g1SegmentedArray.inline.hpp

@@ -210,7 +210,7 @@ void G1SegmentedArray<Slot, flag>::drop_all() {
}

template <class Slot, MEMFLAGS flag>
Slot* G1SegmentedArray<Slot, flag>::allocate() {
void* G1SegmentedArray<Slot, flag>::allocate() {
  assert(slot_size() > 0, "instance size not set.");

  G1SegmentedArraySegment<flag>* cur = Atomic::load_acquire(&_first);
src/hotspot/share/gc/shared/freeListAllocator.cpp (new file)

@@ -0,0 +1,213 @@
/*
 * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "gc/shared/freeListAllocator.hpp"
#include "logging/log.hpp"
#include "utilities/globalCounter.inline.hpp"

FreeListAllocator::NodeList::NodeList() :
  _head(nullptr), _tail(nullptr), _entry_count(0) {}

FreeListAllocator::NodeList::NodeList(FreeNode* head, FreeNode* tail, size_t entry_count) :
  _head(head), _tail(tail), _entry_count(entry_count)
{
  assert((_head == nullptr) == (_tail == nullptr), "invariant");
  assert((_head == nullptr) == (_entry_count == 0), "invariant");
}

FreeListAllocator::PendingList::PendingList() :
  _tail(nullptr), _head(nullptr), _count(0) {}

size_t FreeListAllocator::PendingList::add(FreeNode* node) {
  assert(node->next() == nullptr, "precondition");
  FreeNode* old_head = Atomic::xchg(&_head, node);
  if (old_head != nullptr) {
    node->set_next(old_head);
  } else {
    assert(_tail == nullptr, "invariant");
    _tail = node;
  }
  return Atomic::add(&_count, size_t(1));
}

typename FreeListAllocator::NodeList FreeListAllocator::PendingList::take_all() {
  NodeList result{Atomic::load(&_head), _tail, Atomic::load(&_count)};
  Atomic::store(&_head, (FreeNode*)nullptr);
  _tail = nullptr;
  Atomic::store(&_count, size_t(0));
  return result;
}

size_t FreeListAllocator::PendingList::count() const {
  return Atomic::load(&_count);
}

FreeListAllocator::FreeListAllocator(const char* name, FreeListConfig* config) :
  _config(config),
  _free_count(0),
  _free_list(),
  _transfer_lock(false),
  _active_pending_list(0),
  _pending_lists()
{
  strncpy(_name, name, sizeof(_name) - 1);
  _name[sizeof(_name) - 1] = '\0';
}

void FreeListAllocator::delete_list(FreeNode* list) {
  while (list != nullptr) {
    FreeNode* next = list->next();
    list->~FreeNode();
    _config->deallocate(list);
    list = next;
  }
}

FreeListAllocator::~FreeListAllocator() {
  uint index = Atomic::load(&_active_pending_list);
  NodeList pending_list = _pending_lists[index].take_all();
  delete_list(Atomic::load(&pending_list._head));
  delete_list(_free_list.pop_all());
}

// Drop existing nodes and reset all counters
void FreeListAllocator::reset() {
  uint index = Atomic::load(&_active_pending_list);
  _pending_lists[index].take_all();
  _free_list.pop_all();
  _free_count = 0;
}

size_t FreeListAllocator::free_count() const {
  return Atomic::load(&_free_count);
}

size_t FreeListAllocator::pending_count() const {
  uint index = Atomic::load(&_active_pending_list);
  return _pending_lists[index].count();;
}

// To solve the ABA problem, popping a node from the _free_list is performed within
// a GlobalCounter critical section, and pushing nodes onto the _free_list is done
// after a GlobalCounter synchronization associated with the nodes to be pushed.
void* FreeListAllocator::allocate() {
  FreeNode* node = nullptr;
  if (free_count() > 0) {
    // Protect against ABA; see release().
    GlobalCounter::CriticalSection cs(Thread::current());
    node = _free_list.pop();
  }

  if (node != nullptr) {
    node->~FreeNode();
    // Decrement count after getting buffer from free list. This, along
    // with incrementing count before adding to free list, ensures count
    // never underflows.
    size_t count = Atomic::sub(&_free_count, 1u);
    assert((count + 1) != 0, "_free_count underflow");
    return node;
  } else {
    return _config->allocate();
  }
}

// The release synchronizes on the critical sections before adding to
// the _free_list. But we don't want to make every release have to do a
// synchronize. Instead, we initially place released nodes on the pending list,
// and transfer them to the _free_list in batches. Only one transfer at a time is
// permitted, with a lock bit to control access to that phase. While a transfer
// is in progress, other threads might be adding other nodes to the pending list,
// to be dealt with by some later transfer.
void FreeListAllocator::release(void* free_node) {
  assert(free_node != nullptr, "precondition");
  assert(is_aligned(free_node, sizeof(FreeNode)), "Unaligned addr " PTR_FORMAT, p2i(free_node));
  FreeNode* node = ::new (free_node) FreeNode();

  // The pending list is double-buffered. Add node to the currently active
  // pending list, within a critical section so a transfer will wait until
  // we're done with what might be the pending list to be transferred.
  {
    GlobalCounter::CriticalSection cs(Thread::current());
    uint index = Atomic::load_acquire(&_active_pending_list);
    size_t count = _pending_lists[index].add(node);
    if (count <= _config->transfer_threshold()) return;
  }
  // Attempt transfer when number pending exceeds the transfer threshold.
  try_transfer_pending();
}

// Try to transfer nodes from the pending list to _free_list, with a
// synchronization delay for any in-progress pops from the _free_list,
// to solve ABA there. Return true if performed a (possibly empty)
// transfer, false if blocked from doing so by some other thread's
// in-progress transfer.
bool FreeListAllocator::try_transfer_pending() {
  // Attempt to claim the lock.
  if (Atomic::load(&_transfer_lock) || // Skip CAS if likely to fail.
      Atomic::cmpxchg(&_transfer_lock, false, true)) {
    return false;
  }
  // Have the lock; perform the transfer.

  // Change which pending list is active. Don't need an atomic RMW since
  // we have the lock and we're the only writer.
  uint index = Atomic::load(&_active_pending_list);
  uint new_active = (index + 1) % ARRAY_SIZE(_pending_lists);
  Atomic::release_store(&_active_pending_list, new_active);

  // Wait for all critical sections in the buffer life-cycle to complete.
  // This includes _free_list pops and adding to the now inactive pending
  // list.
  GlobalCounter::write_synchronize();

  // Transfer the inactive pending list to _free_list.
  NodeList transfer_list = _pending_lists[index].take_all();
  size_t count = transfer_list._entry_count;
  if (count > 0) {
    // Update count first so no underflow in allocate().
    Atomic::add(&_free_count, count);
    _free_list.prepend(*transfer_list._head, *transfer_list._tail);
    log_trace(gc, freelist)
             ("Transferred %s pending to free: %zu", name(), count);
  }
  Atomic::release_store(&_transfer_lock, false);
  return true;
}

size_t FreeListAllocator::reduce_free_list(size_t remove_goal) {
  try_transfer_pending();
  size_t removed = 0;
  for ( ; removed < remove_goal; ++removed) {
    FreeNode* node = _free_list.pop();
    if (node == nullptr) break;
    node->~FreeNode();
    _config->deallocate(node);
  }
  size_t new_count = Atomic::sub(&_free_count, removed);
  log_debug(gc, freelist)
           ("Reduced %s free list by " SIZE_FORMAT " to " SIZE_FORMAT,
            name(), removed, new_count);
  return removed;
}
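To summarize the transfer protocol above: release() appends to the currently active pending list inside a GlobalCounter critical section; try_transfer_pending() claims a lock bit, flips which pending list is active, calls GlobalCounter::write_synchronize() so no thread is still appending to the now-inactive list or popping from the free list, and only then splices that list into the free list. The following stand-alone sketch models the same double-buffered hand-off with standard C++ primitives; std::mutex stands in for the GlobalCounter read-side critical sections, so it illustrates the structure, not the lock-free implementation.

#include <atomic>
#include <mutex>
#include <vector>

class BatchedFreeList {
  std::vector<void*> _free;                 // nodes available for reuse
  std::vector<void*> _pending[2];           // double-buffered pending lists
  std::mutex _pending_mutex[2];             // stands in for GlobalCounter critical sections
  std::atomic<unsigned> _active{0};         // which pending list release() appends to
  std::atomic<bool> _transfer_lock{false};  // only one transfer at a time
  std::mutex _free_mutex;

public:
  void release(void* node) {
    unsigned i = _active.load(std::memory_order_acquire);
    std::lock_guard<std::mutex> g(_pending_mutex[i]);
    _pending[i].push_back(node);            // goes to the active pending list, not the free list
  }

  // Move the inactive pending batch into the free list; returns false if
  // another thread is already transferring (mirrors try_transfer_pending()).
  bool try_transfer_pending() {
    if (_transfer_lock.load() || _transfer_lock.exchange(true)) return false;
    unsigned old_active = _active.load();
    _active.store((old_active + 1) % 2, std::memory_order_release);  // flip buffers
    {
      // Holding this mutex plays the role of GlobalCounter::write_synchronize():
      // no release() is still appending to the now-inactive list.
      std::lock_guard<std::mutex> g(_pending_mutex[old_active]);
      std::lock_guard<std::mutex> f(_free_mutex);
      _free.insert(_free.end(), _pending[old_active].begin(), _pending[old_active].end());
      _pending[old_active].clear();
    }
    _transfer_lock.store(false, std::memory_order_release);
    return true;
  }

  void* allocate() {
    std::lock_guard<std::mutex> f(_free_mutex);
    if (_free.empty()) return nullptr;      // caller falls back to its FreeListConfig
    void* n = _free.back();
    _free.pop_back();
    return n;
  }
};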
src/hotspot/share/gc/shared/freeListAllocator.hpp (new file)

@@ -0,0 +1,161 @@
/*
 * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_GC_SHARED_FREELISTALLOCATOR_HPP
#define SHARE_GC_SHARED_FREELISTALLOCATOR_HPP

#include "memory/allocation.hpp"
#include "memory/padded.hpp"
#include "runtime/atomic.hpp"
#include "utilities/globalDefinitions.hpp"
#include "utilities/lockFreeStack.hpp"

class FreeListConfig {
  // Desired minimum transfer batch size. There is relatively little
  // importance to the specific number. It shouldn't be too big, else
  // we're wasting space when the release rate is low. If the release
  // rate is high, we might accumulate more than this before being
  // able to start a new transfer, but that's okay.
  const size_t _transfer_threshold;
protected:
  ~FreeListConfig() = default;
public:
  explicit FreeListConfig(size_t threshold = 10) : _transfer_threshold(threshold) {}

  size_t transfer_threshold() { return _transfer_threshold; }

  virtual void* allocate() = 0;

  virtual void deallocate(void* node) = 0;
};

// Allocation is based on a lock-free list of nodes. To reduce synchronization
// overhead on the free list between allocation and release calls, the released
// nodes are first placed on a pending list, then transferred to the free list in
// batches. While on the pending list, the nodes are not available for allocation.
// The allocator uses allocation options specified by an instance of
// FreeListConfig. The FreeListConfig includes an allocation method to use in case
// the free list is empty and a deallocation method used to deallocate nodes in
// the free list. Additionally, the FreeListConfig configures the threshold used
// as a minimum batch size for transferring released nodes from the pending list
// to the free list making them available for re-allocation.
class FreeListAllocator {
  struct FreeNode {
    FreeNode* volatile _next;

    FreeNode() : _next (nullptr) { }

    FreeNode* next() { return Atomic::load(&_next); }

    FreeNode* volatile* next_addr() { return &_next; }

    void set_next(FreeNode* next) { Atomic::store(&_next, next); }
  };

  struct NodeList {
    FreeNode* _head;      // First node in list or nullptr if empty.
    FreeNode* _tail;      // Last node in list or nullptr if empty.
    size_t _entry_count;  // Sum of entries in nodes in list.

    NodeList();

    NodeList(FreeNode* head, FreeNode* tail, size_t entry_count);
  };

  class PendingList {
    FreeNode* _tail;
    FreeNode* volatile _head;
    volatile size_t _count;

    NONCOPYABLE(PendingList);

  public:
    PendingList();
    ~PendingList() = default;

    // Add node to the list. Returns the number of nodes in the list.
    // Thread-safe against concurrent add operations.
    size_t add(FreeNode* node);

    size_t count() const;

    // Return the nodes in the list, leaving the list empty.
    // Not thread-safe.
    NodeList take_all();
  };

  static FreeNode* volatile* next_ptr(FreeNode& node) { return node.next_addr(); }
  typedef LockFreeStack<FreeNode, &next_ptr> Stack;

  FreeListConfig* _config;
  char _name[DEFAULT_CACHE_LINE_SIZE - sizeof(FreeListConfig*)]; // Use name as padding.

#define DECLARE_PADDED_MEMBER(Id, Type, Name) \
  Type Name; DEFINE_PAD_MINUS_SIZE(Id, DEFAULT_CACHE_LINE_SIZE, sizeof(Type))
  DECLARE_PADDED_MEMBER(1, volatile size_t, _free_count);
  DECLARE_PADDED_MEMBER(2, Stack, _free_list);
  DECLARE_PADDED_MEMBER(3, volatile bool, _transfer_lock);
#undef DECLARE_PADDED_MEMBER

  volatile uint _active_pending_list;
  PendingList _pending_lists[2];

  void delete_list(FreeNode* list);

  NONCOPYABLE(FreeListAllocator);

public:
  FreeListAllocator(const char* name, FreeListConfig* config);

  const char* name() const { return _name; }

  ~FreeListAllocator();

  size_t free_count() const;
  size_t pending_count() const;

  void* allocate();
  void release(void* node);

  // Free nodes in the allocator could have been allocated out of an arena.
  // Therefore, the nodes can be freed at once when entire arena is discarded
  // without running destructors for the individual nodes. In such cases, reset
  // method should be called before the ~FreeListAllocator(). Calling the reset
  // method on nodes not managed by an arena will leak the memory by just dropping
  // the nodes to the floor.
  void reset();
  bool try_transfer_pending();

  size_t mem_size() const {
    return sizeof(*this);
  }

  // Deallocate some of the available nodes in the free_list.
  // remove_goal is the target number to remove. Returns the number
  // actually deallocated, which may be less than the goal if there
  // were fewer available.
  size_t reduce_free_list(size_t remove_goal);
};

#endif // SHARE_GC_SHARED_FREELISTALLOCATOR_HPP
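For a sense of how a client wires this up, here is a hypothetical FreeListConfig backed by the C heap. MallocNodeConfig and example_usage are invented for illustration and assume the HotSpot build environment; the real clients in this patch are G1SegmentedArray and BufferNode::AllocatorConfig.

// Hypothetical usage sketch; MallocNodeConfig is not part of this patch.
class MallocNodeConfig : public FreeListConfig {
  const size_t _node_size;
public:
  explicit MallocNodeConfig(size_t node_size)
    : FreeListConfig(10 /* transfer threshold */), _node_size(node_size) {}

  // Called by FreeListAllocator::allocate() when the free list is empty.
  void* allocate() override { return os::malloc(_node_size, mtGC); }

  // Called when the free list is shrunk (reduce_free_list) or torn down.
  void deallocate(void* node) override { os::free(node); }
};

static void example_usage() {
  MallocNodeConfig config(64);                // nodes must be at least pointer-sized and aligned
  FreeListAllocator allocator("Example", &config);

  void* node = allocator.allocate();          // from the free list, or config.allocate()
  // ... use the 64-byte node ...
  allocator.release(node);                    // goes onto the pending list first

  allocator.reduce_free_list(10);             // hand surplus free nodes back to the config
}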
src/hotspot/share/gc/shared/ptrQueue.cpp

@@ -24,14 +24,7 @@

#include "precompiled.hpp"
#include "gc/shared/ptrQueue.hpp"
#include "logging/log.hpp"
#include "memory/allocation.hpp"
#include "memory/allocation.inline.hpp"
#include "runtime/atomic.hpp"
#include "runtime/mutex.hpp"
#include "runtime/mutexLocker.hpp"
#include "runtime/thread.inline.hpp"
#include "utilities/globalCounter.inline.hpp"

#include <new>

@@ -45,178 +38,42 @@ PtrQueue::~PtrQueue() {
  assert(_buf == NULL, "queue must be flushed before delete");
}

BufferNode* BufferNode::allocate(size_t size) {
  size_t byte_size = size * sizeof(void*);
  void* data = NEW_C_HEAP_ARRAY(char, buffer_offset() + byte_size, mtGC);
  return new (data) BufferNode;
BufferNode::AllocatorConfig::AllocatorConfig(size_t size) : _buffer_size(size) {}

void* BufferNode::AllocatorConfig::allocate() {
  size_t byte_size = _buffer_size * sizeof(void*);
  return NEW_C_HEAP_ARRAY(char, buffer_offset() + byte_size, mtGC);
}

void BufferNode::deallocate(BufferNode* node) {
  node->~BufferNode();
void BufferNode::AllocatorConfig::deallocate(void* node) {
  assert(node != nullptr, "precondition");
  FREE_C_HEAP_ARRAY(char, node);
}

BufferNode::Allocator::PendingList::PendingList() :
  _tail(nullptr), _head(nullptr), _count(0) {}

BufferNode::Allocator::PendingList::~PendingList() {
  delete_list(Atomic::load(&_head));
}

size_t BufferNode::Allocator::PendingList::add(BufferNode* node) {
  assert(node->next() == nullptr, "precondition");
  BufferNode* old_head = Atomic::xchg(&_head, node);
  if (old_head != nullptr) {
    node->set_next(old_head);
  } else {
    assert(_tail == nullptr, "invariant");
    _tail = node;
  }
  return Atomic::add(&_count, size_t(1));
}

BufferNodeList BufferNode::Allocator::PendingList::take_all() {
  BufferNodeList result{Atomic::load(&_head), _tail, Atomic::load(&_count)};
  Atomic::store(&_head, (BufferNode*)nullptr);
  _tail = nullptr;
  Atomic::store(&_count, size_t(0));
  return result;
}

BufferNode::Allocator::Allocator(const char* name, size_t buffer_size) :
  _buffer_size(buffer_size),
  _pending_lists(),
  _active_pending_list(0),
  _free_list(),
  _free_count(0),
  _transfer_lock(false)
  _config(buffer_size),
  _free_list(name, &_config)
{
  strncpy(_name, name, sizeof(_name) - 1);
  _name[sizeof(_name) - 1] = '\0';
}

BufferNode::Allocator::~Allocator() {
  delete_list(_free_list.pop_all());
}

void BufferNode::Allocator::delete_list(BufferNode* list) {
  while (list != NULL) {
    BufferNode* next = list->next();
    DEBUG_ONLY(list->set_next(NULL);)
    BufferNode::deallocate(list);
    list = next;
  }
}

size_t BufferNode::Allocator::free_count() const {
  return Atomic::load(&_free_count);
  return _free_list.free_count();
}

BufferNode* BufferNode::Allocator::allocate() {
  BufferNode* node;
  {
    // Protect against ABA; see release().
    GlobalCounter::CriticalSection cs(Thread::current());
    node = _free_list.pop();
  }
  if (node == NULL) {
    node = BufferNode::allocate(_buffer_size);
  } else {
    // Decrement count after getting buffer from free list. This, along
    // with incrementing count before adding to free list, ensures count
    // never underflows.
    size_t count = Atomic::sub(&_free_count, 1u);
    assert((count + 1) != 0, "_free_count underflow");
  }
  return node;
  return ::new (_free_list.allocate()) BufferNode();
}

// To solve the ABA problem for lock-free stack pop, allocate does the
// pop inside a critical section, and release synchronizes on the
// critical sections before adding to the _free_list. But we don't
// want to make every release have to do a synchronize. Instead, we
// initially place released nodes on the pending list, and transfer
// them to the _free_list in batches. Only one transfer at a time is
// permitted, with a lock bit to control access to that phase. While
// a transfer is in progress, other threads might be adding other nodes
// to the pending list, to be dealt with by some later transfer.
void BufferNode::Allocator::release(BufferNode* node) {
  assert(node != NULL, "precondition");
  assert(node->next() == NULL, "precondition");

  // Desired minimum transfer batch size. There is relatively little
  // importance to the specific number. It shouldn't be too big, else
  // we're wasting space when the release rate is low. If the release
  // rate is high, we might accumulate more than this before being
  // able to start a new transfer, but that's okay. Also note that
  // the allocation rate and the release rate are going to be fairly
  // similar, due to how the buffers are used.
  const size_t trigger_transfer = 10;

  // The pending list is double-buffered. Add node to the currently active
  // pending list, within a critical section so a transfer will wait until
  // we're done with what might be the pending list to be transferred.
  {
    GlobalCounter::CriticalSection cs(Thread::current());
    uint index = Atomic::load_acquire(&_active_pending_list);
    size_t count = _pending_lists[index].add(node);
    if (count <= trigger_transfer) return;
  }
  // Attempt transfer when number pending exceeds the transfer threshold.
  try_transfer_pending();
}

// Try to transfer nodes from the pending list to _free_list, with a
// synchronization delay for any in-progress pops from the _free_list,
// to solve ABA there. Return true if performed a (possibly empty)
// transfer, false if blocked from doing so by some other thread's
// in-progress transfer.
bool BufferNode::Allocator::try_transfer_pending() {
  // Attempt to claim the lock.
  if (Atomic::load(&_transfer_lock) || // Skip CAS if likely to fail.
      Atomic::cmpxchg(&_transfer_lock, false, true)) {
    return false;
  }
  // Have the lock; perform the transfer.

  // Change which pending list is active. Don't need an atomic RMW since
  // we have the lock and we're the only writer.
  uint index = Atomic::load(&_active_pending_list);
  uint new_active = (index + 1) % ARRAY_SIZE(_pending_lists);
  Atomic::release_store(&_active_pending_list, new_active);

  // Wait for all critical sections in the buffer life-cycle to complete.
  // This includes _free_list pops and adding to the now inactive pending
  // list.
  GlobalCounter::write_synchronize();

  // Transfer the inactive pending list to _free_list.
  BufferNodeList transfer_list = _pending_lists[index].take_all();
  size_t count = transfer_list._entry_count;
  if (count > 0) {
    // Update count first so no underflow in allocate().
    Atomic::add(&_free_count, count);
    _free_list.prepend(*transfer_list._head, *transfer_list._tail);
    log_trace(gc, ptrqueue, freelist)
             ("Transferred %s pending to free: %zu", name(), count);
  }
  Atomic::release_store(&_transfer_lock, false);
  return true;
  node->~BufferNode();
  _free_list.release(node);
}

size_t BufferNode::Allocator::reduce_free_list(size_t remove_goal) {
  try_transfer_pending();
  size_t removed = 0;
  for ( ; removed < remove_goal; ++removed) {
    BufferNode* node = _free_list.pop();
    if (node == NULL) break;
    BufferNode::deallocate(node);
  }
  size_t new_count = Atomic::sub(&_free_count, removed);
  log_debug(gc, ptrqueue, freelist)
           ("Reduced %s free list by " SIZE_FORMAT " to " SIZE_FORMAT,
            name(), removed, new_count);
  return removed;
  return _free_list.reduce_free_list(remove_goal);
}

PtrQueueSet::PtrQueueSet(BufferNode::Allocator* allocator) :
src/hotspot/share/gc/shared/ptrQueue.hpp

@@ -25,7 +25,7 @@
#ifndef SHARE_GC_SHARED_PTRQUEUE_HPP
#define SHARE_GC_SHARED_PTRQUEUE_HPP

#include "gc/shared/bufferNodeList.hpp"
#include "gc/shared/freeListAllocator.hpp"
#include "memory/padded.hpp"
#include "utilities/align.hpp"
#include "utilities/debug.hpp"

@@ -132,12 +132,6 @@ class BufferNode {
    return offset_of(BufferNode, _buffer);
  }

  // Allocate a new BufferNode with the "buffer" having size elements.
  static BufferNode* allocate(size_t size);

  // Free a BufferNode.
  static void deallocate(BufferNode* node);

public:
  static BufferNode* volatile* next_ptr(BufferNode& bn) { return &bn._next; }
  typedef LockFreeStack<BufferNode, &next_ptr> Stack;

@@ -163,70 +157,48 @@ public:
      reinterpret_cast<char*>(node) + buffer_offset());
  }

  class AllocatorConfig;
  class Allocator;   // Free-list based allocator.
  class TestSupport; // Unit test support.
};

// Allocation is based on a lock-free free list of nodes, linked through
// BufferNode::_next (see BufferNode::Stack). To solve the ABA problem,
// popping a node from the free list is performed within a GlobalCounter
// critical section, and pushing nodes onto the free list is done after
// a GlobalCounter synchronization associated with the nodes to be pushed.
// This is documented behavior so that other parts of the node life-cycle
// can depend on and make use of it too.
// We use BufferNode::AllocatorConfig to set the allocation options for the
// FreeListAllocator.
class BufferNode::AllocatorConfig : public FreeListConfig {
  const size_t _buffer_size;
public:
  explicit AllocatorConfig(size_t size);

  ~AllocatorConfig() = default;

  void* allocate() override;

  void deallocate(void* node) override;

  size_t buffer_size() const { return _buffer_size; }
};

class BufferNode::Allocator {
  friend class TestSupport;

  // Since we don't expect many instances, and measured >15% speedup
  // on stress gtest, padding seems like a good tradeoff here.
#define DECLARE_PADDED_MEMBER(Id, Type, Name) \
  Type Name; DEFINE_PAD_MINUS_SIZE(Id, DEFAULT_CACHE_LINE_SIZE, sizeof(Type))

  class PendingList {
    BufferNode* _tail;
    DECLARE_PADDED_MEMBER(1, BufferNode* volatile, _head);
    DECLARE_PADDED_MEMBER(2, volatile size_t, _count);

    NONCOPYABLE(PendingList);

  public:
    PendingList();
    ~PendingList();

    // Add node to the list. Returns the number of nodes in the list.
    // Thread-safe against concurrent add operations.
    size_t add(BufferNode* node);

    // Return the nodes in the list, leaving the list empty.
    // Not thread-safe.
    BufferNodeList take_all();
  };

  const size_t _buffer_size;
  char _name[DEFAULT_CACHE_LINE_SIZE - sizeof(size_t)]; // Use name as padding.
  PendingList _pending_lists[2];
  DECLARE_PADDED_MEMBER(1, volatile uint, _active_pending_list);
  DECLARE_PADDED_MEMBER(2, Stack, _free_list);
  DECLARE_PADDED_MEMBER(3, volatile size_t, _free_count);
  DECLARE_PADDED_MEMBER(4, volatile bool, _transfer_lock);

#undef DECLARE_PADDED_MEMBER

  static void delete_list(BufferNode* list);
  bool try_transfer_pending();
  AllocatorConfig _config;
  FreeListAllocator _free_list;

  NONCOPYABLE(Allocator);

public:
  Allocator(const char* name, size_t buffer_size);
  ~Allocator();
  ~Allocator() = default;

  const char* name() const { return _name; }
  size_t buffer_size() const { return _buffer_size; }
  size_t buffer_size() const { return _config.buffer_size(); }
  size_t free_count() const;
  BufferNode* allocate();
  void release(BufferNode* node);

  // If _free_list has items buffered in the pending list, transfer
  // these to make them available for re-allocation.
  bool flush_free_list() { return _free_list.try_transfer_pending(); }

  // Deallocate some of the available buffers. remove_goal is the target
  // number to remove. Returns the number actually deallocated, which may
  // be less than the goal if there were fewer available.
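The public surface of BufferNode::Allocator is essentially unchanged by the refactoring; only the internals now delegate to FreeListAllocator through AllocatorConfig. A short usage sketch consistent with the declarations above and with the gtest below (assumes the HotSpot build environment; the function name is invented for illustration):

static void buffer_node_allocator_example(size_t buffer_size) {
  BufferNode::Allocator allocator("Example Buffer Allocator", buffer_size);

  // allocate() reuses a node from the free list if possible; otherwise
  // AllocatorConfig::allocate() obtains fresh C-heap memory and the node
  // is constructed into it with placement new.
  BufferNode* node = allocator.allocate();

  // ... fill the node's buffer with up to buffer_size pointers ...

  // release() destroys the node and puts its memory on the pending list;
  // batches are later transferred to the free list.
  allocator.release(node);

  // Make any pending nodes available for re-allocation immediately,
  // then trim the free list.
  allocator.flush_free_list();
  allocator.reduce_free_list(1);
}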
test/hotspot/gtest/gc/shared (PtrQueueBufferAllocatorTest)

@@ -1,5 +1,5 @@
/*
 * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it

@@ -38,7 +38,7 @@
class BufferNode::TestSupport : AllStatic {
public:
  static bool try_transfer_pending(Allocator* allocator) {
    return allocator->try_transfer_pending();
    return allocator->flush_free_list();
  }

  class CompletedList;

@@ -71,13 +71,6 @@ TEST_VM(PtrQueueBufferAllocatorTest, test) {
  }
  ASSERT_TRUE(BufferNode::TestSupport::try_transfer_pending(&allocator));
  ASSERT_EQ(node_count, allocator.free_count());
  for (size_t i = 0; i < node_count; ++i) {
    if (i == 0) {
      ASSERT_EQ((BufferNode*)NULL, nodes[i]->next());
    } else {
      ASSERT_EQ(nodes[i - 1], nodes[i]->next());
    }
  }

  // Allocate nodes from the free list.
  for (size_t i = 0; i < node_count; ++i) {