8212826: Make PtrQueue free list lock-free

Add lock-free stack and use in BufferNode::Allocator.

Reviewed-by: tschatzl, sangheki
This commit is contained in:
Kim Barrett 2019-01-19 19:50:01 -05:00
parent 2858e8f4e2
commit 5c83330cf8
10 changed files with 840 additions and 87 deletions

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2001, 2018, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -55,8 +55,8 @@ G1BarrierSet::G1BarrierSet(G1CardTable* card_table) :
make_barrier_set_c2<G1BarrierSetC2>(), make_barrier_set_c2<G1BarrierSetC2>(),
card_table, card_table,
BarrierSet::FakeRtti(BarrierSet::G1BarrierSet)), BarrierSet::FakeRtti(BarrierSet::G1BarrierSet)),
_satb_mark_queue_buffer_allocator(G1SATBBufferSize, SATB_Q_FL_lock), _satb_mark_queue_buffer_allocator("SATB Buffer Allocator", G1SATBBufferSize),
_dirty_card_queue_buffer_allocator(G1UpdateBufferSize, DirtyCardQ_FL_lock), _dirty_card_queue_buffer_allocator("DC Buffer Allocator", G1UpdateBufferSize),
_satb_mark_queue_set(), _satb_mark_queue_set(),
_dirty_card_queue_set() _dirty_card_queue_set()
{} {}

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2001, 2018, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -24,12 +24,15 @@
#include "precompiled.hpp" #include "precompiled.hpp"
#include "gc/shared/ptrQueue.hpp" #include "gc/shared/ptrQueue.hpp"
#include "logging/log.hpp"
#include "memory/allocation.hpp" #include "memory/allocation.hpp"
#include "memory/allocation.inline.hpp" #include "memory/allocation.inline.hpp"
#include "runtime/atomic.hpp" #include "runtime/atomic.hpp"
#include "runtime/mutex.hpp" #include "runtime/mutex.hpp"
#include "runtime/mutexLocker.hpp" #include "runtime/mutexLocker.hpp"
#include "runtime/orderAccess.hpp"
#include "runtime/thread.inline.hpp" #include "runtime/thread.inline.hpp"
#include "utilities/globalCounter.inline.hpp"
#include <new> #include <new>
@ -85,20 +88,29 @@ void BufferNode::deallocate(BufferNode* node) {
FREE_C_HEAP_ARRAY(char, node); FREE_C_HEAP_ARRAY(char, node);
} }
BufferNode::Allocator::Allocator(size_t buffer_size, Mutex* lock) : BufferNode::Allocator::Allocator(const char* name, size_t buffer_size) :
_buffer_size(buffer_size), _buffer_size(buffer_size),
_lock(lock), _pending_list(),
_free_list(NULL), _free_list(),
_free_count(0) _pending_count(0),
_free_count(0),
_transfer_lock(false)
{ {
assert(lock != NULL, "precondition"); strncpy(_name, name, sizeof(_name));
_name[sizeof(_name) - 1] = '\0';
} }
BufferNode::Allocator::~Allocator() { BufferNode::Allocator::~Allocator() {
while (_free_list != NULL) { delete_list(_free_list.pop_all());
BufferNode* node = _free_list; delete_list(_pending_list.pop_all());
_free_list = node->next(); }
BufferNode::deallocate(node);
void BufferNode::Allocator::delete_list(BufferNode* list) {
while (list != NULL) {
BufferNode* next = list->next();
DEBUG_ONLY(list->set_next(NULL);)
BufferNode::deallocate(list);
list = next;
} }
} }
@ -107,55 +119,109 @@ size_t BufferNode::Allocator::free_count() const {
} }
BufferNode* BufferNode::Allocator::allocate() { BufferNode* BufferNode::Allocator::allocate() {
BufferNode* node = NULL; BufferNode* node;
{ {
MutexLockerEx ml(_lock, Mutex::_no_safepoint_check_flag); // Protect against ABA; see release().
node = _free_list; GlobalCounter::CriticalSection cs(Thread::current());
if (node != NULL) { node = _free_list.pop();
_free_list = node->next(); }
--_free_count; if (node == NULL) {
node->set_next(NULL); node = BufferNode::allocate(_buffer_size);
node->set_index(0); } else {
// Decrement count after getting buffer from free list. This, along
// with incrementing count before adding to free list, ensures count
// never underflows.
size_t count = Atomic::sub(1u, &_free_count);
assert((count + 1) != 0, "_free_count underflow");
}
return node; return node;
} }
}
return BufferNode::allocate(_buffer_size);
}
// To solve the ABA problem for lock-free stack pop, allocate does the
// pop inside a critical section, and release synchronizes on the
// critical sections before adding to the _free_list. But we don't
// want to make every release have to do a synchronize. Instead, we
// initially place released nodes on the _pending_list, and transfer
// them to the _free_list in batches. Only one transfer at a time is
// permitted, with a lock bit to control access to that phase. A
// transfer takes all the nodes from the _pending_list, synchronizes on
// the _free_list pops, and then adds the former pending nodes to the
// _free_list. While that's happening, other threads might be adding
// other nodes to the _pending_list, to be dealt with by some later
// transfer.
void BufferNode::Allocator::release(BufferNode* node) { void BufferNode::Allocator::release(BufferNode* node) {
MutexLockerEx ml(_lock, Mutex::_no_safepoint_check_flag); assert(node != NULL, "precondition");
node->set_next(_free_list); assert(node->next() == NULL, "precondition");
_free_list = node;
++_free_count; // Desired minimum transfer batch size. There is relatively little
// importance to the specific number. It shouldn't be too big, else
// we're wasting space when the release rate is low. If the release
// rate is high, we might accumulate more than this before being
// able to start a new transfer, but that's okay. Also note that
// the allocation rate and the release rate are going to be fairly
// similar, due to how the buffers are used.
const size_t trigger_transfer = 10;
// Add to pending list. Update count first so no underflow in transfer.
size_t pending_count = Atomic::add(1u, &_pending_count);
_pending_list.push(*node);
if (pending_count > trigger_transfer) {
try_transfer_pending();
}
} }
void BufferNode::Allocator::reduce_free_list() { // Try to transfer nodes from _pending_list to _free_list, with a
BufferNode* head = NULL; // synchronization delay for any in-progress pops from the _free_list,
{ // to solve ABA there. Return true if performed a (possibly empty)
MutexLockerEx ml(_lock, Mutex::_no_safepoint_check_flag); // transfer, false if blocked from doing so by some other thread's
// For now, delete half. // in-progress transfer.
size_t remove = _free_count / 2; bool BufferNode::Allocator::try_transfer_pending() {
if (remove > 0) { // Attempt to claim the lock.
head = _free_list; if (Atomic::load(&_transfer_lock) || // Skip CAS if likely to fail.
BufferNode* tail = head; Atomic::cmpxchg(true, &_transfer_lock, false)) {
BufferNode* prev = NULL; return false;
for (size_t i = 0; i < remove; ++i) {
assert(tail != NULL, "free list size is wrong");
prev = tail;
tail = tail->next();
} }
assert(prev != NULL, "invariant"); // Have the lock; perform the transfer.
assert(prev->next() == tail, "invariant");
prev->set_next(NULL); // Claim all the pending nodes.
_free_list = tail; BufferNode* first = _pending_list.pop_all();
_free_count -= remove; if (first != NULL) {
// Prepare to add the claimed nodes, and update _pending_count.
BufferNode* last = first;
size_t count = 1;
for (BufferNode* next = first->next(); next != NULL; next = next->next()) {
last = next;
++count;
} }
Atomic::sub(count, &_pending_count);
// Wait for any in-progress pops, to avoid ABA for them.
GlobalCounter::write_synchronize();
// Add synchronized nodes to _free_list.
// Update count first so no underflow in allocate().
Atomic::add(count, &_free_count);
_free_list.prepend(*first, *last);
log_trace(gc, ptrqueue, freelist)
("Transferred %s pending to free: " SIZE_FORMAT, name(), count);
} }
while (head != NULL) { OrderAccess::release_store(&_transfer_lock, false);
BufferNode* next = head->next(); return true;
BufferNode::deallocate(head);
head = next;
} }
size_t BufferNode::Allocator::reduce_free_list(size_t remove_goal) {
try_transfer_pending();
size_t removed = 0;
for ( ; removed < remove_goal; ++removed) {
BufferNode* node = _free_list.pop();
if (node == NULL) break;
BufferNode::deallocate(node);
}
size_t new_count = Atomic::sub(removed, &_free_count);
log_debug(gc, ptrqueue, freelist)
("Reduced %s free list by " SIZE_FORMAT " to " SIZE_FORMAT,
name(), removed, new_count);
return removed;
} }
PtrQueueSet::PtrQueueSet(bool notify_when_complete) : PtrQueueSet::PtrQueueSet(bool notify_when_complete) :

View File

@ -25,7 +25,10 @@
#ifndef SHARE_GC_SHARED_PTRQUEUE_HPP #ifndef SHARE_GC_SHARED_PTRQUEUE_HPP
#define SHARE_GC_SHARED_PTRQUEUE_HPP #define SHARE_GC_SHARED_PTRQUEUE_HPP
#include "memory/padded.hpp"
#include "utilities/align.hpp" #include "utilities/align.hpp"
#include "utilities/debug.hpp"
#include "utilities/lockFreeStack.hpp"
#include "utilities/sizes.hpp" #include "utilities/sizes.hpp"
class Mutex; class Mutex;
@ -215,7 +218,7 @@ protected:
class BufferNode { class BufferNode {
size_t _index; size_t _index;
BufferNode* _next; BufferNode* volatile _next;
void* _buffer[1]; // Pseudo flexible array member. void* _buffer[1]; // Pseudo flexible array member.
BufferNode() : _index(0), _next(NULL) { } BufferNode() : _index(0), _next(NULL) { }
@ -225,6 +228,8 @@ class BufferNode {
return offset_of(BufferNode, _buffer); return offset_of(BufferNode, _buffer);
} }
static BufferNode* volatile* next_ptr(BufferNode& bn) { return &bn._next; }
AIX_ONLY(public:) // xlC 12 on AIX doesn't implement C++ DR45. AIX_ONLY(public:) // xlC 12 on AIX doesn't implement C++ DR45.
// Allocate a new BufferNode with the "buffer" having size elements. // Allocate a new BufferNode with the "buffer" having size elements.
static BufferNode* allocate(size_t size); static BufferNode* allocate(size_t size);
@ -233,6 +238,8 @@ AIX_ONLY(public:) // xlC 12 on AIX doesn't implement C++ DR45.
static void deallocate(BufferNode* node); static void deallocate(BufferNode* node);
public: public:
typedef LockFreeStack<BufferNode, &next_ptr> Stack;
BufferNode* next() const { return _next; } BufferNode* next() const { return _next; }
void set_next(BufferNode* n) { _next = n; } void set_next(BufferNode* n) { _next = n; }
size_t index() const { return _index; } size_t index() const { return _index; }
@ -254,23 +261,52 @@ public:
reinterpret_cast<char*>(node) + buffer_offset()); reinterpret_cast<char*>(node) + buffer_offset());
} }
// Free-list based allocator. class Allocator; // Free-list based allocator.
class Allocator { class TestSupport; // Unit test support.
size_t _buffer_size; };
Mutex* _lock;
BufferNode* _free_list; // Allocation is based on a lock-free free list of nodes, linked through
volatile size_t _free_count; // BufferNode::_next (see BufferNode::Stack). To solve the ABA problem,
// popping a node from the free list is performed within a GlobalCounter
// critical section, and pushing nodes onto the free list is done after
// a GlobalCounter synchronization associated with the nodes to be pushed.
// This is documented behavior so that other parts of the node life-cycle
// can depend on and make use of it too.
class BufferNode::Allocator {
friend class TestSupport;
// Since we don't expect many instances, and measured >15% speedup
// on stress gtest, padding seems like a good tradeoff here.
#define DECLARE_PADDED_MEMBER(Id, Type, Name) \
Type Name; DEFINE_PAD_MINUS_SIZE(Id, DEFAULT_CACHE_LINE_SIZE, sizeof(Type))
const size_t _buffer_size;
char _name[DEFAULT_CACHE_LINE_SIZE - sizeof(size_t)]; // Use name as padding.
DECLARE_PADDED_MEMBER(1, Stack, _pending_list);
DECLARE_PADDED_MEMBER(2, Stack, _free_list);
DECLARE_PADDED_MEMBER(3, volatile size_t, _pending_count);
DECLARE_PADDED_MEMBER(4, volatile size_t, _free_count);
DECLARE_PADDED_MEMBER(5, volatile bool, _transfer_lock);
#undef DECLARE_PADDED_MEMBER
void delete_list(BufferNode* list);
bool try_transfer_pending();
public: public:
Allocator(size_t buffer_size, Mutex* lock); Allocator(const char* name, size_t buffer_size);
~Allocator(); ~Allocator();
const char* name() const { return _name; }
size_t buffer_size() const { return _buffer_size; } size_t buffer_size() const { return _buffer_size; }
size_t free_count() const; size_t free_count() const;
BufferNode* allocate(); BufferNode* allocate();
void release(BufferNode* node); void release(BufferNode* node);
void reduce_free_list();
}; // Deallocate some of the available buffers. remove_goal is the target
// number to remove. Returns the number actually deallocated, which may
// be less than the goal if there were fewer available.
size_t reduce_free_list(size_t remove_goal);
}; };
// A PtrQueueSet represents resources common to a set of pointer queues. // A PtrQueueSet represents resources common to a set of pointer queues.

View File

@ -133,6 +133,7 @@
LOG_TAG(reloc) \ LOG_TAG(reloc) \
LOG_TAG(remset) \ LOG_TAG(remset) \
LOG_TAG(parser) \ LOG_TAG(parser) \
LOG_TAG(ptrqueue) \
LOG_TAG(purge) \ LOG_TAG(purge) \
LOG_TAG(resolve) \ LOG_TAG(resolve) \
LOG_TAG(safepoint) \ LOG_TAG(safepoint) \

View File

@ -25,6 +25,7 @@
#ifndef SHARE_MEMORY_PADDED_HPP #ifndef SHARE_MEMORY_PADDED_HPP
#define SHARE_MEMORY_PADDED_HPP #define SHARE_MEMORY_PADDED_HPP
#include "memory/allocation.hpp"
#include "utilities/align.hpp" #include "utilities/align.hpp"
#include "utilities/globalDefinitions.hpp" #include "utilities/globalDefinitions.hpp"

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -79,10 +79,8 @@ Mutex* NonJavaThreadsList_lock = NULL;
Monitor* CGC_lock = NULL; Monitor* CGC_lock = NULL;
Monitor* STS_lock = NULL; Monitor* STS_lock = NULL;
Monitor* FullGCCount_lock = NULL; Monitor* FullGCCount_lock = NULL;
Mutex* SATB_Q_FL_lock = NULL;
Monitor* SATB_Q_CBL_mon = NULL; Monitor* SATB_Q_CBL_mon = NULL;
Mutex* Shared_SATB_Q_lock = NULL; Mutex* Shared_SATB_Q_lock = NULL;
Mutex* DirtyCardQ_FL_lock = NULL;
Monitor* DirtyCardQ_CBL_mon = NULL; Monitor* DirtyCardQ_CBL_mon = NULL;
Mutex* Shared_DirtyCardQ_lock = NULL; Mutex* Shared_DirtyCardQ_lock = NULL;
Mutex* MarkStackFreeList_lock = NULL; Mutex* MarkStackFreeList_lock = NULL;
@ -214,11 +212,9 @@ void mutex_init() {
def(FullGCCount_lock , PaddedMonitor, leaf, true, Monitor::_safepoint_check_never); // in support of ExplicitGCInvokesConcurrent def(FullGCCount_lock , PaddedMonitor, leaf, true, Monitor::_safepoint_check_never); // in support of ExplicitGCInvokesConcurrent
} }
if (UseG1GC) { if (UseG1GC) {
def(SATB_Q_FL_lock , PaddedMutex , access, true, Monitor::_safepoint_check_never);
def(SATB_Q_CBL_mon , PaddedMonitor, access, true, Monitor::_safepoint_check_never); def(SATB_Q_CBL_mon , PaddedMonitor, access, true, Monitor::_safepoint_check_never);
def(Shared_SATB_Q_lock , PaddedMutex , access + 1, true, Monitor::_safepoint_check_never); def(Shared_SATB_Q_lock , PaddedMutex , access + 1, true, Monitor::_safepoint_check_never);
def(DirtyCardQ_FL_lock , PaddedMutex , access, true, Monitor::_safepoint_check_never);
def(DirtyCardQ_CBL_mon , PaddedMonitor, access, true, Monitor::_safepoint_check_never); def(DirtyCardQ_CBL_mon , PaddedMonitor, access, true, Monitor::_safepoint_check_never);
def(Shared_DirtyCardQ_lock , PaddedMutex , access + 1, true, Monitor::_safepoint_check_never); def(Shared_DirtyCardQ_lock , PaddedMutex , access + 1, true, Monitor::_safepoint_check_never);
@ -235,7 +231,6 @@ void mutex_init() {
def(MonitoringSupport_lock , PaddedMutex , native , true, Monitor::_safepoint_check_never); // used for serviceability monitoring support def(MonitoringSupport_lock , PaddedMutex , native , true, Monitor::_safepoint_check_never); // used for serviceability monitoring support
} }
if (UseShenandoahGC) { if (UseShenandoahGC) {
def(SATB_Q_FL_lock , PaddedMutex , access, true, Monitor::_safepoint_check_never);
def(SATB_Q_CBL_mon , PaddedMonitor, access, true, Monitor::_safepoint_check_never); def(SATB_Q_CBL_mon , PaddedMonitor, access, true, Monitor::_safepoint_check_never);
def(Shared_SATB_Q_lock , PaddedMutex , access + 1, true, Monitor::_safepoint_check_never); def(Shared_SATB_Q_lock , PaddedMutex , access + 1, true, Monitor::_safepoint_check_never);

View File

@ -76,16 +76,12 @@ extern Monitor* CGC_lock; // used for coordination betwee
// fore- & background GC threads. // fore- & background GC threads.
extern Monitor* STS_lock; // used for joining/leaving SuspendibleThreadSet. extern Monitor* STS_lock; // used for joining/leaving SuspendibleThreadSet.
extern Monitor* FullGCCount_lock; // in support of "concurrent" full gc extern Monitor* FullGCCount_lock; // in support of "concurrent" full gc
extern Mutex* SATB_Q_FL_lock; // Protects SATB Q
// buffer free list.
extern Monitor* SATB_Q_CBL_mon; // Protects SATB Q extern Monitor* SATB_Q_CBL_mon; // Protects SATB Q
// completed buffer queue. // completed buffer queue.
extern Mutex* Shared_SATB_Q_lock; // Lock protecting SATB extern Mutex* Shared_SATB_Q_lock; // Lock protecting SATB
// queue shared by // queue shared by
// non-Java threads. // non-Java threads.
extern Mutex* DirtyCardQ_FL_lock; // Protects dirty card Q
// buffer free list.
extern Monitor* DirtyCardQ_CBL_mon; // Protects dirty card Q extern Monitor* DirtyCardQ_CBL_mon; // Protects dirty card Q
// completed buffer queue. // completed buffer queue.
extern Mutex* Shared_DirtyCardQ_lock; // Lock protecting dirty card extern Mutex* Shared_DirtyCardQ_lock; // Lock protecting dirty card

View File

@ -0,0 +1,177 @@
/*
* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_UTILITIES_LOCKFREESTACK_HPP
#define SHARE_UTILITIES_LOCKFREESTACK_HPP
#include "runtime/atomic.hpp"
#include "utilities/debug.hpp"
#include "utilities/macros.hpp"
// The LockFreeStack class template provides a lock-free LIFO. The objects
// in the sequence are intrusively linked via a member in the objects. As
// a result, there is no allocation involved in adding objects to the stack
// or removing them from the stack.
//
// To be used in a LockFreeStack of objects of type T, an object of
// type T must have a list entry member of type T* volatile, with an
// non-member accessor function returning a pointer to that member. A
// LockFreeStack is associated with the class of its elements and an
// entry member from that class.
//
// An object can be in multiple stacks at the same time, so long as
// each stack uses a different entry member. That is, the class of the
// object must have multiple LockFreeStack entry members, one for each
// stack in which the object may simultaneously be an element.
//
// LockFreeStacks support polymorphic elements. Because the objects
// in a stack are externally managed, rather than being embedded
// values in the stack, the actual type of such objects may be more
// specific than the stack's element type.
//
// \tparam T is the class of the elements in the stack.
//
// \tparam next_ptr is a function pointer. Applying this function to
// an object of type T must return a pointer to the list entry member
// of the object associated with the LockFreeStack type.
template<typename T, T* volatile* (*next_ptr)(T&)>
class LockFreeStack {
T* volatile _top;
void prepend_impl(T* first, T* last) {
T* cur = top();
T* old;
do {
old = cur;
set_next(*last, cur);
cur = Atomic::cmpxchg(first, &_top, cur);
} while (old != cur);
}
// Noncopyable.
LockFreeStack(const LockFreeStack&);
LockFreeStack& operator=(const LockFreeStack&);
public:
LockFreeStack() : _top(NULL) {}
~LockFreeStack() { assert(empty(), "stack not empty"); }
// Atomically removes the top object from this stack and returns a
// pointer to that object, or NULL if this stack is empty. Acts as a
// full memory barrier. Subject to ABA behavior; callers must ensure
// usage is safe.
T* pop() {
T* result = top();
T* old;
do {
old = result;
T* new_top = NULL;
if (result != NULL) {
new_top = next(*result);
}
// CAS even on empty pop, for consistent membar bahavior.
result = Atomic::cmpxchg(new_top, &_top, result);
} while (result != old);
if (result != NULL) {
set_next(*result, NULL);
}
return result;
}
// Atomically exchange the list of elements with NULL, returning the old
// list of elements. Acts as a full memory barrier.
// postcondition: empty()
T* pop_all() {
return Atomic::xchg((T*)NULL, &_top);
}
// Atomically adds value to the top of this stack. Acts as a full
// memory barrier.
void push(T& value) {
assert(next(value) == NULL, "precondition");
prepend_impl(&value, &value);
}
// Atomically adds the list of objects (designated by first and
// last) before the objects already in this stack, in the same order
// as in the list. Acts as a full memory barrier.
// precondition: next(last) == NULL.
// postcondition: top() == &first, next(last) == old top().
void prepend(T& first, T& last) {
assert(next(last) == NULL, "precondition");
#ifdef ASSERT
for (T* p = &first; p != &last; p = next(*p)) {
assert(p != NULL, "invalid prepend list");
}
#endif
prepend_impl(&first, &last);
}
// Atomically adds the list of objects headed by first before the
// objects already in this stack, in the same order as in the list.
// Acts as a full memory barrier.
// postcondition: top() == &first.
void prepend(T& first) {
T* last = &first;
while (true) {
T* step_to = next(*last);
if (step_to == NULL) break;
last = step_to;
}
prepend_impl(&first, last);
}
// Return true if the stack is empty.
bool empty() const { return top() == NULL; }
// Return the most recently pushed element, or NULL if the stack is empty.
// The returned element is not removed from the stack.
T* top() const { return Atomic::load(&_top); }
// Return the number of objects in the stack. There must be no concurrent
// pops while the length is being determined.
size_t length() const {
size_t result = 0;
for (const T* current = top(); current != NULL; current = next(*current)) {
++result;
}
return result;
}
// Return the entry following value in the list used by the
// specialized LockFreeStack class.
static T* next(const T& value) {
return Atomic::load(next_ptr(const_cast<T&>(value)));
}
// Set the entry following value to new_next in the list used by the
// specialized LockFreeStack class. Not thread-safe; in particular,
// if value is in an instance of this specialization of LockFreeStack,
// there must be no concurrent push or pop operations on that stack.
static void set_next(T& value, T* new_next) {
Atomic::store(new_next, next_ptr(value));
}
};
#endif // SHARE_UTILITIES_LOCKFREESTACK_HPP

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -24,14 +24,37 @@
#include "precompiled.hpp" #include "precompiled.hpp"
#include "gc/shared/ptrQueue.hpp" #include "gc/shared/ptrQueue.hpp"
#include "runtime/mutex.hpp" #include "memory/allocation.hpp"
#include "runtime/interfaceSupport.inline.hpp"
#include "runtime/orderAccess.hpp"
#include "runtime/semaphore.inline.hpp"
#include "runtime/thread.hpp"
#include "utilities/globalCounter.inline.hpp"
#include "utilities/globalDefinitions.hpp"
#include "utilities/ostream.hpp"
#include "threadHelper.inline.hpp"
#include "unittest.hpp" #include "unittest.hpp"
class BufferNode::TestSupport : AllStatic {
public:
static bool try_transfer_pending(Allocator* allocator) {
return allocator->try_transfer_pending();
}
class CompletedList;
class AllocatorThread;
class ProcessorThread;
};
typedef BufferNode::TestSupport::CompletedList CompletedList;
typedef BufferNode::TestSupport::AllocatorThread AllocatorThread;
typedef BufferNode::TestSupport::ProcessorThread ProcessorThread;
// Some basic testing of BufferNode::Allocator. // Some basic testing of BufferNode::Allocator.
TEST_VM(PtrQueueBufferAllocatorTest, test) { TEST_VM(PtrQueueBufferAllocatorTest, test) {
Mutex m(Mutex::leaf, "PtrQueueBufferAllocatorTest", const size_t buffer_size = 256;
false, Mutex::_safepoint_check_never); BufferNode::Allocator allocator("Test Buffer Allocator", buffer_size);
BufferNode::Allocator allocator(256, &m); ASSERT_EQ(buffer_size, allocator.buffer_size());
// Allocate some new nodes for use in testing. // Allocate some new nodes for use in testing.
BufferNode* nodes[10] = {}; BufferNode* nodes[10] = {};
@ -44,8 +67,11 @@ TEST_VM(PtrQueueBufferAllocatorTest, test) {
// Release the nodes, adding them to the allocator's free list. // Release the nodes, adding them to the allocator's free list.
for (size_t i = 0; i < node_count; ++i) { for (size_t i = 0; i < node_count; ++i) {
ASSERT_EQ(i, allocator.free_count());
allocator.release(nodes[i]); allocator.release(nodes[i]);
}
ASSERT_TRUE(BufferNode::TestSupport::try_transfer_pending(&allocator));
ASSERT_EQ(node_count, allocator.free_count());
for (size_t i = 0; i < node_count; ++i) {
if (i == 0) { if (i == 0) {
ASSERT_EQ((BufferNode*)NULL, nodes[i]->next()); ASSERT_EQ((BufferNode*)NULL, nodes[i]->next());
} else { } else {
@ -56,7 +82,6 @@ TEST_VM(PtrQueueBufferAllocatorTest, test) {
// Allocate nodes from the free list. // Allocate nodes from the free list.
for (size_t i = 0; i < node_count; ++i) { for (size_t i = 0; i < node_count; ++i) {
size_t j = node_count - i; size_t j = node_count - i;
ASSERT_EQ(j, allocator.free_count());
ASSERT_EQ(nodes[j - 1], allocator.allocate()); ASSERT_EQ(nodes[j - 1], allocator.allocate());
} }
ASSERT_EQ(0u, allocator.free_count()); ASSERT_EQ(0u, allocator.free_count());
@ -65,11 +90,161 @@ TEST_VM(PtrQueueBufferAllocatorTest, test) {
for (size_t i = 0; i < node_count; ++i) { for (size_t i = 0; i < node_count; ++i) {
allocator.release(nodes[i]); allocator.release(nodes[i]);
} }
ASSERT_TRUE(BufferNode::TestSupport::try_transfer_pending(&allocator));
ASSERT_EQ(node_count, allocator.free_count()); ASSERT_EQ(node_count, allocator.free_count());
// Destroy some nodes in the free list. // Destroy some nodes in the free list.
// We don't have a way to verify destruction, but we can at // We don't have a way to verify destruction, but we can at
// leat verify we don't crash along the way. // least verify we don't crash along the way.
allocator.reduce_free_list(); size_t count = allocator.free_count();
ASSERT_EQ(count, allocator.reduce_free_list(count));
// destroy allocator. // destroy allocator.
} }
// Stress test with lock-free allocator and completed buffer list.
// Completed buffer list pop avoids ABA by also being in a critical
// section that is synchronized by the allocator's release.
class BufferNode::TestSupport::CompletedList {
BufferNode::Stack _completed_list;
public:
CompletedList() : _completed_list() {}
~CompletedList() {
assert(_completed_list.empty(), "completed list not empty");
}
void push(BufferNode* node) {
assert(node != NULL, "precondition");
_completed_list.push(*node);
}
BufferNode* pop() {
GlobalCounter::CriticalSection cs(Thread::current());
return _completed_list.pop();
}
};
// Simulate a mutator thread, allocating buffers and adding them to
// the completed buffer list.
class BufferNode::TestSupport::AllocatorThread : public JavaTestThread {
BufferNode::Allocator* _allocator;
CompletedList* _cbl;
volatile size_t* _total_allocations;
volatile bool* _continue_running;
size_t _allocations;
public:
AllocatorThread(Semaphore* post,
BufferNode::Allocator* allocator,
CompletedList* cbl,
volatile size_t* total_allocations,
volatile bool* continue_running) :
JavaTestThread(post),
_allocator(allocator),
_cbl(cbl),
_total_allocations(total_allocations),
_continue_running(continue_running),
_allocations(0)
{}
virtual void main_run() {
while (OrderAccess::load_acquire(_continue_running)) {
BufferNode* node = _allocator->allocate();
_cbl->push(node);
++_allocations;
ThreadBlockInVM tbiv(this); // Safepoint check.
}
tty->print_cr("allocations: " SIZE_FORMAT, _allocations);
Atomic::add(_allocations, _total_allocations);
}
};
// Simulate a GC thread, taking buffers from the completed buffer list
// and returning them to the allocator.
class BufferNode::TestSupport::ProcessorThread : public JavaTestThread {
BufferNode::Allocator* _allocator;
CompletedList* _cbl;
volatile bool* _continue_running;
public:
ProcessorThread(Semaphore* post,
BufferNode::Allocator* allocator,
CompletedList* cbl,
volatile bool* continue_running) :
JavaTestThread(post),
_allocator(allocator),
_cbl(cbl),
_continue_running(continue_running)
{}
virtual void main_run() {
while (true) {
BufferNode* node = _cbl->pop();
if (node != NULL) {
_allocator->release(node);
} else if (!OrderAccess::load_acquire(_continue_running)) {
return;
}
ThreadBlockInVM tbiv(this); // Safepoint check.
}
}
};
static void run_test(BufferNode::Allocator* allocator, CompletedList* cbl) {
const uint nthreads = 4;
const uint milliseconds_to_run = 1000;
Semaphore post;
volatile size_t total_allocations = 0;
volatile bool allocator_running = true;
volatile bool processor_running = true;
ProcessorThread* proc_threads[nthreads] = {};
for (uint i = 0; i < nthreads; ++i) {
proc_threads[i] = new ProcessorThread(&post,
allocator,
cbl,
&processor_running);
proc_threads[i]->doit();
}
AllocatorThread* alloc_threads[nthreads] = {};
for (uint i = 0; i < nthreads; ++i) {
alloc_threads[i] = new AllocatorThread(&post,
allocator,
cbl,
&total_allocations,
&allocator_running);
alloc_threads[i]->doit();
}
JavaThread* this_thread = JavaThread::current();
tty->print_cr("Stressing allocator for %u ms", milliseconds_to_run);
{
ThreadInVMfromNative invm(this_thread);
os::sleep(this_thread, milliseconds_to_run, true);
}
OrderAccess::release_store(&allocator_running, false);
for (uint i = 0; i < nthreads; ++i) {
ThreadInVMfromNative invm(this_thread);
post.wait_with_safepoint_check(this_thread);
}
OrderAccess::release_store(&processor_running, false);
for (uint i = 0; i < nthreads; ++i) {
ThreadInVMfromNative invm(this_thread);
post.wait_with_safepoint_check(this_thread);
}
ASSERT_TRUE(BufferNode::TestSupport::try_transfer_pending(allocator));
tty->print_cr("total allocations: " SIZE_FORMAT, total_allocations);
tty->print_cr("allocator free count: " SIZE_FORMAT, allocator->free_count());
}
const size_t buffer_size = 1024;
TEST_VM(PtrQueueBufferAllocatorTest, stress_free_list_allocator) {
BufferNode::Allocator allocator("Test Allocator", buffer_size);
CompletedList completed;
run_test(&allocator, &completed);
}

View File

@ -0,0 +1,306 @@
/*
* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
#include "precompiled.hpp"
#include "memory/allocation.inline.hpp"
#include "runtime/atomic.hpp"
#include "runtime/orderAccess.hpp"
#include "utilities/globalDefinitions.hpp"
#include "utilities/lockFreeStack.hpp"
#include "threadHelper.inline.hpp"
#include "unittest.hpp"
#include <new>
class LockFreeStackTestElement {
typedef LockFreeStackTestElement Element;
Element* volatile _entry;
Element* volatile _entry1;
size_t _id;
static Element* volatile* entry_ptr(Element& e) { return &e._entry; }
static Element* volatile* entry1_ptr(Element& e) { return &e._entry1; }
public:
LockFreeStackTestElement(size_t id = 0) : _entry(), _entry1(), _id(id) {}
size_t id() const { return _id; }
void set_id(size_t value) { _id = value; }
typedef LockFreeStack<Element, &entry_ptr> Stack;
typedef LockFreeStack<Element, &entry1_ptr> Stack1;
};
typedef LockFreeStackTestElement Element;
typedef Element::Stack Stack;
typedef Element::Stack1 Stack1;
static void initialize_ids(Element* elements, size_t size) {
for (size_t i = 0; i < size; ++i) {
elements[i].set_id(i);
}
}
class LockFreeStackTestBasics : public ::testing::Test {
public:
LockFreeStackTestBasics();
static const size_t nelements = 10;
Element elements[nelements];
Stack stack;
private:
void initialize();
};
const size_t LockFreeStackTestBasics::nelements;
LockFreeStackTestBasics::LockFreeStackTestBasics() : stack() {
initialize_ids(elements, nelements);
initialize();
}
void LockFreeStackTestBasics::initialize() {
ASSERT_TRUE(stack.empty());
ASSERT_EQ(0u, stack.length());
ASSERT_TRUE(stack.pop() == NULL);
ASSERT_TRUE(stack.top() == NULL);
for (size_t id = 0; id < nelements; ++id) {
ASSERT_EQ(id, stack.length());
Element* e = &elements[id];
ASSERT_EQ(id, e->id());
stack.push(*e);
ASSERT_FALSE(stack.empty());
ASSERT_EQ(e, stack.top());
}
}
TEST_F(LockFreeStackTestBasics, push_pop) {
for (size_t i = nelements; i > 0; ) {
ASSERT_FALSE(stack.empty());
ASSERT_EQ(i, stack.length());
--i;
Element* e = stack.pop();
ASSERT_TRUE(e != NULL);
ASSERT_EQ(&elements[i], e);
ASSERT_EQ(i, e->id());
}
ASSERT_TRUE(stack.empty());
ASSERT_EQ(0u, stack.length());
ASSERT_TRUE(stack.pop() == NULL);
}
TEST_F(LockFreeStackTestBasics, prepend_one) {
Stack other_stack;
ASSERT_TRUE(other_stack.empty());
ASSERT_TRUE(other_stack.pop() == NULL);
ASSERT_EQ(0u, other_stack.length());
ASSERT_TRUE(other_stack.top() == NULL);
ASSERT_TRUE(other_stack.pop() == NULL);
other_stack.prepend(*stack.pop_all());
ASSERT_EQ(nelements, other_stack.length());
ASSERT_TRUE(stack.empty());
ASSERT_EQ(0u, stack.length());
ASSERT_TRUE(stack.pop() == NULL);
ASSERT_TRUE(stack.top() == NULL);
for (size_t i = nelements; i > 0; ) {
ASSERT_EQ(i, other_stack.length());
--i;
Element* e = other_stack.pop();
ASSERT_TRUE(e != NULL);
ASSERT_EQ(&elements[i], e);
ASSERT_EQ(i, e->id());
}
ASSERT_EQ(0u, other_stack.length());
ASSERT_TRUE(other_stack.pop() == NULL);
}
TEST_F(LockFreeStackTestBasics, prepend_two) {
Stack other_stack;
ASSERT_TRUE(other_stack.empty());
ASSERT_EQ(0u, other_stack.length());
ASSERT_TRUE(other_stack.top() == NULL);
ASSERT_TRUE(other_stack.pop() == NULL);
Element* top = stack.pop_all();
ASSERT_EQ(top, &elements[nelements - 1]);
other_stack.prepend(*top, elements[0]);
for (size_t i = nelements; i > 0; ) {
ASSERT_EQ(i, other_stack.length());
--i;
Element* e = other_stack.pop();
ASSERT_TRUE(e != NULL);
ASSERT_EQ(&elements[i], e);
ASSERT_EQ(i, e->id());
}
ASSERT_EQ(0u, other_stack.length());
ASSERT_TRUE(other_stack.pop() == NULL);
}
TEST_F(LockFreeStackTestBasics, two_stacks) {
Stack1 stack1;
ASSERT_TRUE(stack1.pop() == NULL);
for (size_t id = 0; id < nelements; ++id) {
stack1.push(elements[id]);
}
ASSERT_EQ(nelements, stack1.length());
Element* e0 = stack.top();
Element* e1 = stack1.top();
while (true) {
ASSERT_EQ(e0, e1);
if (e0 == NULL) break;
e0 = stack.next(*e0);
e1 = stack1.next(*e1);
}
for (size_t i = nelements; i > 0; ) {
ASSERT_EQ(i, stack.length());
ASSERT_EQ(i, stack1.length());
--i;
Element* e = stack.pop();
ASSERT_TRUE(e != NULL);
ASSERT_EQ(&elements[i], e);
ASSERT_EQ(i, e->id());
Element* e1 = stack1.pop();
ASSERT_TRUE(e1 != NULL);
ASSERT_EQ(&elements[i], e1);
ASSERT_EQ(i, e1->id());
ASSERT_EQ(e, e1);
}
ASSERT_EQ(0u, stack.length());
ASSERT_EQ(0u, stack1.length());
ASSERT_TRUE(stack.pop() == NULL);
ASSERT_TRUE(stack1.pop() == NULL);
}
class LockFreeStackTestThread : public JavaTestThread {
uint _id;
Stack* _from;
Stack* _to;
volatile size_t* _processed;
size_t _process_limit;
size_t _local_processed;
volatile bool _ready;
public:
LockFreeStackTestThread(Semaphore* post,
uint id,
Stack* from,
Stack* to,
volatile size_t* processed,
size_t process_limit) :
JavaTestThread(post),
_id(id),
_from(from),
_to(to),
_processed(processed),
_process_limit(process_limit),
_local_processed(0),
_ready(false)
{}
virtual void main_run() {
OrderAccess::release_store_fence(&_ready, true);
while (true) {
Element* e = _from->pop();
if (e != NULL) {
_to->push(*e);
Atomic::inc(_processed);
++_local_processed;
} else if (OrderAccess::load_acquire(_processed) == _process_limit) {
tty->print_cr("thread %u processed " SIZE_FORMAT, _id, _local_processed);
return;
}
}
}
bool ready() const { return OrderAccess::load_acquire(&_ready); }
};
TEST_VM(LockFreeStackTest, stress) {
Semaphore post;
Stack initial_stack;
Stack start_stack;
Stack middle_stack;
Stack final_stack;
volatile size_t stage1_processed = 0;
volatile size_t stage2_processed = 0;
const size_t nelements = 10000;
Element* elements = NEW_C_HEAP_ARRAY(Element, nelements, mtOther);
for (size_t id = 0; id < nelements; ++id) {
::new (&elements[id]) Element(id);
initial_stack.push(elements[id]);
}
ASSERT_EQ(nelements, initial_stack.length());
// - stage1 threads pop from start_stack and push to middle_stack.
// - stage2 threads pop from middle_stack and push to final_stack.
// - all threads in a stage count the number of elements processed in
// their corresponding stageN_processed counter.
const uint stage1_threads = 2;
const uint stage2_threads = 2;
const uint nthreads = stage1_threads + stage2_threads;
LockFreeStackTestThread* threads[nthreads] = {};
for (uint i = 0; i < ARRAY_SIZE(threads); ++i) {
Stack* from = &start_stack;
Stack* to = &middle_stack;
volatile size_t* processed = &stage1_processed;
if (i >= stage1_threads) {
from = &middle_stack;
to = &final_stack;
processed = &stage2_processed;
}
threads[i] =
new LockFreeStackTestThread(&post, i, from, to, processed, nelements);
threads[i]->doit();
while (!threads[i]->ready()) {} // Wait until ready to start test.
}
// Transfer elements to start_stack to start test.
start_stack.prepend(*initial_stack.pop_all());
// Wait for all threads to complete.
for (uint i = 0; i < nthreads; ++i) {
post.wait();
}
// Verify expected state.
ASSERT_EQ(nelements, stage1_processed);
ASSERT_EQ(nelements, stage2_processed);
ASSERT_EQ(0u, initial_stack.length());
ASSERT_EQ(0u, start_stack.length());
ASSERT_EQ(0u, middle_stack.length());
ASSERT_EQ(nelements, final_stack.length());
while (final_stack.pop() != NULL) {}
FREE_C_HEAP_ARRAY(Element, elements);
}