diff --git a/src/hotspot/share/gc/g1/g1BarrierSet.cpp b/src/hotspot/share/gc/g1/g1BarrierSet.cpp index 2788c4877e0..119171b43d4 100644 --- a/src/hotspot/share/gc/g1/g1BarrierSet.cpp +++ b/src/hotspot/share/gc/g1/g1BarrierSet.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -55,8 +55,8 @@ G1BarrierSet::G1BarrierSet(G1CardTable* card_table) : make_barrier_set_c2(), card_table, BarrierSet::FakeRtti(BarrierSet::G1BarrierSet)), - _satb_mark_queue_buffer_allocator(G1SATBBufferSize, SATB_Q_FL_lock), - _dirty_card_queue_buffer_allocator(G1UpdateBufferSize, DirtyCardQ_FL_lock), + _satb_mark_queue_buffer_allocator("SATB Buffer Allocator", G1SATBBufferSize), + _dirty_card_queue_buffer_allocator("DC Buffer Allocator", G1UpdateBufferSize), _satb_mark_queue_set(), _dirty_card_queue_set() {} diff --git a/src/hotspot/share/gc/shared/ptrQueue.cpp b/src/hotspot/share/gc/shared/ptrQueue.cpp index ae3a4f0dd24..25d6cc662c5 100644 --- a/src/hotspot/share/gc/shared/ptrQueue.cpp +++ b/src/hotspot/share/gc/shared/ptrQueue.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -24,12 +24,15 @@ #include "precompiled.hpp" #include "gc/shared/ptrQueue.hpp" +#include "logging/log.hpp" #include "memory/allocation.hpp" #include "memory/allocation.inline.hpp" #include "runtime/atomic.hpp" #include "runtime/mutex.hpp" #include "runtime/mutexLocker.hpp" +#include "runtime/orderAccess.hpp" #include "runtime/thread.inline.hpp" +#include "utilities/globalCounter.inline.hpp" #include @@ -85,20 +88,29 @@ void BufferNode::deallocate(BufferNode* node) { FREE_C_HEAP_ARRAY(char, node); } -BufferNode::Allocator::Allocator(size_t buffer_size, Mutex* lock) : +BufferNode::Allocator::Allocator(const char* name, size_t buffer_size) : _buffer_size(buffer_size), - _lock(lock), - _free_list(NULL), - _free_count(0) + _pending_list(), + _free_list(), + _pending_count(0), + _free_count(0), + _transfer_lock(false) { - assert(lock != NULL, "precondition"); + strncpy(_name, name, sizeof(_name)); + _name[sizeof(_name) - 1] = '\0'; } BufferNode::Allocator::~Allocator() { - while (_free_list != NULL) { - BufferNode* node = _free_list; - _free_list = node->next(); - BufferNode::deallocate(node); + delete_list(_free_list.pop_all()); + delete_list(_pending_list.pop_all()); +} + +void BufferNode::Allocator::delete_list(BufferNode* list) { + while (list != NULL) { + BufferNode* next = list->next(); + DEBUG_ONLY(list->set_next(NULL);) + BufferNode::deallocate(list); + list = next; } } @@ -107,55 +119,109 @@ size_t BufferNode::Allocator::free_count() const { } BufferNode* BufferNode::Allocator::allocate() { - BufferNode* node = NULL; + BufferNode* node; { - MutexLockerEx ml(_lock, Mutex::_no_safepoint_check_flag); - node = _free_list; - if (node != NULL) { - _free_list = node->next(); - --_free_count; - node->set_next(NULL); - node->set_index(0); - return node; - } + // Protect against ABA; see release(). + GlobalCounter::CriticalSection cs(Thread::current()); + node = _free_list.pop(); } - return BufferNode::allocate(_buffer_size); + if (node == NULL) { + node = BufferNode::allocate(_buffer_size); + } else { + // Decrement count after getting buffer from free list. This, along + // with incrementing count before adding to free list, ensures count + // never underflows. + size_t count = Atomic::sub(1u, &_free_count); + assert((count + 1) != 0, "_free_count underflow"); + } + return node; } +// To solve the ABA problem for lock-free stack pop, allocate does the +// pop inside a critical section, and release synchronizes on the +// critical sections before adding to the _free_list. But we don't +// want to make every release have to do a synchronize. Instead, we +// initially place released nodes on the _pending_list, and transfer +// them to the _free_list in batches. Only one transfer at a time is +// permitted, with a lock bit to control access to that phase. A +// transfer takes all the nodes from the _pending_list, synchronizes on +// the _free_list pops, and then adds the former pending nodes to the +// _free_list. While that's happening, other threads might be adding +// other nodes to the _pending_list, to be dealt with by some later +// transfer. void BufferNode::Allocator::release(BufferNode* node) { - MutexLockerEx ml(_lock, Mutex::_no_safepoint_check_flag); - node->set_next(_free_list); - _free_list = node; - ++_free_count; + assert(node != NULL, "precondition"); + assert(node->next() == NULL, "precondition"); + + // Desired minimum transfer batch size. There is relatively little + // importance to the specific number. It shouldn't be too big, else + // we're wasting space when the release rate is low. If the release + // rate is high, we might accumulate more than this before being + // able to start a new transfer, but that's okay. Also note that + // the allocation rate and the release rate are going to be fairly + // similar, due to how the buffers are used. + const size_t trigger_transfer = 10; + + // Add to pending list. Update count first so no underflow in transfer. + size_t pending_count = Atomic::add(1u, &_pending_count); + _pending_list.push(*node); + if (pending_count > trigger_transfer) { + try_transfer_pending(); + } } -void BufferNode::Allocator::reduce_free_list() { - BufferNode* head = NULL; - { - MutexLockerEx ml(_lock, Mutex::_no_safepoint_check_flag); - // For now, delete half. - size_t remove = _free_count / 2; - if (remove > 0) { - head = _free_list; - BufferNode* tail = head; - BufferNode* prev = NULL; - for (size_t i = 0; i < remove; ++i) { - assert(tail != NULL, "free list size is wrong"); - prev = tail; - tail = tail->next(); - } - assert(prev != NULL, "invariant"); - assert(prev->next() == tail, "invariant"); - prev->set_next(NULL); - _free_list = tail; - _free_count -= remove; +// Try to transfer nodes from _pending_list to _free_list, with a +// synchronization delay for any in-progress pops from the _free_list, +// to solve ABA there. Return true if performed a (possibly empty) +// transfer, false if blocked from doing so by some other thread's +// in-progress transfer. +bool BufferNode::Allocator::try_transfer_pending() { + // Attempt to claim the lock. + if (Atomic::load(&_transfer_lock) || // Skip CAS if likely to fail. + Atomic::cmpxchg(true, &_transfer_lock, false)) { + return false; + } + // Have the lock; perform the transfer. + + // Claim all the pending nodes. + BufferNode* first = _pending_list.pop_all(); + if (first != NULL) { + // Prepare to add the claimed nodes, and update _pending_count. + BufferNode* last = first; + size_t count = 1; + for (BufferNode* next = first->next(); next != NULL; next = next->next()) { + last = next; + ++count; } + Atomic::sub(count, &_pending_count); + + // Wait for any in-progress pops, to avoid ABA for them. + GlobalCounter::write_synchronize(); + + // Add synchronized nodes to _free_list. + // Update count first so no underflow in allocate(). + Atomic::add(count, &_free_count); + _free_list.prepend(*first, *last); + log_trace(gc, ptrqueue, freelist) + ("Transferred %s pending to free: " SIZE_FORMAT, name(), count); } - while (head != NULL) { - BufferNode* next = head->next(); - BufferNode::deallocate(head); - head = next; + OrderAccess::release_store(&_transfer_lock, false); + return true; +} + +size_t BufferNode::Allocator::reduce_free_list(size_t remove_goal) { + try_transfer_pending(); + size_t removed = 0; + for ( ; removed < remove_goal; ++removed) { + BufferNode* node = _free_list.pop(); + if (node == NULL) break; + BufferNode::deallocate(node); } + size_t new_count = Atomic::sub(removed, &_free_count); + log_debug(gc, ptrqueue, freelist) + ("Reduced %s free list by " SIZE_FORMAT " to " SIZE_FORMAT, + name(), removed, new_count); + return removed; } PtrQueueSet::PtrQueueSet(bool notify_when_complete) : diff --git a/src/hotspot/share/gc/shared/ptrQueue.hpp b/src/hotspot/share/gc/shared/ptrQueue.hpp index ae454a39a43..06a20981a4b 100644 --- a/src/hotspot/share/gc/shared/ptrQueue.hpp +++ b/src/hotspot/share/gc/shared/ptrQueue.hpp @@ -25,7 +25,10 @@ #ifndef SHARE_GC_SHARED_PTRQUEUE_HPP #define SHARE_GC_SHARED_PTRQUEUE_HPP +#include "memory/padded.hpp" #include "utilities/align.hpp" +#include "utilities/debug.hpp" +#include "utilities/lockFreeStack.hpp" #include "utilities/sizes.hpp" class Mutex; @@ -215,7 +218,7 @@ protected: class BufferNode { size_t _index; - BufferNode* _next; + BufferNode* volatile _next; void* _buffer[1]; // Pseudo flexible array member. BufferNode() : _index(0), _next(NULL) { } @@ -225,6 +228,8 @@ class BufferNode { return offset_of(BufferNode, _buffer); } + static BufferNode* volatile* next_ptr(BufferNode& bn) { return &bn._next; } + AIX_ONLY(public:) // xlC 12 on AIX doesn't implement C++ DR45. // Allocate a new BufferNode with the "buffer" having size elements. static BufferNode* allocate(size_t size); @@ -233,6 +238,8 @@ AIX_ONLY(public:) // xlC 12 on AIX doesn't implement C++ DR45. static void deallocate(BufferNode* node); public: + typedef LockFreeStack Stack; + BufferNode* next() const { return _next; } void set_next(BufferNode* n) { _next = n; } size_t index() const { return _index; } @@ -254,23 +261,52 @@ public: reinterpret_cast(node) + buffer_offset()); } - // Free-list based allocator. - class Allocator { - size_t _buffer_size; - Mutex* _lock; - BufferNode* _free_list; - volatile size_t _free_count; + class Allocator; // Free-list based allocator. + class TestSupport; // Unit test support. +}; - public: - Allocator(size_t buffer_size, Mutex* lock); - ~Allocator(); +// Allocation is based on a lock-free free list of nodes, linked through +// BufferNode::_next (see BufferNode::Stack). To solve the ABA problem, +// popping a node from the free list is performed within a GlobalCounter +// critical section, and pushing nodes onto the free list is done after +// a GlobalCounter synchronization associated with the nodes to be pushed. +// This is documented behavior so that other parts of the node life-cycle +// can depend on and make use of it too. +class BufferNode::Allocator { + friend class TestSupport; - size_t buffer_size() const { return _buffer_size; } - size_t free_count() const; - BufferNode* allocate(); - void release(BufferNode* node); - void reduce_free_list(); - }; + // Since we don't expect many instances, and measured >15% speedup + // on stress gtest, padding seems like a good tradeoff here. +#define DECLARE_PADDED_MEMBER(Id, Type, Name) \ + Type Name; DEFINE_PAD_MINUS_SIZE(Id, DEFAULT_CACHE_LINE_SIZE, sizeof(Type)) + + const size_t _buffer_size; + char _name[DEFAULT_CACHE_LINE_SIZE - sizeof(size_t)]; // Use name as padding. + DECLARE_PADDED_MEMBER(1, Stack, _pending_list); + DECLARE_PADDED_MEMBER(2, Stack, _free_list); + DECLARE_PADDED_MEMBER(3, volatile size_t, _pending_count); + DECLARE_PADDED_MEMBER(4, volatile size_t, _free_count); + DECLARE_PADDED_MEMBER(5, volatile bool, _transfer_lock); + +#undef DECLARE_PADDED_MEMBER + + void delete_list(BufferNode* list); + bool try_transfer_pending(); + +public: + Allocator(const char* name, size_t buffer_size); + ~Allocator(); + + const char* name() const { return _name; } + size_t buffer_size() const { return _buffer_size; } + size_t free_count() const; + BufferNode* allocate(); + void release(BufferNode* node); + + // Deallocate some of the available buffers. remove_goal is the target + // number to remove. Returns the number actually deallocated, which may + // be less than the goal if there were fewer available. + size_t reduce_free_list(size_t remove_goal); }; // A PtrQueueSet represents resources common to a set of pointer queues. diff --git a/src/hotspot/share/logging/logTag.hpp b/src/hotspot/share/logging/logTag.hpp index 4ce4e4a242f..81e94276efc 100644 --- a/src/hotspot/share/logging/logTag.hpp +++ b/src/hotspot/share/logging/logTag.hpp @@ -133,6 +133,7 @@ LOG_TAG(reloc) \ LOG_TAG(remset) \ LOG_TAG(parser) \ + LOG_TAG(ptrqueue) \ LOG_TAG(purge) \ LOG_TAG(resolve) \ LOG_TAG(safepoint) \ diff --git a/src/hotspot/share/memory/padded.hpp b/src/hotspot/share/memory/padded.hpp index dafa6767a64..c51d1e7a63b 100644 --- a/src/hotspot/share/memory/padded.hpp +++ b/src/hotspot/share/memory/padded.hpp @@ -25,6 +25,7 @@ #ifndef SHARE_MEMORY_PADDED_HPP #define SHARE_MEMORY_PADDED_HPP +#include "memory/allocation.hpp" #include "utilities/align.hpp" #include "utilities/globalDefinitions.hpp" diff --git a/src/hotspot/share/runtime/mutexLocker.cpp b/src/hotspot/share/runtime/mutexLocker.cpp index 558d865512d..7ec97f35629 100644 --- a/src/hotspot/share/runtime/mutexLocker.cpp +++ b/src/hotspot/share/runtime/mutexLocker.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -79,10 +79,8 @@ Mutex* NonJavaThreadsList_lock = NULL; Monitor* CGC_lock = NULL; Monitor* STS_lock = NULL; Monitor* FullGCCount_lock = NULL; -Mutex* SATB_Q_FL_lock = NULL; Monitor* SATB_Q_CBL_mon = NULL; Mutex* Shared_SATB_Q_lock = NULL; -Mutex* DirtyCardQ_FL_lock = NULL; Monitor* DirtyCardQ_CBL_mon = NULL; Mutex* Shared_DirtyCardQ_lock = NULL; Mutex* MarkStackFreeList_lock = NULL; @@ -214,11 +212,9 @@ void mutex_init() { def(FullGCCount_lock , PaddedMonitor, leaf, true, Monitor::_safepoint_check_never); // in support of ExplicitGCInvokesConcurrent } if (UseG1GC) { - def(SATB_Q_FL_lock , PaddedMutex , access, true, Monitor::_safepoint_check_never); def(SATB_Q_CBL_mon , PaddedMonitor, access, true, Monitor::_safepoint_check_never); def(Shared_SATB_Q_lock , PaddedMutex , access + 1, true, Monitor::_safepoint_check_never); - def(DirtyCardQ_FL_lock , PaddedMutex , access, true, Monitor::_safepoint_check_never); def(DirtyCardQ_CBL_mon , PaddedMonitor, access, true, Monitor::_safepoint_check_never); def(Shared_DirtyCardQ_lock , PaddedMutex , access + 1, true, Monitor::_safepoint_check_never); @@ -235,7 +231,6 @@ void mutex_init() { def(MonitoringSupport_lock , PaddedMutex , native , true, Monitor::_safepoint_check_never); // used for serviceability monitoring support } if (UseShenandoahGC) { - def(SATB_Q_FL_lock , PaddedMutex , access, true, Monitor::_safepoint_check_never); def(SATB_Q_CBL_mon , PaddedMonitor, access, true, Monitor::_safepoint_check_never); def(Shared_SATB_Q_lock , PaddedMutex , access + 1, true, Monitor::_safepoint_check_never); diff --git a/src/hotspot/share/runtime/mutexLocker.hpp b/src/hotspot/share/runtime/mutexLocker.hpp index 15d724e2761..64c3ae7491e 100644 --- a/src/hotspot/share/runtime/mutexLocker.hpp +++ b/src/hotspot/share/runtime/mutexLocker.hpp @@ -76,16 +76,12 @@ extern Monitor* CGC_lock; // used for coordination betwee // fore- & background GC threads. extern Monitor* STS_lock; // used for joining/leaving SuspendibleThreadSet. extern Monitor* FullGCCount_lock; // in support of "concurrent" full gc -extern Mutex* SATB_Q_FL_lock; // Protects SATB Q - // buffer free list. extern Monitor* SATB_Q_CBL_mon; // Protects SATB Q // completed buffer queue. extern Mutex* Shared_SATB_Q_lock; // Lock protecting SATB // queue shared by // non-Java threads. -extern Mutex* DirtyCardQ_FL_lock; // Protects dirty card Q - // buffer free list. extern Monitor* DirtyCardQ_CBL_mon; // Protects dirty card Q // completed buffer queue. extern Mutex* Shared_DirtyCardQ_lock; // Lock protecting dirty card diff --git a/src/hotspot/share/utilities/lockFreeStack.hpp b/src/hotspot/share/utilities/lockFreeStack.hpp new file mode 100644 index 00000000000..7cf54e0cd1b --- /dev/null +++ b/src/hotspot/share/utilities/lockFreeStack.hpp @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_UTILITIES_LOCKFREESTACK_HPP +#define SHARE_UTILITIES_LOCKFREESTACK_HPP + +#include "runtime/atomic.hpp" +#include "utilities/debug.hpp" +#include "utilities/macros.hpp" + +// The LockFreeStack class template provides a lock-free LIFO. The objects +// in the sequence are intrusively linked via a member in the objects. As +// a result, there is no allocation involved in adding objects to the stack +// or removing them from the stack. +// +// To be used in a LockFreeStack of objects of type T, an object of +// type T must have a list entry member of type T* volatile, with an +// non-member accessor function returning a pointer to that member. A +// LockFreeStack is associated with the class of its elements and an +// entry member from that class. +// +// An object can be in multiple stacks at the same time, so long as +// each stack uses a different entry member. That is, the class of the +// object must have multiple LockFreeStack entry members, one for each +// stack in which the object may simultaneously be an element. +// +// LockFreeStacks support polymorphic elements. Because the objects +// in a stack are externally managed, rather than being embedded +// values in the stack, the actual type of such objects may be more +// specific than the stack's element type. +// +// \tparam T is the class of the elements in the stack. +// +// \tparam next_ptr is a function pointer. Applying this function to +// an object of type T must return a pointer to the list entry member +// of the object associated with the LockFreeStack type. +template +class LockFreeStack { + T* volatile _top; + + void prepend_impl(T* first, T* last) { + T* cur = top(); + T* old; + do { + old = cur; + set_next(*last, cur); + cur = Atomic::cmpxchg(first, &_top, cur); + } while (old != cur); + } + + // Noncopyable. + LockFreeStack(const LockFreeStack&); + LockFreeStack& operator=(const LockFreeStack&); + +public: + LockFreeStack() : _top(NULL) {} + ~LockFreeStack() { assert(empty(), "stack not empty"); } + + // Atomically removes the top object from this stack and returns a + // pointer to that object, or NULL if this stack is empty. Acts as a + // full memory barrier. Subject to ABA behavior; callers must ensure + // usage is safe. + T* pop() { + T* result = top(); + T* old; + do { + old = result; + T* new_top = NULL; + if (result != NULL) { + new_top = next(*result); + } + // CAS even on empty pop, for consistent membar bahavior. + result = Atomic::cmpxchg(new_top, &_top, result); + } while (result != old); + if (result != NULL) { + set_next(*result, NULL); + } + return result; + } + + // Atomically exchange the list of elements with NULL, returning the old + // list of elements. Acts as a full memory barrier. + // postcondition: empty() + T* pop_all() { + return Atomic::xchg((T*)NULL, &_top); + } + + // Atomically adds value to the top of this stack. Acts as a full + // memory barrier. + void push(T& value) { + assert(next(value) == NULL, "precondition"); + prepend_impl(&value, &value); + } + + // Atomically adds the list of objects (designated by first and + // last) before the objects already in this stack, in the same order + // as in the list. Acts as a full memory barrier. + // precondition: next(last) == NULL. + // postcondition: top() == &first, next(last) == old top(). + void prepend(T& first, T& last) { + assert(next(last) == NULL, "precondition"); +#ifdef ASSERT + for (T* p = &first; p != &last; p = next(*p)) { + assert(p != NULL, "invalid prepend list"); + } +#endif + prepend_impl(&first, &last); + } + + // Atomically adds the list of objects headed by first before the + // objects already in this stack, in the same order as in the list. + // Acts as a full memory barrier. + // postcondition: top() == &first. + void prepend(T& first) { + T* last = &first; + while (true) { + T* step_to = next(*last); + if (step_to == NULL) break; + last = step_to; + } + prepend_impl(&first, last); + } + + // Return true if the stack is empty. + bool empty() const { return top() == NULL; } + + // Return the most recently pushed element, or NULL if the stack is empty. + // The returned element is not removed from the stack. + T* top() const { return Atomic::load(&_top); } + + // Return the number of objects in the stack. There must be no concurrent + // pops while the length is being determined. + size_t length() const { + size_t result = 0; + for (const T* current = top(); current != NULL; current = next(*current)) { + ++result; + } + return result; + } + + // Return the entry following value in the list used by the + // specialized LockFreeStack class. + static T* next(const T& value) { + return Atomic::load(next_ptr(const_cast(value))); + } + + // Set the entry following value to new_next in the list used by the + // specialized LockFreeStack class. Not thread-safe; in particular, + // if value is in an instance of this specialization of LockFreeStack, + // there must be no concurrent push or pop operations on that stack. + static void set_next(T& value, T* new_next) { + Atomic::store(new_next, next_ptr(value)); + } +}; + +#endif // SHARE_UTILITIES_LOCKFREESTACK_HPP diff --git a/test/hotspot/gtest/gc/shared/test_ptrQueueBufferAllocator.cpp b/test/hotspot/gtest/gc/shared/test_ptrQueueBufferAllocator.cpp index 7340f224cc0..863fbed690d 100644 --- a/test/hotspot/gtest/gc/shared/test_ptrQueueBufferAllocator.cpp +++ b/test/hotspot/gtest/gc/shared/test_ptrQueueBufferAllocator.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -24,14 +24,37 @@ #include "precompiled.hpp" #include "gc/shared/ptrQueue.hpp" -#include "runtime/mutex.hpp" +#include "memory/allocation.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/orderAccess.hpp" +#include "runtime/semaphore.inline.hpp" +#include "runtime/thread.hpp" +#include "utilities/globalCounter.inline.hpp" +#include "utilities/globalDefinitions.hpp" +#include "utilities/ostream.hpp" +#include "threadHelper.inline.hpp" #include "unittest.hpp" +class BufferNode::TestSupport : AllStatic { +public: + static bool try_transfer_pending(Allocator* allocator) { + return allocator->try_transfer_pending(); + } + + class CompletedList; + class AllocatorThread; + class ProcessorThread; +}; + +typedef BufferNode::TestSupport::CompletedList CompletedList; +typedef BufferNode::TestSupport::AllocatorThread AllocatorThread; +typedef BufferNode::TestSupport::ProcessorThread ProcessorThread; + // Some basic testing of BufferNode::Allocator. TEST_VM(PtrQueueBufferAllocatorTest, test) { - Mutex m(Mutex::leaf, "PtrQueueBufferAllocatorTest", - false, Mutex::_safepoint_check_never); - BufferNode::Allocator allocator(256, &m); + const size_t buffer_size = 256; + BufferNode::Allocator allocator("Test Buffer Allocator", buffer_size); + ASSERT_EQ(buffer_size, allocator.buffer_size()); // Allocate some new nodes for use in testing. BufferNode* nodes[10] = {}; @@ -44,8 +67,11 @@ TEST_VM(PtrQueueBufferAllocatorTest, test) { // Release the nodes, adding them to the allocator's free list. for (size_t i = 0; i < node_count; ++i) { - ASSERT_EQ(i, allocator.free_count()); allocator.release(nodes[i]); + } + ASSERT_TRUE(BufferNode::TestSupport::try_transfer_pending(&allocator)); + ASSERT_EQ(node_count, allocator.free_count()); + for (size_t i = 0; i < node_count; ++i) { if (i == 0) { ASSERT_EQ((BufferNode*)NULL, nodes[i]->next()); } else { @@ -56,7 +82,6 @@ TEST_VM(PtrQueueBufferAllocatorTest, test) { // Allocate nodes from the free list. for (size_t i = 0; i < node_count; ++i) { size_t j = node_count - i; - ASSERT_EQ(j, allocator.free_count()); ASSERT_EQ(nodes[j - 1], allocator.allocate()); } ASSERT_EQ(0u, allocator.free_count()); @@ -65,11 +90,161 @@ TEST_VM(PtrQueueBufferAllocatorTest, test) { for (size_t i = 0; i < node_count; ++i) { allocator.release(nodes[i]); } + ASSERT_TRUE(BufferNode::TestSupport::try_transfer_pending(&allocator)); ASSERT_EQ(node_count, allocator.free_count()); // Destroy some nodes in the free list. // We don't have a way to verify destruction, but we can at - // leat verify we don't crash along the way. - allocator.reduce_free_list(); + // least verify we don't crash along the way. + size_t count = allocator.free_count(); + ASSERT_EQ(count, allocator.reduce_free_list(count)); // destroy allocator. } + +// Stress test with lock-free allocator and completed buffer list. +// Completed buffer list pop avoids ABA by also being in a critical +// section that is synchronized by the allocator's release. + +class BufferNode::TestSupport::CompletedList { + BufferNode::Stack _completed_list; + +public: + CompletedList() : _completed_list() {} + + ~CompletedList() { + assert(_completed_list.empty(), "completed list not empty"); + } + + void push(BufferNode* node) { + assert(node != NULL, "precondition"); + _completed_list.push(*node); + } + + BufferNode* pop() { + GlobalCounter::CriticalSection cs(Thread::current()); + return _completed_list.pop(); + } +}; + +// Simulate a mutator thread, allocating buffers and adding them to +// the completed buffer list. +class BufferNode::TestSupport::AllocatorThread : public JavaTestThread { + BufferNode::Allocator* _allocator; + CompletedList* _cbl; + volatile size_t* _total_allocations; + volatile bool* _continue_running; + size_t _allocations; + +public: + AllocatorThread(Semaphore* post, + BufferNode::Allocator* allocator, + CompletedList* cbl, + volatile size_t* total_allocations, + volatile bool* continue_running) : + JavaTestThread(post), + _allocator(allocator), + _cbl(cbl), + _total_allocations(total_allocations), + _continue_running(continue_running), + _allocations(0) + {} + + virtual void main_run() { + while (OrderAccess::load_acquire(_continue_running)) { + BufferNode* node = _allocator->allocate(); + _cbl->push(node); + ++_allocations; + ThreadBlockInVM tbiv(this); // Safepoint check. + } + tty->print_cr("allocations: " SIZE_FORMAT, _allocations); + Atomic::add(_allocations, _total_allocations); + } +}; + +// Simulate a GC thread, taking buffers from the completed buffer list +// and returning them to the allocator. +class BufferNode::TestSupport::ProcessorThread : public JavaTestThread { + BufferNode::Allocator* _allocator; + CompletedList* _cbl; + volatile bool* _continue_running; + +public: + ProcessorThread(Semaphore* post, + BufferNode::Allocator* allocator, + CompletedList* cbl, + volatile bool* continue_running) : + JavaTestThread(post), + _allocator(allocator), + _cbl(cbl), + _continue_running(continue_running) + {} + + virtual void main_run() { + while (true) { + BufferNode* node = _cbl->pop(); + if (node != NULL) { + _allocator->release(node); + } else if (!OrderAccess::load_acquire(_continue_running)) { + return; + } + ThreadBlockInVM tbiv(this); // Safepoint check. + } + } +}; + +static void run_test(BufferNode::Allocator* allocator, CompletedList* cbl) { + const uint nthreads = 4; + const uint milliseconds_to_run = 1000; + + Semaphore post; + volatile size_t total_allocations = 0; + volatile bool allocator_running = true; + volatile bool processor_running = true; + + ProcessorThread* proc_threads[nthreads] = {}; + for (uint i = 0; i < nthreads; ++i) { + proc_threads[i] = new ProcessorThread(&post, + allocator, + cbl, + &processor_running); + proc_threads[i]->doit(); + } + + AllocatorThread* alloc_threads[nthreads] = {}; + for (uint i = 0; i < nthreads; ++i) { + alloc_threads[i] = new AllocatorThread(&post, + allocator, + cbl, + &total_allocations, + &allocator_running); + alloc_threads[i]->doit(); + } + + JavaThread* this_thread = JavaThread::current(); + tty->print_cr("Stressing allocator for %u ms", milliseconds_to_run); + { + ThreadInVMfromNative invm(this_thread); + os::sleep(this_thread, milliseconds_to_run, true); + } + OrderAccess::release_store(&allocator_running, false); + for (uint i = 0; i < nthreads; ++i) { + ThreadInVMfromNative invm(this_thread); + post.wait_with_safepoint_check(this_thread); + } + OrderAccess::release_store(&processor_running, false); + for (uint i = 0; i < nthreads; ++i) { + ThreadInVMfromNative invm(this_thread); + post.wait_with_safepoint_check(this_thread); + } + ASSERT_TRUE(BufferNode::TestSupport::try_transfer_pending(allocator)); + tty->print_cr("total allocations: " SIZE_FORMAT, total_allocations); + tty->print_cr("allocator free count: " SIZE_FORMAT, allocator->free_count()); +} + +const size_t buffer_size = 1024; + +TEST_VM(PtrQueueBufferAllocatorTest, stress_free_list_allocator) { + BufferNode::Allocator allocator("Test Allocator", buffer_size); + CompletedList completed; + run_test(&allocator, &completed); +} diff --git a/test/hotspot/gtest/utilities/test_lockFreeStack.cpp b/test/hotspot/gtest/utilities/test_lockFreeStack.cpp new file mode 100644 index 00000000000..76772b6a407 --- /dev/null +++ b/test/hotspot/gtest/utilities/test_lockFreeStack.cpp @@ -0,0 +1,306 @@ +/* + * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#include "precompiled.hpp" +#include "memory/allocation.inline.hpp" +#include "runtime/atomic.hpp" +#include "runtime/orderAccess.hpp" +#include "utilities/globalDefinitions.hpp" +#include "utilities/lockFreeStack.hpp" +#include "threadHelper.inline.hpp" +#include "unittest.hpp" +#include + +class LockFreeStackTestElement { + typedef LockFreeStackTestElement Element; + + Element* volatile _entry; + Element* volatile _entry1; + size_t _id; + + static Element* volatile* entry_ptr(Element& e) { return &e._entry; } + static Element* volatile* entry1_ptr(Element& e) { return &e._entry1; } + +public: + LockFreeStackTestElement(size_t id = 0) : _entry(), _entry1(), _id(id) {} + size_t id() const { return _id; } + void set_id(size_t value) { _id = value; } + + typedef LockFreeStack Stack; + typedef LockFreeStack Stack1; +}; + +typedef LockFreeStackTestElement Element; +typedef Element::Stack Stack; +typedef Element::Stack1 Stack1; + +static void initialize_ids(Element* elements, size_t size) { + for (size_t i = 0; i < size; ++i) { + elements[i].set_id(i); + } +} + +class LockFreeStackTestBasics : public ::testing::Test { +public: + LockFreeStackTestBasics(); + + static const size_t nelements = 10; + Element elements[nelements]; + Stack stack; + +private: + void initialize(); +}; + +const size_t LockFreeStackTestBasics::nelements; + +LockFreeStackTestBasics::LockFreeStackTestBasics() : stack() { + initialize_ids(elements, nelements); + initialize(); +} + +void LockFreeStackTestBasics::initialize() { + ASSERT_TRUE(stack.empty()); + ASSERT_EQ(0u, stack.length()); + ASSERT_TRUE(stack.pop() == NULL); + ASSERT_TRUE(stack.top() == NULL); + + for (size_t id = 0; id < nelements; ++id) { + ASSERT_EQ(id, stack.length()); + Element* e = &elements[id]; + ASSERT_EQ(id, e->id()); + stack.push(*e); + ASSERT_FALSE(stack.empty()); + ASSERT_EQ(e, stack.top()); + } +} + +TEST_F(LockFreeStackTestBasics, push_pop) { + for (size_t i = nelements; i > 0; ) { + ASSERT_FALSE(stack.empty()); + ASSERT_EQ(i, stack.length()); + --i; + Element* e = stack.pop(); + ASSERT_TRUE(e != NULL); + ASSERT_EQ(&elements[i], e); + ASSERT_EQ(i, e->id()); + } + ASSERT_TRUE(stack.empty()); + ASSERT_EQ(0u, stack.length()); + ASSERT_TRUE(stack.pop() == NULL); +} + +TEST_F(LockFreeStackTestBasics, prepend_one) { + Stack other_stack; + ASSERT_TRUE(other_stack.empty()); + ASSERT_TRUE(other_stack.pop() == NULL); + ASSERT_EQ(0u, other_stack.length()); + ASSERT_TRUE(other_stack.top() == NULL); + ASSERT_TRUE(other_stack.pop() == NULL); + + other_stack.prepend(*stack.pop_all()); + ASSERT_EQ(nelements, other_stack.length()); + ASSERT_TRUE(stack.empty()); + ASSERT_EQ(0u, stack.length()); + ASSERT_TRUE(stack.pop() == NULL); + ASSERT_TRUE(stack.top() == NULL); + + for (size_t i = nelements; i > 0; ) { + ASSERT_EQ(i, other_stack.length()); + --i; + Element* e = other_stack.pop(); + ASSERT_TRUE(e != NULL); + ASSERT_EQ(&elements[i], e); + ASSERT_EQ(i, e->id()); + } + ASSERT_EQ(0u, other_stack.length()); + ASSERT_TRUE(other_stack.pop() == NULL); +} + +TEST_F(LockFreeStackTestBasics, prepend_two) { + Stack other_stack; + ASSERT_TRUE(other_stack.empty()); + ASSERT_EQ(0u, other_stack.length()); + ASSERT_TRUE(other_stack.top() == NULL); + ASSERT_TRUE(other_stack.pop() == NULL); + + Element* top = stack.pop_all(); + ASSERT_EQ(top, &elements[nelements - 1]); + other_stack.prepend(*top, elements[0]); + + for (size_t i = nelements; i > 0; ) { + ASSERT_EQ(i, other_stack.length()); + --i; + Element* e = other_stack.pop(); + ASSERT_TRUE(e != NULL); + ASSERT_EQ(&elements[i], e); + ASSERT_EQ(i, e->id()); + } + ASSERT_EQ(0u, other_stack.length()); + ASSERT_TRUE(other_stack.pop() == NULL); +} + +TEST_F(LockFreeStackTestBasics, two_stacks) { + Stack1 stack1; + ASSERT_TRUE(stack1.pop() == NULL); + + for (size_t id = 0; id < nelements; ++id) { + stack1.push(elements[id]); + } + ASSERT_EQ(nelements, stack1.length()); + Element* e0 = stack.top(); + Element* e1 = stack1.top(); + while (true) { + ASSERT_EQ(e0, e1); + if (e0 == NULL) break; + e0 = stack.next(*e0); + e1 = stack1.next(*e1); + } + + for (size_t i = nelements; i > 0; ) { + ASSERT_EQ(i, stack.length()); + ASSERT_EQ(i, stack1.length()); + --i; + Element* e = stack.pop(); + ASSERT_TRUE(e != NULL); + ASSERT_EQ(&elements[i], e); + ASSERT_EQ(i, e->id()); + + Element* e1 = stack1.pop(); + ASSERT_TRUE(e1 != NULL); + ASSERT_EQ(&elements[i], e1); + ASSERT_EQ(i, e1->id()); + + ASSERT_EQ(e, e1); + } + ASSERT_EQ(0u, stack.length()); + ASSERT_EQ(0u, stack1.length()); + ASSERT_TRUE(stack.pop() == NULL); + ASSERT_TRUE(stack1.pop() == NULL); +} + +class LockFreeStackTestThread : public JavaTestThread { + uint _id; + Stack* _from; + Stack* _to; + volatile size_t* _processed; + size_t _process_limit; + size_t _local_processed; + volatile bool _ready; + +public: + LockFreeStackTestThread(Semaphore* post, + uint id, + Stack* from, + Stack* to, + volatile size_t* processed, + size_t process_limit) : + JavaTestThread(post), + _id(id), + _from(from), + _to(to), + _processed(processed), + _process_limit(process_limit), + _local_processed(0), + _ready(false) + {} + + virtual void main_run() { + OrderAccess::release_store_fence(&_ready, true); + while (true) { + Element* e = _from->pop(); + if (e != NULL) { + _to->push(*e); + Atomic::inc(_processed); + ++_local_processed; + } else if (OrderAccess::load_acquire(_processed) == _process_limit) { + tty->print_cr("thread %u processed " SIZE_FORMAT, _id, _local_processed); + return; + } + } + } + + bool ready() const { return OrderAccess::load_acquire(&_ready); } +}; + +TEST_VM(LockFreeStackTest, stress) { + Semaphore post; + Stack initial_stack; + Stack start_stack; + Stack middle_stack; + Stack final_stack; + volatile size_t stage1_processed = 0; + volatile size_t stage2_processed = 0; + + const size_t nelements = 10000; + Element* elements = NEW_C_HEAP_ARRAY(Element, nelements, mtOther); + for (size_t id = 0; id < nelements; ++id) { + ::new (&elements[id]) Element(id); + initial_stack.push(elements[id]); + } + ASSERT_EQ(nelements, initial_stack.length()); + + // - stage1 threads pop from start_stack and push to middle_stack. + // - stage2 threads pop from middle_stack and push to final_stack. + // - all threads in a stage count the number of elements processed in + // their corresponding stageN_processed counter. + + const uint stage1_threads = 2; + const uint stage2_threads = 2; + const uint nthreads = stage1_threads + stage2_threads; + LockFreeStackTestThread* threads[nthreads] = {}; + + for (uint i = 0; i < ARRAY_SIZE(threads); ++i) { + Stack* from = &start_stack; + Stack* to = &middle_stack; + volatile size_t* processed = &stage1_processed; + if (i >= stage1_threads) { + from = &middle_stack; + to = &final_stack; + processed = &stage2_processed; + } + threads[i] = + new LockFreeStackTestThread(&post, i, from, to, processed, nelements); + threads[i]->doit(); + while (!threads[i]->ready()) {} // Wait until ready to start test. + } + + // Transfer elements to start_stack to start test. + start_stack.prepend(*initial_stack.pop_all()); + + // Wait for all threads to complete. + for (uint i = 0; i < nthreads; ++i) { + post.wait(); + } + + // Verify expected state. + ASSERT_EQ(nelements, stage1_processed); + ASSERT_EQ(nelements, stage2_processed); + ASSERT_EQ(0u, initial_stack.length()); + ASSERT_EQ(0u, start_stack.length()); + ASSERT_EQ(0u, middle_stack.length()); + ASSERT_EQ(nelements, final_stack.length()); + while (final_stack.pop() != NULL) {} + + FREE_C_HEAP_ARRAY(Element, elements); +}