8212827: GlobalCounter should support nested critical sections

Support nested critical sections.

Reviewed-by: eosterlund, rehn, tschatzl
This commit is contained in:
Kim Barrett 2018-10-30 18:06:35 -04:00
parent dcb6e1dda8
commit 3185dcf0f2
8 changed files with 269 additions and 38 deletions

View File

@ -25,6 +25,10 @@
#ifndef SHARE_UTILITIES_CONCURRENT_HASH_TABLE_HPP #ifndef SHARE_UTILITIES_CONCURRENT_HASH_TABLE_HPP
#define SHARE_UTILITIES_CONCURRENT_HASH_TABLE_HPP #define SHARE_UTILITIES_CONCURRENT_HASH_TABLE_HPP
#include "memory/allocation.hpp"
#include "utilities/globalCounter.hpp"
#include "utilities/globalDefinitions.hpp"
// A mostly concurrent-hash-table where the read-side is wait-free, inserts are // A mostly concurrent-hash-table where the read-side is wait-free, inserts are
// CAS and deletes mutual exclude each other on per bucket-basis. VALUE is the // CAS and deletes mutual exclude each other on per bucket-basis. VALUE is the
// type kept inside each Node and CONFIG contains hash and allocation methods. // type kept inside each Node and CONFIG contains hash and allocation methods.
@ -247,6 +251,7 @@ class ConcurrentHashTable : public CHeapObj<F> {
protected: protected:
Thread* _thread; Thread* _thread;
ConcurrentHashTable<VALUE, CONFIG, F>* _cht; ConcurrentHashTable<VALUE, CONFIG, F>* _cht;
GlobalCounter::CSContext _cs_context;
public: public:
ScopedCS(Thread* thread, ConcurrentHashTable<VALUE, CONFIG, F>* cht); ScopedCS(Thread* thread, ConcurrentHashTable<VALUE, CONFIG, F>* cht);
~ScopedCS(); ~ScopedCS();

View File

@ -208,9 +208,10 @@ inline ConcurrentHashTable<VALUE, CONFIG, F>::
template <typename VALUE, typename CONFIG, MEMFLAGS F> template <typename VALUE, typename CONFIG, MEMFLAGS F>
inline ConcurrentHashTable<VALUE, CONFIG, F>:: inline ConcurrentHashTable<VALUE, CONFIG, F>::
ScopedCS::ScopedCS(Thread* thread, ConcurrentHashTable<VALUE, CONFIG, F>* cht) ScopedCS::ScopedCS(Thread* thread, ConcurrentHashTable<VALUE, CONFIG, F>* cht)
: _thread(thread), _cht(cht) : _thread(thread),
_cht(cht),
_cs_context(GlobalCounter::critical_section_begin(_thread))
{ {
GlobalCounter::critical_section_begin(_thread);
// This version is published now. // This version is published now.
if (OrderAccess::load_acquire(&_cht->_invisible_epoch) != NULL) { if (OrderAccess::load_acquire(&_cht->_invisible_epoch) != NULL) {
OrderAccess::release_store_fence(&_cht->_invisible_epoch, (Thread*)NULL); OrderAccess::release_store_fence(&_cht->_invisible_epoch, (Thread*)NULL);
@ -221,7 +222,7 @@ template <typename VALUE, typename CONFIG, MEMFLAGS F>
inline ConcurrentHashTable<VALUE, CONFIG, F>:: inline ConcurrentHashTable<VALUE, CONFIG, F>::
ScopedCS::~ScopedCS() ScopedCS::~ScopedCS()
{ {
GlobalCounter::critical_section_end(_thread); GlobalCounter::critical_section_end(_thread, _cs_context);
} }
// BaseConfig // BaseConfig
@ -502,7 +503,7 @@ inline void ConcurrentHashTable<VALUE, CONFIG, F>::
// concurrent single deletes. The _invisible_epoch can only be used by the // concurrent single deletes. The _invisible_epoch can only be used by the
// owner of _resize_lock, us here. There we should not changed it in our // owner of _resize_lock, us here. There we should not changed it in our
// own read-side. // own read-side.
GlobalCounter::critical_section_begin(thread); GlobalCounter::CSContext cs_context = GlobalCounter::critical_section_begin(thread);
for (size_t bucket_it = start_idx; bucket_it < stop_idx; bucket_it++) { for (size_t bucket_it = start_idx; bucket_it < stop_idx; bucket_it++) {
Bucket* bucket = table->get_bucket(bucket_it); Bucket* bucket = table->get_bucket(bucket_it);
Bucket* prefetch_bucket = (bucket_it+1) < stop_idx ? Bucket* prefetch_bucket = (bucket_it+1) < stop_idx ?
@ -514,7 +515,7 @@ inline void ConcurrentHashTable<VALUE, CONFIG, F>::
continue; continue;
} }
GlobalCounter::critical_section_end(thread); GlobalCounter::critical_section_end(thread, cs_context);
// We left critical section but the bucket cannot be removed while we hold // We left critical section but the bucket cannot be removed while we hold
// the _resize_lock. // the _resize_lock.
bucket->lock(); bucket->lock();
@ -530,9 +531,9 @@ inline void ConcurrentHashTable<VALUE, CONFIG, F>::
Node::destroy_node(ndel[node_it]); Node::destroy_node(ndel[node_it]);
DEBUG_ONLY(ndel[node_it] = (Node*)POISON_PTR;) DEBUG_ONLY(ndel[node_it] = (Node*)POISON_PTR;)
} }
GlobalCounter::critical_section_begin(thread); cs_context = GlobalCounter::critical_section_begin(thread);
} }
GlobalCounter::critical_section_end(thread); GlobalCounter::critical_section_end(thread, cs_context);
} }
template <typename VALUE, typename CONFIG, MEMFLAGS F> template <typename VALUE, typename CONFIG, MEMFLAGS F>

View File

@ -59,8 +59,8 @@ class GlobalCounter::CounterThreadCheck : public ThreadClosure {
void GlobalCounter::write_synchronize() { void GlobalCounter::write_synchronize() {
assert((*Thread::current()->get_rcu_counter() & COUNTER_ACTIVE) == 0x0, "must be outside a critcal section"); assert((*Thread::current()->get_rcu_counter() & COUNTER_ACTIVE) == 0x0, "must be outside a critcal section");
// Atomic::add must provide fence since we have storeload dependency. // Atomic::add must provide fence since we have storeload dependency.
volatile uintx gbl_cnt = Atomic::add((uintx)COUNTER_INCREMENT, &_global_counter._counter, uintx gbl_cnt = Atomic::add(COUNTER_INCREMENT, &_global_counter._counter);
memory_order_conservative);
// Handle bootstrap // Handle bootstrap
if (Threads::number_of_threads() == 0) { if (Threads::number_of_threads() == 0) {
return; return;

View File

@ -33,8 +33,9 @@ class Thread;
// The GlobalCounter provides a synchronization mechanism between threads for // The GlobalCounter provides a synchronization mechanism between threads for
// safe memory reclamation and other ABA problems. All readers must call // safe memory reclamation and other ABA problems. All readers must call
// critical_section_begin before reading the volatile data and // critical_section_begin before reading the volatile data and
// critical_section_end afterwards. The write side must call write_synchronize // critical_section_end afterwards. Such read-side critical sections may
// before reclaming the memory. The read-path only does an uncontented store // be properly nested. The write side must call write_synchronize
// before reclaming the memory. The read-path only does an uncontended store
// to a thread-local-storage and fence to stop any loads from floating up, thus // to a thread-local-storage and fence to stop any loads from floating up, thus
// light weight and wait-free. The write-side is more heavy since it must check // light weight and wait-free. The write-side is more heavy since it must check
// all readers and wait until they have left the generation. (a system memory // all readers and wait until they have left the generation. (a system memory
@ -62,20 +63,26 @@ class GlobalCounter : public AllStatic {
class CounterThreadCheck; class CounterThreadCheck;
public: public:
// Must be called before accessing the data. Only threads accessible lock-free // The type of the critical section context passed from
// can used this. Those included now are all Threads on SMR ThreadsList and // critical_section_begin() to critical_section_end().
// the VMThread. Nesting is not yet supported. typedef uintx CSContext;
static void critical_section_begin(Thread *thread);
// Must be called after finished accessing the data. // Must be called before accessing the data. The result must be passed
// Do not provide fence, allows load/stores moving into the critical section. // to the associated call to critical_section_end(). Acts as a full
static void critical_section_end(Thread *thread); // memory barrier before the code within the critical section.
static CSContext critical_section_begin(Thread *thread);
// Must be called after finished accessing the data. The context
// must be the result of the associated initiating critical_section_begin().
// Acts as a release memory barrier after the code within the critical
// section.
static void critical_section_end(Thread *thread, CSContext context);
// Make the data inaccessible to readers before calling. When this call // Make the data inaccessible to readers before calling. When this call
// returns it's safe to reclaim the data. // returns it's safe to reclaim the data. Acts as a full memory barrier.
static void write_synchronize(); static void write_synchronize();
// A scoped object for a reads-side critical-section. // A scoped object for a read-side critical-section.
class CriticalSection; class CriticalSection;
}; };

View File

@ -25,34 +25,46 @@
#ifndef SHARE_UTILITIES_GLOBAL_COUNTER_INLINE_HPP #ifndef SHARE_UTILITIES_GLOBAL_COUNTER_INLINE_HPP
#define SHARE_UTILITIES_GLOBAL_COUNTER_INLINE_HPP #define SHARE_UTILITIES_GLOBAL_COUNTER_INLINE_HPP
#include "runtime/atomic.hpp"
#include "runtime/orderAccess.hpp" #include "runtime/orderAccess.hpp"
#include "runtime/thread.inline.hpp" #include "runtime/thread.inline.hpp"
#include "utilities/globalCounter.hpp" #include "utilities/globalCounter.hpp"
inline void GlobalCounter::critical_section_begin(Thread *thread) { inline GlobalCounter::CSContext
GlobalCounter::critical_section_begin(Thread *thread) {
assert(thread == Thread::current(), "must be current thread"); assert(thread == Thread::current(), "must be current thread");
assert((*thread->get_rcu_counter() & COUNTER_ACTIVE) == 0x0, "nested critical sections, not supported yet"); uintx old_cnt = Atomic::load(thread->get_rcu_counter());
uintx gbl_cnt = OrderAccess::load_acquire(&_global_counter._counter); // Retain the old counter value if already active, e.g. nested.
OrderAccess::release_store_fence(thread->get_rcu_counter(), gbl_cnt | COUNTER_ACTIVE); // Otherwise, set the counter to the current version + active bit.
uintx new_cnt = old_cnt;
if ((new_cnt & COUNTER_ACTIVE) == 0) {
new_cnt = Atomic::load(&_global_counter._counter) | COUNTER_ACTIVE;
}
OrderAccess::release_store_fence(thread->get_rcu_counter(), new_cnt);
return static_cast<CSContext>(old_cnt);
} }
inline void GlobalCounter::critical_section_end(Thread *thread) { inline void
GlobalCounter::critical_section_end(Thread *thread, CSContext context) {
assert(thread == Thread::current(), "must be current thread"); assert(thread == Thread::current(), "must be current thread");
assert((*thread->get_rcu_counter() & COUNTER_ACTIVE) == COUNTER_ACTIVE, "must be in critical section"); assert((*thread->get_rcu_counter() & COUNTER_ACTIVE) == COUNTER_ACTIVE, "must be in critical section");
// Mainly for debugging we set it to 'now'. // Restore the counter value from before the associated begin.
uintx gbl_cnt = OrderAccess::load_acquire(&_global_counter._counter); OrderAccess::release_store(thread->get_rcu_counter(),
OrderAccess::release_store(thread->get_rcu_counter(), gbl_cnt); static_cast<uintx>(context));
} }
class GlobalCounter::CriticalSection { class GlobalCounter::CriticalSection {
private: private:
Thread* _thread; Thread* _thread;
CSContext _context;
public: public:
inline CriticalSection(Thread* thread) : _thread(thread) { inline CriticalSection(Thread* thread) :
GlobalCounter::critical_section_begin(_thread); _thread(thread),
} _context(GlobalCounter::critical_section_begin(_thread))
{}
inline ~CriticalSection() { inline ~CriticalSection() {
GlobalCounter::critical_section_end(_thread); GlobalCounter::critical_section_end(_thread, _context);
} }
}; };

View File

@ -36,7 +36,7 @@
// Any number of threads may enter critical sections associated with a // Any number of threads may enter critical sections associated with a
// synchronizer object. One (at a time) other thread may wait for the // synchronizer object. One (at a time) other thread may wait for the
// completion of all critical sections for the synchronizer object // completion of all critical sections for the synchronizer object
// that were extent when the wait was initiated. Usage is that there // that were extant when the wait was initiated. Usage is that there
// is some state that can be accessed either before or after some // is some state that can be accessed either before or after some
// change. An accessing thread performs the access within a critical // change. An accessing thread performs the access within a critical
// section. A writer thread performs the state change, and then waits // section. A writer thread performs the state change, and then waits
@ -46,9 +46,7 @@
// Generally, GlobalCounter should be used instead of this class, as // Generally, GlobalCounter should be used instead of this class, as
// GlobalCounter has measurably better performance and doesn't have // GlobalCounter has measurably better performance and doesn't have
// the single writer at a time restriction. Use this only in // the single writer at a time restriction. Use this only in
// situations where GlobalCounter won't work for some reason, such as // situations where GlobalCounter won't work for some reason.
// nesting. But note that nesting often indicates other problems, and
// may risk deadlock.
class SingleWriterSynchronizer { class SingleWriterSynchronizer {
volatile uint _enter; volatile uint _enter;
volatile uint _exit[2]; volatile uint _exit[2];

View File

@ -47,11 +47,11 @@ public:
void main_run() { void main_run() {
_wrt_start->signal(); _wrt_start->signal();
while (!_exit) { while (!_exit) {
GlobalCounter::critical_section_begin(this); GlobalCounter::CSContext cs_context = GlobalCounter::critical_section_begin(this);
volatile TestData* test = OrderAccess::load_acquire(_test); volatile TestData* test = OrderAccess::load_acquire(_test);
long value = OrderAccess::load_acquire(&test->test_value); long value = OrderAccess::load_acquire(&test->test_value);
ASSERT_EQ(value, GOOD_VALUE); ASSERT_EQ(value, GOOD_VALUE);
GlobalCounter::critical_section_end(this); GlobalCounter::critical_section_end(this, cs_context);
{ {
GlobalCounter::CriticalSection cs(this); GlobalCounter::CriticalSection cs(this);
volatile TestData* test = OrderAccess::load_acquire(_test); volatile TestData* test = OrderAccess::load_acquire(_test);

View File

@ -0,0 +1,208 @@
/*
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
#include "precompiled.hpp"
#include "metaprogramming/isRegisteredEnum.hpp"
#include "runtime/atomic.hpp"
#include "runtime/orderAccess.hpp"
#include "runtime/os.hpp"
#include "utilities/globalCounter.hpp"
#include "utilities/globalCounter.inline.hpp"
#include "utilities/spinYield.hpp"
#include "threadHelper.inline.hpp"
enum NestedTestState {
START,
START_WAIT,
OUTER_ENTERED,
INNER_ENTERED,
INNER_EXITED,
OUTER_EXITED,
SYNCHRONIZING,
SYNCHRONIZED
};
template<> struct IsRegisteredEnum<NestedTestState> : public TrueType {};
class RCUNestedThread : public JavaTestThread {
volatile NestedTestState _state;
volatile bool _proceed;
protected:
RCUNestedThread(Semaphore* post) :
JavaTestThread(post),
_state(START),
_proceed(false)
{}
~RCUNestedThread() {}
void set_state(NestedTestState new_state) {
OrderAccess::release_store(&_state, new_state);
}
void wait_with_state(NestedTestState new_state) {
SpinYield spinner;
OrderAccess::release_store(&_state, new_state);
while (!OrderAccess::load_acquire(&_proceed)) {
spinner.wait();
}
OrderAccess::release_store(&_proceed, false);
}
public:
NestedTestState state() const {
return OrderAccess::load_acquire(&_state);
}
void wait_for_state(NestedTestState goal) {
SpinYield spinner;
while (state() != goal) {
spinner.wait();
}
}
void proceed() {
OrderAccess::release_store(&_proceed, true);
}
};
class RCUNestedReaderThread : public RCUNestedThread {
public:
RCUNestedReaderThread(Semaphore* post) :
RCUNestedThread(post)
{}
virtual void main_run();
};
void RCUNestedReaderThread::main_run() {
wait_with_state(START_WAIT);
{
GlobalCounter::CriticalSection outer(Thread::current());
wait_with_state(OUTER_ENTERED);
{
GlobalCounter::CriticalSection inner(Thread::current());
wait_with_state(INNER_ENTERED);
}
wait_with_state(INNER_EXITED);
}
wait_with_state(OUTER_EXITED);
}
class RCUNestedWriterThread : public RCUNestedThread {
public:
RCUNestedWriterThread(Semaphore* post) :
RCUNestedThread(post)
{}
virtual void main_run();
};
void RCUNestedWriterThread::main_run() {
wait_with_state(START_WAIT);
set_state(SYNCHRONIZING);
GlobalCounter::write_synchronize();
wait_with_state(SYNCHRONIZED);
}
TEST_VM(GlobalCounter, nested_critical_section) {
Semaphore post;
RCUNestedReaderThread* reader = new RCUNestedReaderThread(&post);
RCUNestedWriterThread* outer = new RCUNestedWriterThread(&post);
RCUNestedWriterThread* inner = new RCUNestedWriterThread(&post);
reader->doit();
outer->doit();
inner->doit();
reader->wait_for_state(START_WAIT);
outer->wait_for_state(START_WAIT);
inner->wait_for_state(START_WAIT);
EXPECT_EQ(START_WAIT, reader->state());
EXPECT_EQ(START_WAIT, outer->state());
EXPECT_EQ(START_WAIT, inner->state());
reader->proceed();
reader->wait_for_state(OUTER_ENTERED);
EXPECT_EQ(OUTER_ENTERED, reader->state());
EXPECT_EQ(START_WAIT, outer->state());
EXPECT_EQ(START_WAIT, inner->state());
outer->proceed();
outer->wait_for_state(SYNCHRONIZING);
EXPECT_EQ(OUTER_ENTERED, reader->state());
EXPECT_EQ(SYNCHRONIZING, outer->state());
EXPECT_EQ(START_WAIT, inner->state());
os::naked_short_sleep(100); // Give outer time in synchronization.
EXPECT_EQ(OUTER_ENTERED, reader->state());
EXPECT_EQ(SYNCHRONIZING, outer->state());
EXPECT_EQ(START_WAIT, inner->state());
reader->proceed();
reader->wait_for_state(INNER_ENTERED);
EXPECT_EQ(INNER_ENTERED, reader->state());
EXPECT_EQ(SYNCHRONIZING, outer->state());
EXPECT_EQ(START_WAIT, inner->state());
inner->proceed();
inner->wait_for_state(SYNCHRONIZING);
EXPECT_EQ(INNER_ENTERED, reader->state());
EXPECT_EQ(SYNCHRONIZING, outer->state());
EXPECT_EQ(SYNCHRONIZING, inner->state());
os::naked_short_sleep(100); // Give writers time in synchronization.
EXPECT_EQ(INNER_ENTERED, reader->state());
EXPECT_EQ(SYNCHRONIZING, outer->state());
EXPECT_EQ(SYNCHRONIZING, inner->state());
reader->proceed();
reader->wait_for_state(INNER_EXITED);
// inner does *not* complete synchronization here.
EXPECT_EQ(INNER_EXITED, reader->state());
EXPECT_EQ(SYNCHRONIZING, outer->state());
EXPECT_EQ(SYNCHRONIZING, inner->state());
os::naked_short_sleep(100); // Give writers more time in synchronization.
EXPECT_EQ(INNER_EXITED, reader->state());
EXPECT_EQ(SYNCHRONIZING, outer->state());
EXPECT_EQ(SYNCHRONIZING, inner->state());
reader->proceed();
reader->wait_for_state(OUTER_EXITED);
// Both inner and outer can synchronize now.
outer->wait_for_state(SYNCHRONIZED);
inner->wait_for_state(SYNCHRONIZED);
EXPECT_EQ(OUTER_EXITED, reader->state());
EXPECT_EQ(SYNCHRONIZED, outer->state());
EXPECT_EQ(SYNCHRONIZED, inner->state());
// Wait for reader, outer, and inner to complete.
reader->proceed();
outer->proceed();
inner->proceed();
for (uint i = 0; i < 3; ++i) {
post.wait();
}
}