8243326: Cleanup use of volatile in taskqueue code
Removed volatile on queue elements, cleaned up other uses, made atomics explicit. Reviewed-by: tschatzl, iwalulya
This commit is contained in:
parent
313758a57a
commit
478773c102
src/hotspot/share/gc
@ -65,22 +65,13 @@ private:
|
||||
}
|
||||
G1TaskQueueEntry(HeapWord* addr) : _holder((void*)((uintptr_t)addr | ArraySliceBit)) { }
|
||||
public:
|
||||
G1TaskQueueEntry(const G1TaskQueueEntry& other) { _holder = other._holder; }
|
||||
|
||||
G1TaskQueueEntry() : _holder(NULL) { }
|
||||
// Trivially copyable, for use in GenericTaskQueue.
|
||||
|
||||
static G1TaskQueueEntry from_slice(HeapWord* what) { return G1TaskQueueEntry(what); }
|
||||
static G1TaskQueueEntry from_oop(oop obj) { return G1TaskQueueEntry(obj); }
|
||||
|
||||
G1TaskQueueEntry& operator=(const G1TaskQueueEntry& t) {
|
||||
_holder = t._holder;
|
||||
return *this;
|
||||
}
|
||||
|
||||
volatile G1TaskQueueEntry& operator=(const volatile G1TaskQueueEntry& t) volatile {
|
||||
_holder = t._holder;
|
||||
return *this;
|
||||
}
|
||||
|
||||
oop obj() const {
|
||||
assert(!is_array_slice(), "Trying to read array slice " PTR_FORMAT " as oop", p2i(_holder));
|
||||
return (oop)_holder;
|
||||
|
@ -28,6 +28,8 @@
|
||||
#include "memory/allocation.hpp"
|
||||
#include "memory/padded.hpp"
|
||||
#include "oops/oopsHierarchy.hpp"
|
||||
#include "runtime/atomic.hpp"
|
||||
#include "utilities/debug.hpp"
|
||||
#include "utilities/globalDefinitions.hpp"
|
||||
#include "utilities/ostream.hpp"
|
||||
#include "utilities/stack.hpp"
|
||||
@ -107,31 +109,23 @@ class TaskQueueSuper: public CHeapObj<F> {
|
||||
protected:
|
||||
// Internal type for indexing the queue; also used for the tag.
|
||||
typedef NOT_LP64(uint16_t) LP64_ONLY(uint32_t) idx_t;
|
||||
STATIC_ASSERT(N == idx_t(N)); // Ensure N fits in an idx_t.
|
||||
|
||||
// The first free element after the last one pushed (mod N).
|
||||
volatile uint _bottom;
|
||||
|
||||
enum { MOD_N_MASK = N - 1 };
|
||||
// N must be a power of 2 for computing modulo via masking.
|
||||
// N must be >= 2 for the algorithm to work at all, though larger is better.
|
||||
// C++11: is_power_of_2 is not (yet) constexpr.
|
||||
STATIC_ASSERT((N >= 2) && ((N & (N - 1)) == 0));
|
||||
static const uint MOD_N_MASK = N - 1;
|
||||
|
||||
class Age {
|
||||
friend class TaskQueueSuper;
|
||||
|
||||
public:
|
||||
Age(size_t data = 0) { _data = data; }
|
||||
Age(const Age& age) { _data = age._data; }
|
||||
Age(idx_t top, idx_t tag) { _fields._top = top; _fields._tag = tag; }
|
||||
explicit Age(size_t data = 0) : _data(data) {}
|
||||
Age(idx_t top, idx_t tag) { _fields._top = top; _fields._tag = tag; }
|
||||
|
||||
Age get() const volatile { return _data; }
|
||||
void set(Age age) volatile { _data = age._data; }
|
||||
|
||||
idx_t top() const volatile { return _fields._top; }
|
||||
idx_t tag() const volatile { return _fields._tag; }
|
||||
|
||||
// Increment top; if it wraps, increment tag also.
|
||||
void increment() {
|
||||
_fields._top = increment_index(_fields._top);
|
||||
if (_fields._top == 0) ++_fields._tag;
|
||||
}
|
||||
|
||||
Age cmpxchg(const Age new_age, const Age old_age) volatile;
|
||||
idx_t top() const { return _fields._top; }
|
||||
idx_t tag() const { return _fields._tag; }
|
||||
|
||||
bool operator ==(const Age& other) const { return _data == other._data; }
|
||||
|
||||
@ -144,9 +138,41 @@ protected:
|
||||
size_t _data;
|
||||
fields _fields;
|
||||
};
|
||||
STATIC_ASSERT(sizeof(size_t) >= sizeof(fields));
|
||||
};
|
||||
|
||||
volatile Age _age;
|
||||
uint bottom_relaxed() const {
|
||||
return Atomic::load(&_bottom);
|
||||
}
|
||||
|
||||
uint bottom_acquire() const {
|
||||
return Atomic::load_acquire(&_bottom);
|
||||
}
|
||||
|
||||
void set_bottom_relaxed(uint new_bottom) {
|
||||
Atomic::store(&_bottom, new_bottom);
|
||||
}
|
||||
|
||||
void release_set_bottom(uint new_bottom) {
|
||||
Atomic::release_store(&_bottom, new_bottom);
|
||||
}
|
||||
|
||||
Age age_relaxed() const {
|
||||
return Age(Atomic::load(&_age._data));
|
||||
}
|
||||
|
||||
void set_age_relaxed(Age new_age) {
|
||||
Atomic::store(&_age._data, new_age._data);
|
||||
}
|
||||
|
||||
Age cmpxchg_age(Age old_age, Age new_age) {
|
||||
return Age(Atomic::cmpxchg(&_age._data, old_age._data, new_age._data));
|
||||
}
|
||||
|
||||
idx_t age_top_relaxed() const {
|
||||
// Atomically accessing a subfield of an "atomic" member.
|
||||
return Atomic::load(&_age._fields._top);
|
||||
}
|
||||
|
||||
// These both operate mod N.
|
||||
static uint increment_index(uint ind) {
|
||||
@ -163,7 +189,7 @@ protected:
|
||||
}
|
||||
|
||||
// Returns the size corresponding to the given "bot" and "top".
|
||||
uint size(uint bot, uint top) const {
|
||||
uint clean_size(uint bot, uint top) const {
|
||||
uint sz = dirty_size(bot, top);
|
||||
// Has the queue "wrapped", so that bottom is less than top? There's a
|
||||
// complicated special case here. A pair of threads could perform pop_local
|
||||
@ -182,27 +208,46 @@ protected:
|
||||
return (sz == N - 1) ? 0 : sz;
|
||||
}
|
||||
|
||||
private:
|
||||
DEFINE_PAD_MINUS_SIZE(0, DEFAULT_CACHE_LINE_SIZE, 0);
|
||||
|
||||
// Index of the first free element after the last one pushed (mod N).
|
||||
volatile uint _bottom;
|
||||
DEFINE_PAD_MINUS_SIZE(1, DEFAULT_CACHE_LINE_SIZE, sizeof(uint));
|
||||
|
||||
// top() is the index of the oldest pushed element (mod N), and tag()
|
||||
// is the associated epoch, to distinguish different modifications of
|
||||
// the age. There is no available element if top() == _bottom or
|
||||
// (_bottom - top()) mod N == N-1; the latter indicates underflow
|
||||
// during concurrent pop_local/pop_global.
|
||||
volatile Age _age;
|
||||
DEFINE_PAD_MINUS_SIZE(2, DEFAULT_CACHE_LINE_SIZE, sizeof(Age));
|
||||
|
||||
NONCOPYABLE(TaskQueueSuper);
|
||||
|
||||
public:
|
||||
TaskQueueSuper() : _bottom(0), _age() {}
|
||||
|
||||
// Return true if the TaskQueue contains/does not contain any tasks.
|
||||
bool peek() const { return _bottom != _age.top(); }
|
||||
bool is_empty() const { return size() == 0; }
|
||||
// Return true if the TaskQueue contains any tasks.
|
||||
// Unreliable if there are concurrent pushes or pops.
|
||||
bool peek() const {
|
||||
return bottom_relaxed() != age_top_relaxed();
|
||||
}
|
||||
|
||||
bool is_empty() const {
|
||||
return size() == 0;
|
||||
}
|
||||
|
||||
// Return an estimate of the number of elements in the queue.
|
||||
// The "careful" version admits the possibility of pop_local/pop_global
|
||||
// races.
|
||||
// Treats pop_local/pop_global race that underflows as empty.
|
||||
uint size() const {
|
||||
return size(_bottom, _age.top());
|
||||
}
|
||||
|
||||
uint dirty_size() const {
|
||||
return dirty_size(_bottom, _age.top());
|
||||
return clean_size(bottom_relaxed(), age_top_relaxed());
|
||||
}
|
||||
|
||||
// Discard the contents of the queue.
|
||||
void set_empty() {
|
||||
_bottom = 0;
|
||||
_age.set(0);
|
||||
set_bottom_relaxed(0);
|
||||
set_age_relaxed(Age());
|
||||
}
|
||||
|
||||
// Maximum number of elements allowed in the queue. This is two less
|
||||
@ -211,9 +256,6 @@ public:
|
||||
// in GenericTaskQueue.
|
||||
uint max_elems() const { return N - 2; }
|
||||
|
||||
// Total size of queue.
|
||||
static const uint total_size() { return N; }
|
||||
|
||||
TASKQUEUE_STATS_ONLY(TaskQueueStats stats;)
|
||||
};
|
||||
|
||||
@ -254,11 +296,23 @@ protected:
|
||||
typedef typename TaskQueueSuper<N, F>::Age Age;
|
||||
typedef typename TaskQueueSuper<N, F>::idx_t idx_t;
|
||||
|
||||
using TaskQueueSuper<N, F>::_bottom;
|
||||
using TaskQueueSuper<N, F>::_age;
|
||||
using TaskQueueSuper<N, F>::MOD_N_MASK;
|
||||
|
||||
using TaskQueueSuper<N, F>::bottom_relaxed;
|
||||
using TaskQueueSuper<N, F>::bottom_acquire;
|
||||
|
||||
using TaskQueueSuper<N, F>::set_bottom_relaxed;
|
||||
using TaskQueueSuper<N, F>::release_set_bottom;
|
||||
|
||||
using TaskQueueSuper<N, F>::age_relaxed;
|
||||
using TaskQueueSuper<N, F>::set_age_relaxed;
|
||||
using TaskQueueSuper<N, F>::cmpxchg_age;
|
||||
using TaskQueueSuper<N, F>::age_top_relaxed;
|
||||
|
||||
using TaskQueueSuper<N, F>::increment_index;
|
||||
using TaskQueueSuper<N, F>::decrement_index;
|
||||
using TaskQueueSuper<N, F>::dirty_size;
|
||||
using TaskQueueSuper<N, F>::clean_size;
|
||||
|
||||
public:
|
||||
using TaskQueueSuper<N, F>::max_elems;
|
||||
@ -285,13 +339,14 @@ public:
|
||||
|
||||
// Attempts to claim a task from the "local" end of the queue (the most
|
||||
// recently pushed) as long as the number of entries exceeds the threshold.
|
||||
// If successful, returns true and sets t to the task; otherwise, returns false
|
||||
// (the queue is empty or the number of elements below the threshold).
|
||||
inline bool pop_local(volatile E& t, uint threshold = 0);
|
||||
// If successfully claims a task, returns true and sets t to the task;
|
||||
// otherwise, returns false and t is unspecified. May fail and return
|
||||
// false because of a successful steal by pop_global.
|
||||
inline bool pop_local(E& t, uint threshold = 0);
|
||||
|
||||
// Like pop_local(), but uses the "global" end of the queue (the least
|
||||
// recently pushed).
|
||||
bool pop_global(volatile E& t);
|
||||
bool pop_global(E& t);
|
||||
|
||||
// Delete any resource associated with the queue.
|
||||
~GenericTaskQueue();
|
||||
@ -301,9 +356,10 @@ public:
|
||||
template<typename Fn> void iterate(Fn fn);
|
||||
|
||||
private:
|
||||
DEFINE_PAD_MINUS_SIZE(0, DEFAULT_CACHE_LINE_SIZE, 0);
|
||||
// Base class has trailing padding.
|
||||
|
||||
// Element array.
|
||||
volatile E* _elems;
|
||||
E* _elems;
|
||||
|
||||
DEFINE_PAD_MINUS_SIZE(1, DEFAULT_CACHE_LINE_SIZE, sizeof(E*));
|
||||
// Queue owner local variables. Not to be accessed by other threads.
|
||||
@ -444,12 +500,6 @@ public:
|
||||
virtual bool should_exit_termination() = 0;
|
||||
};
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(push)
|
||||
// warning C4522: multiple assignment operators specified
|
||||
#pragma warning(disable:4522)
|
||||
#endif
|
||||
|
||||
// This is a container class for either an oop* or a narrowOop*.
|
||||
// Both are pushed onto a task queue and the consumer will test is_narrow()
|
||||
// to determine which should be processed.
|
||||
@ -468,20 +518,13 @@ class StarTask {
|
||||
_holder = (void*)p;
|
||||
}
|
||||
StarTask() { _holder = NULL; }
|
||||
// Trivially copyable, for use in GenericTaskQueue.
|
||||
|
||||
operator oop*() { return (oop*)_holder; }
|
||||
operator narrowOop*() {
|
||||
return (narrowOop*)((uintptr_t)_holder & ~COMPRESSED_OOP_MASK);
|
||||
}
|
||||
|
||||
StarTask& operator=(const StarTask& t) {
|
||||
_holder = t._holder;
|
||||
return *this;
|
||||
}
|
||||
volatile StarTask& operator=(const volatile StarTask& t) volatile {
|
||||
_holder = t._holder;
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool is_narrow() const {
|
||||
return (((uintptr_t)_holder & COMPRESSED_OOP_MASK) != 0);
|
||||
}
|
||||
@ -494,19 +537,7 @@ public:
|
||||
ObjArrayTask(oop o, size_t idx): _obj(o), _index(int(idx)) {
|
||||
assert(idx <= size_t(max_jint), "too big");
|
||||
}
|
||||
ObjArrayTask(const ObjArrayTask& t): _obj(t._obj), _index(t._index) { }
|
||||
|
||||
ObjArrayTask& operator =(const ObjArrayTask& t) {
|
||||
_obj = t._obj;
|
||||
_index = t._index;
|
||||
return *this;
|
||||
}
|
||||
volatile ObjArrayTask&
|
||||
operator =(const volatile ObjArrayTask& t) volatile {
|
||||
(void)const_cast<oop&>(_obj = t._obj);
|
||||
_index = t._index;
|
||||
return *this;
|
||||
}
|
||||
// Trivially copyable, for use in GenericTaskQueue.
|
||||
|
||||
inline oop obj() const { return _obj; }
|
||||
inline int index() const { return _index; }
|
||||
@ -518,8 +549,4 @@ private:
|
||||
int _index;
|
||||
};
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(pop)
|
||||
#endif
|
||||
|
||||
#endif // SHARE_GC_SHARED_TASKQUEUE_HPP
|
||||
|
@ -54,14 +54,14 @@ inline void GenericTaskQueue<E, F, N>::initialize() {
|
||||
|
||||
template<class E, MEMFLAGS F, unsigned int N>
|
||||
inline GenericTaskQueue<E, F, N>::~GenericTaskQueue() {
|
||||
ArrayAllocator<E>::free(const_cast<E*>(_elems), N);
|
||||
ArrayAllocator<E>::free(_elems, N);
|
||||
}
|
||||
|
||||
template<class E, MEMFLAGS F, unsigned int N> inline bool
|
||||
GenericTaskQueue<E, F, N>::push(E t) {
|
||||
uint localBot = _bottom;
|
||||
uint localBot = bottom_relaxed();
|
||||
assert(localBot < N, "_bottom out of range.");
|
||||
idx_t top = _age.top();
|
||||
idx_t top = age_top_relaxed();
|
||||
uint dirty_n_elems = dirty_size(localBot, top);
|
||||
// A dirty_size of N-1 cannot happen in push. Considering only push:
|
||||
// (1) dirty_n_elems is initially 0.
|
||||
@ -75,13 +75,8 @@ GenericTaskQueue<E, F, N>::push(E t) {
|
||||
// so can't underflow to -1 (== N-1) with push.
|
||||
assert(dirty_n_elems <= max_elems(), "n_elems out of range.");
|
||||
if (dirty_n_elems < max_elems()) {
|
||||
// g++ complains if the volatile result of the assignment is
|
||||
// unused, so we cast the volatile away. We cannot cast directly
|
||||
// to void, because gcc treats that as not using the result of the
|
||||
// assignment. However, casting to E& means that we trigger an
|
||||
// unused-value warning. So, we cast the E& to void.
|
||||
(void) const_cast<E&>(_elems[localBot] = t);
|
||||
Atomic::release_store(&_bottom, increment_index(localBot));
|
||||
_elems[localBot] = t;
|
||||
release_set_bottom(increment_index(localBot));
|
||||
TASKQUEUE_STATS_ONLY(stats.record_push());
|
||||
return true;
|
||||
}
|
||||
@ -89,8 +84,7 @@ GenericTaskQueue<E, F, N>::push(E t) {
|
||||
}
|
||||
|
||||
template <class E, MEMFLAGS F, unsigned int N>
|
||||
inline bool OverflowTaskQueue<E, F, N>::push(E t)
|
||||
{
|
||||
inline bool OverflowTaskQueue<E, F, N>::push(E t) {
|
||||
if (!taskqueue_t::push(t)) {
|
||||
overflow_stack()->push(t);
|
||||
TASKQUEUE_STATS_ONLY(stats.record_overflow(overflow_stack()->size()));
|
||||
@ -114,61 +108,56 @@ bool GenericTaskQueue<E, F, N>::pop_local_slow(uint localBot, Age oldAge) {
|
||||
// This queue was observed to contain exactly one element; either this
|
||||
// thread will claim it, or a competing "pop_global". In either case,
|
||||
// the queue will be logically empty afterwards. Create a new Age value
|
||||
// that represents the empty queue for the given value of "_bottom". (We
|
||||
// that represents the empty queue for the given value of "bottom". (We
|
||||
// must also increment "tag" because of the case where "bottom == 1",
|
||||
// "top == 0". A pop_global could read the queue element in that case,
|
||||
// then have the owner thread do a pop followed by another push. Without
|
||||
// the incrementing of "tag", the pop_global's CAS could succeed,
|
||||
// allowing it to believe it has claimed the stale element.)
|
||||
Age newAge((idx_t)localBot, oldAge.tag() + 1);
|
||||
Age newAge((idx_t)localBot, (idx_t)(oldAge.tag() + 1));
|
||||
// Perhaps a competing pop_global has already incremented "top", in which
|
||||
// case it wins the element.
|
||||
if (localBot == oldAge.top()) {
|
||||
// No competing pop_global has yet incremented "top"; we'll try to
|
||||
// install new_age, thus claiming the element.
|
||||
Age tempAge = _age.cmpxchg(newAge, oldAge);
|
||||
Age tempAge = cmpxchg_age(oldAge, newAge);
|
||||
if (tempAge == oldAge) {
|
||||
// We win.
|
||||
assert(dirty_size(localBot, _age.top()) != N - 1, "sanity");
|
||||
assert(dirty_size(localBot, age_top_relaxed()) != N - 1, "sanity");
|
||||
TASKQUEUE_STATS_ONLY(stats.record_pop_slow());
|
||||
return true;
|
||||
}
|
||||
}
|
||||
// We lose; a completing pop_global gets the element. But the queue is empty
|
||||
// We lose; a competing pop_global got the element. But the queue is empty
|
||||
// and top is greater than bottom. Fix this representation of the empty queue
|
||||
// to become the canonical one.
|
||||
_age.set(newAge);
|
||||
assert(dirty_size(localBot, _age.top()) != N - 1, "sanity");
|
||||
set_age_relaxed(newAge);
|
||||
assert(dirty_size(localBot, age_top_relaxed()) != N - 1, "sanity");
|
||||
return false;
|
||||
}
|
||||
|
||||
template<class E, MEMFLAGS F, unsigned int N> inline bool
|
||||
GenericTaskQueue<E, F, N>::pop_local(volatile E& t, uint threshold) {
|
||||
uint localBot = _bottom;
|
||||
GenericTaskQueue<E, F, N>::pop_local(E& t, uint threshold) {
|
||||
uint localBot = bottom_relaxed();
|
||||
// This value cannot be N-1. That can only occur as a result of
|
||||
// the assignment to bottom in this method. If it does, this method
|
||||
// resets the size to 0 before the next call (which is sequential,
|
||||
// since this is pop_local.)
|
||||
uint dirty_n_elems = dirty_size(localBot, _age.top());
|
||||
uint dirty_n_elems = dirty_size(localBot, age_top_relaxed());
|
||||
assert(dirty_n_elems != N - 1, "Shouldn't be possible...");
|
||||
if (dirty_n_elems <= threshold) return false;
|
||||
localBot = decrement_index(localBot);
|
||||
_bottom = localBot;
|
||||
set_bottom_relaxed(localBot);
|
||||
// This is necessary to prevent any read below from being reordered
|
||||
// before the store just above.
|
||||
OrderAccess::fence();
|
||||
// g++ complains if the volatile result of the assignment is
|
||||
// unused, so we cast the volatile away. We cannot cast directly
|
||||
// to void, because gcc treats that as not using the result of the
|
||||
// assignment. However, casting to E& means that we trigger an
|
||||
// unused-value warning. So, we cast the E& to void.
|
||||
(void) const_cast<E&>(t = _elems[localBot]);
|
||||
t = _elems[localBot];
|
||||
// This is a second read of "age"; the "size()" above is the first.
|
||||
// If there's still at least one element in the queue, based on the
|
||||
// "_bottom" and "age" we've read, then there can be no interference with
|
||||
// a "pop_global" operation, and we're done.
|
||||
idx_t tp = _age.top(); // XXX
|
||||
if (size(localBot, tp) > 0) {
|
||||
idx_t tp = age_top_relaxed();
|
||||
if (clean_size(localBot, tp) > 0) {
|
||||
assert(dirty_size(localBot, tp) != N - 1, "sanity");
|
||||
TASKQUEUE_STATS_ONLY(stats.record_pop());
|
||||
return true;
|
||||
@ -176,11 +165,11 @@ GenericTaskQueue<E, F, N>::pop_local(volatile E& t, uint threshold) {
|
||||
// Otherwise, the queue contained exactly one element; we take the slow
|
||||
// path.
|
||||
|
||||
// The barrier is required to prevent reordering the two reads of _age:
|
||||
// one is the _age.get() below, and the other is _age.top() above the if-stmt.
|
||||
// The algorithm may fail if _age.get() reads an older value than _age.top().
|
||||
// The barrier is required to prevent reordering the two reads of age:
|
||||
// one is the age() below, and the other is age_top() above the if-stmt.
|
||||
// The algorithm may fail if age() reads an older value than age_top().
|
||||
OrderAccess::loadload();
|
||||
return pop_local_slow(localBot, _age.get());
|
||||
return pop_local_slow(localBot, age_relaxed());
|
||||
}
|
||||
}
|
||||
|
||||
@ -192,9 +181,30 @@ bool OverflowTaskQueue<E, F, N>::pop_overflow(E& t)
|
||||
return true;
|
||||
}
|
||||
|
||||
// A pop_global operation may read an element that is being concurrently
|
||||
// written by a push operation. The pop_global operation will not use
|
||||
// such an element, returning failure instead. But the concurrent read
|
||||
// and write places requirements on the element type.
|
||||
//
|
||||
// Strictly, such concurrent reads and writes are undefined behavior.
|
||||
// We ignore that. Instead we require that whatever value tearing may
|
||||
// occur as a result is benign. A trivially copyable type (C++14 3.9/9)
|
||||
// satisfies the requirement. But we might use classes such as oop that
|
||||
// are not trivially copyable (in some build configurations). Such
|
||||
// classes need to be carefully examined with this requirement in mind.
|
||||
//
|
||||
// The sequence where such a read/write collision can arise is as follows.
|
||||
// Assume there is one value in the queue, so bottom == top+1.
|
||||
// (1) Thief is doing a pop_global. It has read age and bottom, and its
|
||||
// captured (localBottom - oldAge.top) == 1.
|
||||
// (2) Owner does a pop_local and wins the race for that element. It
|
||||
// decrements bottom and increments the age tag.
|
||||
// (3) Owner starts a push, writing elems[bottom]. At the same time, Thief
|
||||
// reads elems[oldAge.top]. The owner's bottom == the thief's oldAge.top.
|
||||
// (4) Thief will discard the read value, because its cmpxchg of age will fail.
|
||||
template<class E, MEMFLAGS F, unsigned int N>
|
||||
bool GenericTaskQueue<E, F, N>::pop_global(volatile E& t) {
|
||||
Age oldAge = _age.get();
|
||||
bool GenericTaskQueue<E, F, N>::pop_global(E& t) {
|
||||
Age oldAge = age_relaxed();
|
||||
#ifndef CPU_MULTI_COPY_ATOMIC
|
||||
// Architectures with non-multi-copy-atomic memory model require a
|
||||
// full fence here to guarantee that bottom is not older than age,
|
||||
@ -212,26 +222,24 @@ bool GenericTaskQueue<E, F, N>::pop_global(volatile E& t) {
|
||||
OrderAccess::fence();
|
||||
#else
|
||||
// Everyone else can make do with a LoadLoad barrier to keep reads
|
||||
// from _age and _bottom in order.
|
||||
// from age and bottom in order.
|
||||
OrderAccess::loadload();
|
||||
#endif
|
||||
uint localBot = Atomic::load_acquire(&_bottom);
|
||||
uint n_elems = size(localBot, oldAge.top());
|
||||
uint localBot = bottom_acquire();
|
||||
uint n_elems = clean_size(localBot, oldAge.top());
|
||||
if (n_elems == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// g++ complains if the volatile result of the assignment is
|
||||
// unused, so we cast the volatile away. We cannot cast directly
|
||||
// to void, because gcc treats that as not using the result of the
|
||||
// assignment. However, casting to E& means that we trigger an
|
||||
// unused-value warning. So, we cast the E& to void.
|
||||
(void) const_cast<E&>(t = _elems[oldAge.top()]);
|
||||
Age newAge(oldAge);
|
||||
newAge.increment();
|
||||
Age resAge = _age.cmpxchg(newAge, oldAge);
|
||||
t = _elems[oldAge.top()];
|
||||
// Increment top; if it wraps, also increment tag, to distinguish it
|
||||
// from any recent _age for the same top() index.
|
||||
idx_t new_top = increment_index(oldAge.top());
|
||||
idx_t new_tag = oldAge.tag() + ((new_top == 0) ? 1 : 0);
|
||||
Age newAge(new_top, new_tag);
|
||||
Age resAge = cmpxchg_age(oldAge, newAge);
|
||||
|
||||
// Note that using "_bottom" here might fail, since a pop_local might
|
||||
// Note that using "bottom" here might fail, since a pop_local might
|
||||
// have decremented it.
|
||||
assert(dirty_size(localBot, newAge.top()) != N - 1, "sanity");
|
||||
return resAge == oldAge;
|
||||
@ -324,19 +332,14 @@ GenericTaskQueueSet<T, F>::steal(uint queue_num, E& t) {
|
||||
return false;
|
||||
}
|
||||
|
||||
template <unsigned int N, MEMFLAGS F>
|
||||
inline typename TaskQueueSuper<N, F>::Age TaskQueueSuper<N, F>::Age::cmpxchg(const Age new_age, const Age old_age) volatile {
|
||||
return Atomic::cmpxchg(&_data, old_age._data, new_age._data);
|
||||
}
|
||||
|
||||
template<class E, MEMFLAGS F, unsigned int N>
|
||||
template<class Fn>
|
||||
inline void GenericTaskQueue<E, F, N>::iterate(Fn fn) {
|
||||
uint iters = size();
|
||||
uint index = _bottom;
|
||||
uint index = bottom_relaxed();
|
||||
for (uint i = 0; i < iters; ++i) {
|
||||
index = decrement_index(index);
|
||||
fn(const_cast<E&>(_elems[index])); // cast away volatility
|
||||
fn(_elems[index]);
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user