8230404: Refactor logged card refinement support in G1DirtyCardQueueSet

Separate concurrent refinement from STW refinement.

Reviewed-by: sjohanss, tschatzl
Kim Barrett 2019-09-06 13:38:55 -04:00
parent c159a4e0f2
commit 40565a845c
7 changed files with 91 additions and 133 deletions

src/hotspot/share/gc/g1/g1CardTableEntryClosure.hpp
@@ -26,6 +26,7 @@
 #define SHARE_GC_G1_G1CARDTABLEENTRYCLOSURE_HPP
 #include "gc/shared/cardTable.hpp"
+#include "gc/shared/ptrQueue.hpp"
 #include "memory/allocation.hpp"
 // A closure class for processing card table entries. Note that we don't
@@ -34,9 +35,17 @@ class G1CardTableEntryClosure: public CHeapObj<mtGC> {
 public:
   typedef CardTable::CardValue CardValue;
-  // Process the card whose card table entry is "card_ptr". If returns
-  // "false", terminate the iteration early.
-  virtual bool do_card_ptr(CardValue* card_ptr, uint worker_id) = 0;
+  // Process the card whose card table entry is "card_ptr".
+  virtual void do_card_ptr(CardValue* card_ptr, uint worker_id) = 0;
+
+  // Process all the card_ptrs in node.
+  void apply_to_buffer(BufferNode* node, size_t buffer_size, uint worker_id) {
+    void** buffer = BufferNode::make_buffer_from_node(node);
+    for (size_t i = node->index(); i < buffer_size; ++i) {
+      CardValue* card_ptr = static_cast<CardValue*>(buffer[i]);
+      do_card_ptr(card_ptr, worker_id);
+    }
+  }
 };
 #endif // SHARE_GC_G1_G1CARDTABLEENTRYCLOSURE_HPP
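
Under the new contract, do_card_ptr returns void, so a subclass can no longer terminate a buffer walk early; any early-exit logic has to live in the caller. A hypothetical closure written against the new interface (illustration only, not part of the commit):

    // Counts cards via the inherited apply_to_buffer, which now visits
    // every card in the buffer unconditionally.
    class CountCardsClosure : public G1CardTableEntryClosure {
      size_t _count;
    public:
      CountCardsClosure() : _count(0) {}
      virtual void do_card_ptr(CardValue* card_ptr, uint worker_id) {
        assert(card_ptr != NULL, "invariant");
        _count++;
      }
      size_t count() const { return _count; }
    };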

src/hotspot/share/gc/g1/g1CollectedHeap.cpp
@@ -132,7 +132,7 @@ class RedirtyLoggedCardTableEntryClosure : public G1CardTableEntryClosure {
   RedirtyLoggedCardTableEntryClosure(G1CollectedHeap* g1h) : G1CardTableEntryClosure(),
     _num_dirtied(0), _g1h(g1h), _g1_ct(g1h->card_table()) { }
-  bool do_card_ptr(CardValue* card_ptr, uint worker_i) {
+  void do_card_ptr(CardValue* card_ptr, uint worker_i) {
     HeapRegion* hr = region_for_card(card_ptr);
     // Should only dirty cards in regions that won't be freed.
@@ -140,8 +140,6 @@ class RedirtyLoggedCardTableEntryClosure : public G1CardTableEntryClosure {
       *card_ptr = G1CardTable::dirty_card_val();
       _num_dirtied++;
     }
-
-    return true;
   }
   size_t num_dirtied() const { return _num_dirtied; }
@@ -1948,12 +1946,6 @@ void G1CollectedHeap::iterate_hcc_closure(G1CardTableEntryClosure* cl, uint worker_i) {
   _hot_card_cache->drain(cl, worker_i);
 }
-void G1CollectedHeap::iterate_dirty_card_closure(G1CardTableEntryClosure* cl, uint worker_i) {
-  G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set();
-  while (dcqs.apply_closure_during_gc(cl, worker_i)) {}
-  assert(dcqs.num_cards() == 0, "Completed buffers exist!");
-}
-
 // Computes the sum of the storage used by the various regions.
 size_t G1CollectedHeap::used() const {
   size_t result = _summary_bytes_used + _allocator->used_in_alloc_regions();
@@ -3225,23 +3217,14 @@ class G1RedirtyLoggedCardsTask : public AbstractGangTask {
   G1CollectedHeap* _g1h;
   BufferNode* volatile _nodes;
-  void apply(G1CardTableEntryClosure* cl, BufferNode* node, uint worker_id) {
-    void** buf = BufferNode::make_buffer_from_node(node);
-    size_t limit = _qset->buffer_size();
-    for (size_t i = node->index(); i < limit; ++i) {
-      CardTable::CardValue* card_ptr = static_cast<CardTable::CardValue*>(buf[i]);
-      bool result = cl->do_card_ptr(card_ptr, worker_id);
-      assert(result, "Closure should always return true");
-    }
-  }
-
-  void par_apply(G1CardTableEntryClosure* cl, uint worker_id) {
+  void par_apply(RedirtyLoggedCardTableEntryClosure* cl, uint worker_id) {
+    size_t buffer_size = _qset->buffer_size();
     BufferNode* next = Atomic::load(&_nodes);
     while (next != NULL) {
       BufferNode* node = next;
       next = Atomic::cmpxchg(node->next(), &_nodes, node);
       if (next == node) {
-        apply(cl, node, worker_id);
+        cl->apply_to_buffer(node, buffer_size, worker_id);
         next = node->next();
       }
     }
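
par_apply claims buffers off the shared _nodes list with a compare-and-swap instead of a lock, so redirtying workers never block each other. A minimal standalone sketch of that claiming pattern, with std::atomic standing in for HotSpot's Atomic (illustrative names, not from the commit):

    #include <atomic>

    struct Node { Node* next = nullptr; };

    // Competing workers each call claim_and_process; the CAS hands every
    // node to exactly one worker.
    void claim_and_process(std::atomic<Node*>& list, void (*process)(Node*)) {
      Node* next = list.load();
      while (next != nullptr) {
        Node* node = next;
        // Try to advance the list head past node. On failure the CAS
        // refreshes next with the current head and we retry from there.
        if (list.compare_exchange_strong(next, node->next)) {
          process(node);      // this worker now owns node
          next = node->next;  // continue from node's successor
        }
      }
    }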

src/hotspot/share/gc/g1/g1CollectedHeap.hpp
@@ -993,9 +993,6 @@ public:
   // Apply the given closure on all cards in the Hot Card Cache, emptying it.
   void iterate_hcc_closure(G1CardTableEntryClosure* cl, uint worker_i);
-  // Apply the given closure on all cards in the Dirty Card Queue Set, emptying it.
-  void iterate_dirty_card_closure(G1CardTableEntryClosure* cl, uint worker_i);
-
   // The shared block offset table array.
   G1BlockOffsetTable* bot() const { return _bot; }

src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp
@@ -41,24 +41,6 @@
 #include "runtime/thread.inline.hpp"
 #include "runtime/threadSMR.hpp"
-// Closure used for updating remembered sets and recording references that
-// point into the collection set while the mutator is running.
-// Assumed to be only executed concurrently with the mutator. Yields via
-// SuspendibleThreadSet after every card.
-class G1RefineCardConcurrentlyClosure: public G1CardTableEntryClosure {
-public:
-  bool do_card_ptr(CardValue* card_ptr, uint worker_i) {
-    G1CollectedHeap::heap()->rem_set()->refine_card_concurrently(card_ptr, worker_i);
-    if (SuspendibleThreadSet::should_yield()) {
-      // Caller will actually yield.
-      return false;
-    }
-    // Otherwise, we finished successfully; return true.
-    return true;
-  }
-};
-
 G1DirtyCardQueue::G1DirtyCardQueue(G1DirtyCardQueueSet* qset) :
   // Dirty card queues are always active, so we create them with their
   // active field set to true.
@@ -228,27 +210,29 @@ void G1DirtyCardQueueSet::merge_bufferlists(G1RedirtyCardsQueueSet* src) {
   verify_num_cards();
 }
-bool G1DirtyCardQueueSet::apply_closure_to_buffer(G1CardTableEntryClosure* cl,
-                                                  BufferNode* node,
-                                                  uint worker_i) {
-  if (cl == NULL) return true;
-  bool result = true;
-  void** buf = BufferNode::make_buffer_from_node(node);
-  size_t i = node->index();
-  size_t limit = buffer_size();
-  for ( ; i < limit; ++i) {
-    CardTable::CardValue* card_ptr = static_cast<CardTable::CardValue*>(buf[i]);
-    assert(card_ptr != NULL, "invariant");
-    if (!cl->do_card_ptr(card_ptr, worker_i)) {
-      result = false; // Incomplete processing.
-      break;
-    }
-  }
-  assert(i <= buffer_size(), "invariant");
-  node->set_index(i);
-  return result;
-}
+G1BufferNodeList G1DirtyCardQueueSet::take_all_completed_buffers() {
+  MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
+  G1BufferNodeList result(_completed_buffers_head, _completed_buffers_tail, _num_cards);
+  _completed_buffers_head = NULL;
+  _completed_buffers_tail = NULL;
+  _num_cards = 0;
+  return result;
+}
+
+bool G1DirtyCardQueueSet::refine_buffer(BufferNode* node, uint worker_id) {
+  G1RemSet* rem_set = G1CollectedHeap::heap()->rem_set();
+  size_t size = buffer_size();
+  void** buffer = BufferNode::make_buffer_from_node(node);
+  size_t i = node->index();
+  assert(i <= size, "invariant");
+  for ( ; (i < size) && !SuspendibleThreadSet::should_yield(); ++i) {
+    CardTable::CardValue* cp = static_cast<CardTable::CardValue*>(buffer[i]);
+    rem_set->refine_card_concurrently(cp, worker_id);
+  }
+  node->set_index(i);
+  return i == size;
+}
 #ifndef ASSERT
 #define assert_fully_consumed(node, buffer_size)
 #else
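
refine_buffer folds what the removed closure protocol did in two pieces (refine one card, then ask whether to yield) into a single resumable loop: progress is persisted in the node's index instead of being signalled through a closure's bool result. A standalone sketch of the pattern, with plain callbacks standing in for G1RemSet and SuspendibleThreadSet (simplified types, hypothetical names):

    #include <cstddef>
    #include <vector>

    // Stand-in for BufferNode: index marks the first unprocessed card;
    // everything before it was consumed by an earlier, interrupted pass.
    struct CardBuffer {
      std::vector<void*> cards;
      size_t index = 0;
    };

    // Returns true iff the buffer was fully consumed. On a yield request
    // the index records exactly where to resume, so no card is refined twice.
    bool refine_buffer(CardBuffer* node,
                       void (*refine_card)(void*),
                       bool (*should_yield)()) {
      size_t size = node->cards.size();
      size_t i = node->index;
      for ( ; (i < size) && !should_yield(); ++i) {
        refine_card(node->cards[i]);
      }
      node->index = i;
      return i == size;
    }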
@@ -282,8 +266,7 @@ bool G1DirtyCardQueueSet::process_or_enqueue_completed_buffer(BufferNode* node)
 bool G1DirtyCardQueueSet::mut_process_buffer(BufferNode* node) {
   uint worker_id = _free_ids.claim_par_id(); // temporarily claim an id
-  G1RefineCardConcurrentlyClosure cl;
-  bool result = apply_closure_to_buffer(&cl, node, worker_id);
+  bool result = refine_buffer(node, worker_id);
   _free_ids.release_par_id(worker_id); // release the id
   if (result) {
@@ -293,35 +276,19 @@ bool G1DirtyCardQueueSet::mut_process_buffer(BufferNode* node) {
   return result;
 }
-bool G1DirtyCardQueueSet::refine_completed_buffer_concurrently(uint worker_i, size_t stop_at) {
-  G1RefineCardConcurrentlyClosure cl;
-  return apply_closure_to_completed_buffer(&cl, worker_i, stop_at, false);
-}
-
-bool G1DirtyCardQueueSet::apply_closure_during_gc(G1CardTableEntryClosure* cl, uint worker_i) {
-  assert_at_safepoint();
-  return apply_closure_to_completed_buffer(cl, worker_i, 0, true);
-}
-
-bool G1DirtyCardQueueSet::apply_closure_to_completed_buffer(G1CardTableEntryClosure* cl,
-                                                            uint worker_i,
-                                                            size_t stop_at,
-                                                            bool during_pause) {
-  assert(!during_pause || stop_at == 0, "Should not leave any completed buffers during a pause");
-  BufferNode* nd = get_completed_buffer(stop_at);
-  if (nd == NULL) {
+bool G1DirtyCardQueueSet::refine_completed_buffer_concurrently(uint worker_id, size_t stop_at) {
+  BufferNode* node = get_completed_buffer(stop_at);
+  if (node == NULL) {
     return false;
+  } else if (refine_buffer(node, worker_id)) {
+    assert_fully_consumed(node, buffer_size());
+    // Done with fully processed buffer.
+    deallocate_buffer(node);
+    Atomic::inc(&_processed_buffers_rs_thread);
+    return true;
   } else {
-    if (apply_closure_to_buffer(cl, nd, worker_i)) {
-      assert_fully_consumed(nd, buffer_size());
-      // Done with fully processed buffer.
-      deallocate_buffer(nd);
-      Atomic::inc(&_processed_buffers_rs_thread);
-    } else {
-      // Return partially processed buffer to the queue.
-      guarantee(!during_pause, "Should never stop early");
-      enqueue_completed_buffer(nd);
-    }
+    // Return partially processed buffer to the queue.
+    enqueue_completed_buffer(node);
     return true;
   }
 }
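
The new return convention lets a caller distinguish "nothing above the threshold" (false) from "made progress" (true), whether the popped buffer was finished and freed or interrupted and re-enqueued. A hypothetical driver loop built on that contract (a sketch only, assuming the caller runs on a suspendible thread; the real G1ConcurrentRefineThread logic is more elaborate):

    #include "gc/g1/g1DirtyCardQueue.hpp"
    #include "gc/shared/suspendibleThreadSet.hpp"

    // Refine buffers until the set drops to stop_at cards, yielding to
    // safepoints between attempts (illustration, not code from the commit).
    static void refine_until_threshold(G1DirtyCardQueueSet& dcqs,
                                       uint worker_id,
                                       size_t stop_at) {
      while (dcqs.refine_completed_buffer_concurrently(worker_id, stop_at)) {
        if (SuspendibleThreadSet::should_yield()) {
          // Any partially refined buffer was already re-enqueued by the
          // queue set, so we can simply yield and then resume popping.
          SuspendibleThreadSet::yield();
        }
      }
    }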

src/hotspot/share/gc/g1/g1DirtyCardQueue.hpp
@@ -25,11 +25,11 @@
 #ifndef SHARE_GC_G1_G1DIRTYCARDQUEUE_HPP
 #define SHARE_GC_G1_G1DIRTYCARDQUEUE_HPP
+#include "gc/g1/g1BufferNodeList.hpp"
 #include "gc/g1/g1FreeIdSet.hpp"
 #include "gc/shared/ptrQueue.hpp"
 #include "memory/allocation.hpp"
-class G1CardTableEntryClosure;
 class G1DirtyCardQueueSet;
 class G1RedirtyCardsQueueSet;
 class Thread;
@@ -78,34 +78,14 @@ class G1DirtyCardQueueSet: public PtrQueueSet {
   void abandon_completed_buffers();
-  // Apply the closure to the elements of "node" from its index to
-  // buffer_size. If all closure applications return true, then
-  // returns true. Stops processing after the first closure
-  // application that returns false, and returns false from this
-  // function. The node's index is updated to exclude the processed
-  // elements, e.g. up to the element for which the closure returned
-  // false, or one past the last element if the closure always
-  // returned true.
-  bool apply_closure_to_buffer(G1CardTableEntryClosure* cl,
-                               BufferNode* node,
-                               uint worker_i = 0);
-
-  // If there are more than stop_at completed buffers, pop one, apply
-  // the specified closure to its active elements, and return true.
-  // Otherwise return false.
-  //
-  // A completely processed buffer is freed. However, if a closure
-  // invocation returns false, processing is stopped and the partially
-  // processed buffer (with its index updated to exclude the processed
-  // elements, e.g. up to the element for which the closure returned
-  // false) is returned to the completed buffer set.
-  //
-  // If during_pause is true, stop_at must be zero, and the closure
-  // must never return false.
-  bool apply_closure_to_completed_buffer(G1CardTableEntryClosure* cl,
-                                         uint worker_i,
-                                         size_t stop_at,
-                                         bool during_pause);
+  // Refine the cards in "node" from its index to buffer_size.
+  // Stops processing if SuspendibleThreadSet::should_yield() is true.
+  // Returns true if the entire buffer was processed, false if there
+  // is a pending yield request. The node's index is updated to exclude
+  // the processed elements, e.g. up to the element before processing
+  // stopped, or one past the last element if the entire buffer was
+  // processed.
+  bool refine_buffer(BufferNode* node, uint worker_id);
   bool mut_process_buffer(BufferNode* node);
@@ -171,13 +151,16 @@ public:
   void merge_bufferlists(G1RedirtyCardsQueueSet* src);
-  // Apply G1RefineCardConcurrentlyClosure to completed buffers until there are stop_at
-  // completed buffers remaining.
-  bool refine_completed_buffer_concurrently(uint worker_i, size_t stop_at);
+  G1BufferNodeList take_all_completed_buffers();
-  // Apply the given closure to all completed buffers. The given closure's do_card_ptr
-  // must never return false. Must only be called during GC.
-  bool apply_closure_during_gc(G1CardTableEntryClosure* cl, uint worker_i);
+  // If there are more than stop_at cards in the completed buffers, pop
+  // a buffer, refine its contents, and return true. Otherwise return
+  // false.
+  //
+  // Stops processing a buffer if SuspendibleThreadSet::should_yield(),
+  // returning the incompletely processed buffer to the completed buffer
+  // list, for later processing of the remainder.
+  bool refine_completed_buffer_concurrently(uint worker_i, size_t stop_at);
   // If a full collection is happening, reset partial logs, and release
   // completed ones: the full collection will make them all irrelevant.
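
take_all_completed_buffers hands the whole completed-buffer list to the caller in one critical section, returning the (head, tail, card count) triple as a G1BufferNodeList. A standalone sketch of that snapshot pattern, with std::mutex standing in for _cbl_mon (simplified types, not the HotSpot API):

    #include <cstddef>
    #include <mutex>

    struct Node { Node* next = nullptr; };

    // Stand-in for G1BufferNodeList: a detached chain plus its card count.
    struct NodeList {
      Node* head;
      Node* tail;
      size_t entry_count;
    };

    struct CompletedBufferSet {
      std::mutex _m;
      Node* _head = nullptr;
      Node* _tail = nullptr;
      size_t _num_cards = 0;

      // Atomically hand the entire list to the caller, leaving the set
      // empty; later enqueues start a fresh list.
      NodeList take_all() {
        std::lock_guard<std::mutex> x(_m);
        NodeList result = {_head, _tail, _num_cards};
        _head = nullptr;
        _tail = nullptr;
        _num_cards = 0;
        return result;
      }
    };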

src/hotspot/share/gc/g1/g1HotCardCache.cpp
@@ -99,8 +99,7 @@ void G1HotCardCache::drain(G1CardTableEntryClosure* cl, uint worker_i) {
   for (size_t i = start_idx; i < end_idx; i++) {
     CardValue* card_ptr = _hot_cache[i];
     if (card_ptr != NULL) {
-      bool result = cl->do_card_ptr(card_ptr, worker_i);
-      assert(result, "Closure should always return true");
+      cl->do_card_ptr(card_ptr, worker_i);
     } else {
       break;
     }

src/hotspot/share/gc/g1/g1RemSet.cpp
@@ -42,6 +42,7 @@
 #include "gc/g1/heapRegionRemSet.inline.hpp"
 #include "gc/g1/sparsePRT.hpp"
 #include "gc/shared/gcTraceTime.inline.hpp"
+#include "gc/shared/ptrQueue.hpp"
 #include "gc/shared/suspendibleThreadSet.hpp"
 #include "jfr/jfrEvents.hpp"
 #include "memory/iterator.hpp"
@@ -1060,7 +1061,7 @@ class G1MergeHeapRootsTask : public AbstractGangTask {
     _scan_state(scan_state), _ct(g1h->card_table()), _cards_dirty(0), _cards_skipped(0)
   {}
-  bool do_card_ptr(CardValue* card_ptr, uint worker_i) {
+  void do_card_ptr(CardValue* card_ptr, uint worker_i) {
     // The only time we care about recording cards that
     // contain references that point into the collection set
     // is during RSet updating within an evacuation pause.
@@ -1084,7 +1085,6 @@ class G1MergeHeapRootsTask : public AbstractGangTask {
       // regions to clear the card table at the end during the prepare() phase.
       _cards_skipped++;
     }
-    return true;
   }
   size_t cards_dirty() const { return _cards_dirty; }
@@ -1093,17 +1093,37 @@ class G1MergeHeapRootsTask : public AbstractGangTask {
   HeapRegionClaimer _hr_claimer;
   G1RemSetScanState* _scan_state;
+  BufferNode::Stack _dirty_card_buffers;
   bool _initial_evacuation;
   volatile bool _fast_reclaim_handled;
+
+  void apply_closure_to_dirty_card_buffers(G1MergeLogBufferCardsClosure* cl, uint worker_id) {
+    G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set();
+    size_t buffer_size = dcqs.buffer_size();
+    while (BufferNode* node = _dirty_card_buffers.pop()) {
+      cl->apply_to_buffer(node, buffer_size, worker_id);
+      dcqs.deallocate_buffer(node);
+    }
+  }
+
 public:
   G1MergeHeapRootsTask(G1RemSetScanState* scan_state, uint num_workers, bool initial_evacuation) :
     AbstractGangTask("G1 Merge Heap Roots"),
     _hr_claimer(num_workers),
     _scan_state(scan_state),
+    _dirty_card_buffers(),
     _initial_evacuation(initial_evacuation),
-    _fast_reclaim_handled(false) { }
+    _fast_reclaim_handled(false)
+  {
+    if (initial_evacuation) {
+      G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set();
+      G1BufferNodeList buffers = dcqs.take_all_completed_buffers();
+      if (buffers._entry_count != 0) {
+        _dirty_card_buffers.prepend(*buffers._head, *buffers._tail);
+      }
+    }
+  }
   virtual void work(uint worker_id) {
     G1CollectedHeap* g1h = G1CollectedHeap::heap();
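
The constructor snapshots all completed buffers into _dirty_card_buffers once, and the workers then drain that stack lock-free in apply_closure_to_dirty_card_buffers. A minimal Treiber-stack sketch of the prepend/pop pair, with std::atomic standing in for BufferNode::Stack (it ignores the ABA hazard that HotSpot's real LockFreeStack handles):

    #include <atomic>

    struct Node { Node* next = nullptr; };

    struct Stack {
      std::atomic<Node*> _top{nullptr};

      // Push an already-linked chain [first..last] with one CAS loop, as
      // the task constructor does with the buffers taken from the queue set.
      void prepend(Node& first, Node& last) {
        Node* old_top = _top.load();
        do {
          last.next = old_top;
        } while (!_top.compare_exchange_weak(old_top, &first));
      }

      // Workers race to pop; each node goes to exactly one worker.
      Node* pop() {
        Node* node = _top.load();
        while (node != nullptr &&
               !_top.compare_exchange_weak(node, node->next)) {}
        return node;
      }
    };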
@@ -1158,7 +1178,7 @@ public:
       G1GCParPhaseTimesTracker x(p, G1GCPhaseTimes::MergeLB, worker_id);
       G1MergeLogBufferCardsClosure cl(g1h, _scan_state);
-      g1h->iterate_dirty_card_closure(&cl, worker_id);
+      apply_closure_to_dirty_card_buffers(&cl, worker_id);
       p->record_thread_work_item(G1GCPhaseTimes::MergeLB, worker_id, cl.cards_dirty(), G1GCPhaseTimes::MergeLBDirtyCards);
       p->record_thread_work_item(G1GCPhaseTimes::MergeLB, worker_id, cl.cards_skipped(), G1GCPhaseTimes::MergeLBSkippedCards);