8087198: G1 card refinement: batching, sorting
Reviewed-by: tschatzl, kbarrett
This commit (5f2ac35cd0) is contained in this branch; its parent commit is 6025207be8.
@ -41,6 +41,7 @@
|
||||
#include "runtime/safepoint.hpp"
|
||||
#include "runtime/thread.inline.hpp"
|
||||
#include "runtime/threadSMR.hpp"
|
||||
#include "utilities/quickSort.hpp"
|
||||
|
||||
G1DirtyCardQueue::G1DirtyCardQueue(G1DirtyCardQueueSet* qset) :
|
||||
// Dirty card queues are always active, so we create them with their
|
||||
@ -226,21 +227,127 @@ G1BufferNodeList G1DirtyCardQueueSet::take_all_completed_buffers() {
|
||||
return result;
|
||||
}
|
||||
|
||||
// Helper that refines all the cards in one completed dirty-card buffer:
// first cleans (and compacts away uninteresting) cards, then sorts the
// remaining cards and refines them, yielding to safepoints between cards.
// Stack-allocated; does not own the node or the counters it updates.
class G1RefineBufferedCards : public StackObj {
  BufferNode* const _node;                   // the buffer being refined
  CardTable::CardValue** const _node_buffer; // the node's card array
  const size_t _node_buffer_size;            // capacity of _node_buffer
  const uint _worker_id;                     // refinement worker id, passed through to G1RemSet
  size_t* _total_refined_cards;              // out-param: incremented by cards cleaned/refined here
  G1RemSet* const _g1rs;

  // Comparator for sort_cards(): returns p2 - p1, i.e. orders card
  // pointers by *decreasing* address.
  static inline int compare_card(const CardTable::CardValue* p1,
                                 const CardTable::CardValue* p2) {
    return p2 - p1;
  }

  // Sorts the cards from start_index to _node_buffer_size in *decreasing*
  // address order. Tests showed that this order is preferable to not sorting
  // or increasing address order.
  void sort_cards(size_t start_index) {
    QuickSort::sort(&_node_buffer[start_index],
                    _node_buffer_size - start_index,
                    compare_card,
                    false);
  }

  // Cleans the cards from the node's current index onward, compacting the
  // kept (still-dirty, interesting) cards to the tail of the buffer.
  // Returns the index to the first clean card in the buffer.
  size_t clean_cards() {
    const size_t start = _node->index();
    assert(start <= _node_buffer_size, "invariant");

    // Two-fingered compaction algorithm similar to the filtering mechanism in
    // SATBMarkQueue. The main difference is that clean_card_before_refine()
    // could change the buffer element in-place.
    // We don't check for SuspendibleThreadSet::should_yield(), because
    // cleaning and redirtying the cards is fast.
    CardTable::CardValue** src = &_node_buffer[start];
    CardTable::CardValue** dst = &_node_buffer[_node_buffer_size];
    assert(src <= dst, "invariant");
    for ( ; src < dst; ++src) {
      // Search low to high for a card to keep.
      if (_g1rs->clean_card_before_refine(src)) {
        // Found keeper. Search high to low for a card to discard.
        while (src < --dst) {
          if (!_g1rs->clean_card_before_refine(dst)) {
            *dst = *src; // Replace discard with keeper.
            break;
          }
        }
        // If discard search failed (src == dst), the outer loop will also end.
      }
    }

    // dst points to the first retained clean card, or the end of the buffer
    // if all the cards were discarded.
    const size_t first_clean = dst - _node_buffer;
    assert(first_clean >= start && first_clean <= _node_buffer_size, "invariant");
    // Discarded cards are considered as refined.
    *_total_refined_cards += first_clean - start;
    return first_clean;
  }

  // Refines the cards from start_index to the end of the buffer.
  // Returns true if all of them were refined; returns false if interrupted
  // by a pending yield request, after redirtying the unprocessed cards.
  // Either way, the node's index and *_total_refined_cards record progress.
  bool refine_cleaned_cards(size_t start_index) {
    bool result = true;
    size_t i = start_index;
    for ( ; i < _node_buffer_size; ++i) {
      if (SuspendibleThreadSet::should_yield()) {
        // Stop early so the refinement thread can reach a safepoint;
        // the skipped cards are redirtied for later reprocessing.
        redirty_unrefined_cards(i);
        result = false;
        break;
      }
      _g1rs->refine_card_concurrently(_node_buffer[i], _worker_id);
    }
    _node->set_index(i);
    *_total_refined_cards += i - start_index;
    return result;
  }

  // Marks the cards from start to the end of the buffer dirty again, so an
  // interrupted refinement pass can be resumed/redone later.
  void redirty_unrefined_cards(size_t start) {
    for ( ; start < _node_buffer_size; ++start) {
      *_node_buffer[start] = G1CardTable::dirty_card_val();
    }
  }

public:
  G1RefineBufferedCards(BufferNode* node,
                        size_t node_buffer_size,
                        uint worker_id,
                        size_t* total_refined_cards) :
    _node(node),
    _node_buffer(reinterpret_cast<CardTable::CardValue**>(BufferNode::make_buffer_from_node(node))),
    _node_buffer_size(node_buffer_size),
    _worker_id(worker_id),
    _total_refined_cards(total_refined_cards),
    _g1rs(G1CollectedHeap::heap()->rem_set()) {}

  // Cleans, sorts, and refines the buffer's cards. Returns true if the
  // whole buffer was processed, false if processing stopped early to yield.
  bool refine() {
    size_t first_clean_index = clean_cards();
    if (first_clean_index == _node_buffer_size) {
      // Everything was cleaned away; nothing left to refine.
      _node->set_index(first_clean_index);
      return true;
    }
    // This fence serves two purposes. First, the cards must be cleaned
    // before processing the contents. Second, we can't proceed with
    // processing a region until after the read of the region's top in
    // collect_and_clean_cards(), for synchronization with possibly concurrent
    // humongous object allocation (see comment at the StoreStore fence before
    // setting the regions' tops in humongous allocation path).
    // It's okay that reading region's top and reading region's type were racy
    // wrto each other. We need both set, in any order, to proceed.
    OrderAccess::fence();
    sort_cards(first_clean_index);
    return refine_cleaned_cards(first_clean_index);
  }
};
|
||||
|
||||
// Refines the cards in "node" from its index to buffer_size(), delegating
// the clean/sort/refine pipeline to G1RefineBufferedCards. Progress (index
// and *total_refined_cards) is recorded on the node by the helper.
// Returns true if the entire buffer was processed; false if refinement
// stopped early to yield to a safepoint.
//
// NOTE(review): the merged text contained both the old per-card loop
// (which returned before this point) and this new batched implementation,
// leaving the new path unreachable; only the intended final version is kept.
bool G1DirtyCardQueueSet::refine_buffer(BufferNode* node,
                                        uint worker_id,
                                        size_t* total_refined_cards) {
  G1RefineBufferedCards buffered_cards(node,
                                       buffer_size(),
                                       worker_id,
                                       total_refined_cards);
  return buffered_cards.refine();
}
|
||||
|
||||
#ifndef ASSERT
|
||||
|
@ -1261,25 +1261,27 @@ inline void check_card_ptr(CardTable::CardValue* card_ptr, G1CardTable* ct) {
|
||||
#endif
|
||||
}
|
||||
|
||||
void G1RemSet::refine_card_concurrently(CardValue* card_ptr,
|
||||
uint worker_id) {
|
||||
bool G1RemSet::clean_card_before_refine(CardValue** const card_ptr_addr) {
|
||||
assert(!_g1h->is_gc_active(), "Only call concurrently");
|
||||
|
||||
// Construct the region representing the card.
|
||||
CardValue* card_ptr = *card_ptr_addr;
|
||||
// Find the start address represented by the card.
|
||||
HeapWord* start = _ct->addr_for(card_ptr);
|
||||
// And find the region containing it.
|
||||
HeapRegion* r = _g1h->heap_region_containing_or_null(start);
|
||||
|
||||
// If this is a (stale) card into an uncommitted region, exit.
|
||||
if (r == NULL) {
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
|
||||
check_card_ptr(card_ptr, _ct);
|
||||
|
||||
// If the card is no longer dirty, nothing to do.
|
||||
// We cannot load the card value before the "r == NULL" check, because G1
|
||||
// could uncommit parts of the card table covering uncommitted regions.
|
||||
if (*card_ptr != G1CardTable::dirty_card_val()) {
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
|
||||
// This check is needed for some uncommon cases where we should
|
||||
@ -1302,7 +1304,7 @@ void G1RemSet::refine_card_concurrently(CardValue* card_ptr,
|
||||
// enqueueing of the card and processing it here will have ensured
|
||||
// we see the up-to-date region type here.
|
||||
if (!r->is_old_or_humongous_or_archive()) {
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
|
||||
// The result from the hot card cache insert call is either:
|
||||
@ -1321,7 +1323,7 @@ void G1RemSet::refine_card_concurrently(CardValue* card_ptr,
|
||||
card_ptr = _hot_card_cache->insert(card_ptr);
|
||||
if (card_ptr == NULL) {
|
||||
// There was no eviction. Nothing to do.
|
||||
return;
|
||||
return false;
|
||||
} else if (card_ptr != orig_card_ptr) {
|
||||
// Original card was inserted and an old card was evicted.
|
||||
start = _ct->addr_for(card_ptr);
|
||||
@ -1331,8 +1333,9 @@ void G1RemSet::refine_card_concurrently(CardValue* card_ptr,
|
||||
// ignored, as discussed earlier for the original card. The
|
||||
// region could have been freed while in the cache.
|
||||
if (!r->is_old_or_humongous_or_archive()) {
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
*card_ptr_addr = card_ptr;
|
||||
} // Else we still have the original card.
|
||||
}
|
||||
|
||||
@ -1341,18 +1344,19 @@ void G1RemSet::refine_card_concurrently(CardValue* card_ptr,
|
||||
// (part of) an object at the end of the allocated space and extend
|
||||
// beyond the end of allocation.
|
||||
|
||||
// Non-humongous objects are only allocated in the old-gen during
|
||||
// GC, so if region is old then top is stable. Humongous object
|
||||
// allocation sets top last; if top has not yet been set, this is
|
||||
// a stale card and we'll end up with an empty intersection. If
|
||||
// this is not a stale card, the synchronization between the
|
||||
// Non-humongous objects are either allocated in the old regions during GC,
|
||||
// or mapped in archive regions during startup. So if region is old or
|
||||
// archive then top is stable.
|
||||
// Humongous object allocation sets top last; if top has not yet been set,
|
||||
// this is a stale card and we'll end up with an empty intersection.
|
||||
// If this is not a stale card, the synchronization between the
|
||||
// enqueuing of the card and processing it here will have ensured
|
||||
// we see the up-to-date top here.
|
||||
HeapWord* scan_limit = r->top();
|
||||
|
||||
if (scan_limit <= start) {
|
||||
// If the trimmed region is empty, the card must be stale.
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Okay to clean and process the card now. There are still some
|
||||
@ -1360,13 +1364,26 @@ void G1RemSet::refine_card_concurrently(CardValue* card_ptr,
|
||||
// as iteration failure.
|
||||
*const_cast<volatile CardValue*>(card_ptr) = G1CardTable::clean_card_val();
|
||||
|
||||
// This fence serves two purposes. First, the card must be cleaned
|
||||
// before processing the contents. Second, we can't proceed with
|
||||
// processing until after the read of top, for synchronization with
|
||||
// possibly concurrent humongous object allocation. It's okay that
|
||||
// reading top and reading type were racy wrto each other. We need
|
||||
// both set, in any order, to proceed.
|
||||
OrderAccess::fence();
|
||||
return true;
|
||||
}
|
||||
|
||||
void G1RemSet::refine_card_concurrently(CardValue* const card_ptr,
|
||||
const uint worker_id) {
|
||||
assert(!_g1h->is_gc_active(), "Only call concurrently");
|
||||
check_card_ptr(card_ptr, _ct);
|
||||
|
||||
// Construct the MemRegion representing the card.
|
||||
HeapWord* start = _ct->addr_for(card_ptr);
|
||||
// And find the region containing it.
|
||||
HeapRegion* r = _g1h->heap_region_containing(start);
|
||||
// This reload of the top is safe even though it happens after the full
|
||||
// fence, because top is stable for old, archive and unfiltered humongous
|
||||
// regions, so it must return the same value as the previous load when
|
||||
// cleaning the card. Also cleaning the card and refinement of the card
|
||||
// cannot span across safepoint, so we don't need to worry about top being
|
||||
// changed during safepoint.
|
||||
HeapWord* scan_limit = r->top();
|
||||
assert(scan_limit > start, "sanity");
|
||||
|
||||
// Don't use addr_for(card_ptr + 1) which can ask for
|
||||
// a card beyond the heap.
|
||||
|
@ -113,10 +113,17 @@ public:
|
||||
G1GCPhaseTimes::GCParPhases coderoots_phase,
|
||||
G1GCPhaseTimes::GCParPhases objcopy_phase);
|
||||
|
||||
// Refine the card corresponding to "card_ptr". Safe to be called concurrently
|
||||
// to the mutator.
|
||||
void refine_card_concurrently(CardValue* card_ptr,
|
||||
uint worker_id);
|
||||
// Two methods for concurrent refinement support, executed concurrently to
|
||||
// the mutator:
|
||||
// Cleans the card at "*card_ptr_addr" before refinement, returns true iff the
|
||||
// card needs later refinement. Note that "*card_ptr_addr" could be updated to
|
||||
// a different card due to use of hot card cache.
|
||||
bool clean_card_before_refine(CardValue** const card_ptr_addr);
|
||||
// Refine the region corresponding to "card_ptr". Must be called after
|
||||
// being filtered by clean_card_before_refine(), and after proper
|
||||
// fence/synchronization.
|
||||
void refine_card_concurrently(CardValue* const card_ptr,
|
||||
const uint worker_id);
|
||||
|
||||
// Print accumulated summary info from the start of the VM.
|
||||
void print_summary_info();
|
||||
|
Loading…
Reference in New Issue
Block a user