8295319: pending_cards_at_gc_start doesn't include cards in thread buffers

Reviewed-by: iwalulya, tschatzl
This commit is contained in:
Kim Barrett 2022-11-03 23:35:37 +00:00
parent 8f6c048911
commit 034980393f
7 changed files with 66 additions and 68 deletions

View File

@ -1063,7 +1063,7 @@ void G1CollectedHeap::abort_refinement() {
}
// Discard all remembered set updates and reset refinement statistics.
G1BarrierSet::dirty_card_queue_set().abandon_logs();
G1BarrierSet::dirty_card_queue_set().abandon_logs_and_stats();
assert(G1BarrierSet::dirty_card_queue_set().num_cards() == 0,
"DCQS should be empty");
concurrent_refine()->get_and_reset_refinement_stats();

View File

@ -530,16 +530,13 @@ bool G1DirtyCardQueueSet::refine_completed_buffer_concurrently(uint worker_id,
return true;
}
void G1DirtyCardQueueSet::abandon_logs() {
void G1DirtyCardQueueSet::abandon_logs_and_stats() {
assert_at_safepoint();
abandon_completed_buffers();
_detached_refinement_stats.reset();
// Disable mutator refinement until concurrent refinement decides otherwise.
set_mutator_refinement_threshold(SIZE_MAX);
// Since abandon is done only at safepoints, we can safely manipulate
// these queues.
// Iterate over all the threads, resetting per-thread queues and stats.
struct AbandonThreadLogClosure : public ThreadClosure {
G1DirtyCardQueueSet& _qset;
AbandonThreadLogClosure(G1DirtyCardQueueSet& qset) : _qset(qset) {}
@ -550,9 +547,16 @@ void G1DirtyCardQueueSet::abandon_logs() {
}
} closure(*this);
Threads::threads_do(&closure);
enqueue_all_paused_buffers();
abandon_completed_buffers();
// Reset stats from detached threads.
MutexLocker ml(G1DetachedRefinementStats_lock, Mutex::_no_safepoint_check_flag);
_detached_refinement_stats.reset();
}
void G1DirtyCardQueueSet::concatenate_logs() {
void G1DirtyCardQueueSet::concatenate_logs_and_stats() {
assert_at_safepoint();
// Disable mutator refinement until concurrent refinement decides otherwise.
@ -562,47 +566,39 @@ void G1DirtyCardQueueSet::concatenate_logs() {
// the global list of logs.
struct ConcatenateThreadLogClosure : public ThreadClosure {
G1DirtyCardQueueSet& _qset;
ConcatenateThreadLogClosure(G1DirtyCardQueueSet& qset) : _qset(qset) {}
G1ConcurrentRefineStats _total_stats;
ConcatenateThreadLogClosure(G1DirtyCardQueueSet& qset) :
_qset{qset}, _total_stats{} {}
virtual void do_thread(Thread* t) {
G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(t);
// Flush the buffer if non-empty. Flush before accumulating and
// resetting stats, since flushing may modify the stats.
if ((queue.buffer() != nullptr) &&
(queue.index() != _qset.buffer_size())) {
_qset.flush_queue(queue);
}
G1ConcurrentRefineStats& qstats = *queue.refinement_stats();
_total_stats += qstats;
qstats.reset();
}
} closure(*this);
Threads::threads_do(&closure);
_concatenated_refinement_stats = closure._total_stats;
enqueue_all_paused_buffers();
verify_num_cards();
}
G1ConcurrentRefineStats G1DirtyCardQueueSet::get_and_reset_refinement_stats() {
assert_at_safepoint();
// Since we're at a safepoint, there aren't any races with recording of
// detached refinement stats. In particular, there's no risk of double
// counting a thread that detaches after we've examined it but before
// we've processed the detached stats.
// Collect and reset stats for attached threads.
struct CollectStats : public ThreadClosure {
G1ConcurrentRefineStats _total_stats;
virtual void do_thread(Thread* t) {
G1DirtyCardQueue& dcq = G1ThreadLocalData::dirty_card_queue(t);
G1ConcurrentRefineStats& stats = *dcq.refinement_stats();
_total_stats += stats;
stats.reset();
}
} closure;
Threads::threads_do(&closure);
// Collect and reset stats from detached threads.
MutexLocker ml(G1DetachedRefinementStats_lock, Mutex::_no_safepoint_check_flag);
closure._total_stats += _detached_refinement_stats;
_concatenated_refinement_stats += _detached_refinement_stats;
_detached_refinement_stats.reset();
}
return closure._total_stats;
G1ConcurrentRefineStats G1DirtyCardQueueSet::concatenated_refinement_stats() const {
assert_at_safepoint();
return _concatenated_refinement_stats;
}
void G1DirtyCardQueueSet::record_detached_refinement_stats(G1ConcurrentRefineStats* stats) {

View File

@ -95,10 +95,10 @@ class G1DirtyCardQueueSet: public PtrQueueSet {
//
// The paused buffers are conceptually an extension of the completed buffers
// queue, and operations which need to deal with all of the queued buffers
// (such as concatenate_logs) also need to deal with any paused buffers. In
// general, if a safepoint performs a GC then the paused buffers will be
// processed as part of it, and there won't be any paused buffers after a
// GC safepoint.
// (such as concatenating or abandoning logs) also need to deal with any
// paused buffers. In general, if a safepoint performs a GC then the paused
// buffers will be processed as part of it, and there won't be any paused
// buffers after a GC safepoint.
class PausedBuffers {
class PausedList : public CHeapObj<mtGC> {
BufferNode* volatile _head;
@ -175,6 +175,7 @@ class G1DirtyCardQueueSet: public PtrQueueSet {
G1FreeIdSet _free_ids;
G1ConcurrentRefineStats _concatenated_refinement_stats;
G1ConcurrentRefineStats _detached_refinement_stats;
// Verify _num_cards == sum of cards in the completed queue.
@ -267,17 +268,21 @@ public:
size_t stop_at,
G1ConcurrentRefineStats* stats);
// If a full collection is happening, reset partial logs, and release
// completed ones: the full collection will make them all irrelevant.
void abandon_logs();
// If a full collection is happening, reset per-thread refinement stats and
// partial logs, and release completed logs. The full collection will make
// them all irrelevant.
// precondition: at safepoint.
void abandon_logs_and_stats();
// If any threads have partial logs, add them to the global list of logs.
void concatenate_logs();
// Collect and reset all the per-thread refinement stats. If any threads
// have partial logs then add them to the global list.
// precondition: at safepoint.
void concatenate_logs_and_stats();
// Return the total of mutator refinement stats for all threads.
// Also resets the stats for the threads.
// precondition: at safepoint.
G1ConcurrentRefineStats get_and_reset_refinement_stats();
// precondition: only call after concatenate_logs_and_stats.
G1ConcurrentRefineStats concatenated_refinement_stats() const;
// Accumulate refinement stats from threads that are detaching.
void record_detached_refinement_stats(G1ConcurrentRefineStats* stats);

View File

@ -585,12 +585,14 @@ static void log_refinement_stats(const char* kind, const G1ConcurrentRefineStats
stats.dirtied_cards());
}
void G1Policy::record_concurrent_refinement_stats() {
G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set();
_pending_cards_at_gc_start = dcqs.num_cards();
void G1Policy::record_concurrent_refinement_stats(size_t pending_cards,
size_t thread_buffer_cards) {
_pending_cards_at_gc_start = pending_cards;
_analytics->report_dirtied_cards_in_thread_buffers(thread_buffer_cards);
// Collect per-thread stats, mostly from mutator activity.
G1ConcurrentRefineStats mut_stats = dcqs.get_and_reset_refinement_stats();
G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set();
G1ConcurrentRefineStats mut_stats = dcqs.concatenated_refinement_stats();
// Collect specialized concurrent refinement thread stats.
G1ConcurrentRefine* cr = _g1h->concurrent_refine();
@ -627,11 +629,6 @@ void G1Policy::record_concurrent_refinement_stats() {
}
}
void G1Policy::record_concatenate_dirty_card_logs(Tickspan concat_time, size_t num_cards) {
_analytics->report_dirtied_cards_in_thread_buffers(num_cards);
phase_times()->record_concatenate_dirty_card_logs_time_ms(concat_time.seconds() * MILLIUNITS);
}
void G1Policy::record_young_collection_start() {
Ticks now = Ticks::now();
// We only need to do this here as the policy will only be applied
@ -646,8 +643,6 @@ void G1Policy::record_young_collection_start() {
phase_times()->record_cur_collection_start_sec(now.seconds());
record_concurrent_refinement_stats();
_collection_set->reset_bytes_used_before();
// do that for any other surv rate groups

View File

@ -278,9 +278,6 @@ private:
// Indicate that we aborted marking before doing any mixed GCs.
void abort_time_to_mixed_tracking();
// Record and log stats before not-full collection.
void record_concurrent_refinement_stats();
public:
G1Policy(STWGCTimer* gc_timer);
@ -299,8 +296,6 @@ public:
// This should be called after the heap is resized.
void record_new_heap_size(uint new_number_of_regions);
void record_concatenate_dirty_card_logs(Tickspan concat_time, size_t num_cards);
void init(G1CollectedHeap* g1h, G1CollectionSet* collection_set);
// Record the start and end of the young gc pause.
@ -398,6 +393,12 @@ public:
void transfer_survivors_to_cset(const G1SurvivorRegions* survivors);
// Record and log stats and pending cards before not-full collection.
// thread_buffer_cards is the number of cards that were in per-thread
// buffers. pending_cards includes thread_buffer_cards.
void record_concurrent_refinement_stats(size_t pending_cards,
size_t thread_buffer_cards);
private:
//
// Survivor regions policy.

View File

@ -467,14 +467,16 @@ void G1YoungCollector::set_young_collection_default_active_worker_threads(){
log_info(gc,task)("Using %u workers of %u for evacuation", active_workers, workers()->max_workers());
}
void G1YoungCollector::flush_dirty_card_queues() {
void G1YoungCollector::concatenate_dirty_card_logs_and_stats() {
Ticks start = Ticks::now();
G1DirtyCardQueueSet& qset = G1BarrierSet::dirty_card_queue_set();
size_t old_cards = qset.num_cards();
qset.concatenate_logs();
size_t added_cards = qset.num_cards() - old_cards;
Tickspan concat_time = Ticks::now() - start;
policy()->record_concatenate_dirty_card_logs(concat_time, added_cards);
qset.concatenate_logs_and_stats();
size_t pending_cards = qset.num_cards();
size_t thread_buffer_cards = pending_cards - old_cards;
policy()->record_concurrent_refinement_stats(pending_cards, thread_buffer_cards);
double concat_time = (Ticks::now() - start).seconds() * MILLIUNITS;
phase_times()->record_concatenate_dirty_card_logs_time_ms(concat_time);
}
void G1YoungCollector::pre_evacuate_collection_set(G1EvacInfo* evacuation_info, G1ParScanThreadStateSet* per_thread_states) {
@ -493,10 +495,6 @@ void G1YoungCollector::pre_evacuate_collection_set(G1EvacInfo* evacuation_info,
phase_times()->record_prepare_tlab_time_ms((Ticks::now() - start).seconds() * 1000.0);
}
// Flush dirty card queues to qset, so later phases don't need to account
// for partially filled per-thread queues and such.
flush_dirty_card_queues();
hot_card_cache()->reset_hot_cache_claimed_index();
// Initialize the GC alloc regions.
@ -1073,6 +1071,9 @@ void G1YoungCollector::collect() {
// other trivial setup above).
policy()->record_young_collection_start();
// Flush early, so later phases don't need to account for per-thread stuff.
concatenate_dirty_card_logs_and_stats();
calculate_collection_set(jtm.evacuation_info(), policy()->max_pause_time_ms());
G1RedirtyCardsQueueSet rdcqs(G1BarrierSet::dirty_card_queue_set().allocator());

View File

@ -98,7 +98,7 @@ class G1YoungCollector {
void set_young_collection_default_active_worker_threads();
void flush_dirty_card_queues();
void concatenate_dirty_card_logs_and_stats();
void pre_evacuate_collection_set(G1EvacInfo* evacuation_info, G1ParScanThreadStateSet* pss);
// Actually do the work of evacuating the parts of the collection set.