8296414: [BACKOUT] JDK-8295319: pending_cards_at_gc_start doesn't include cards in thread buffers

Reviewed-by: lkorinth
This commit is contained in:
Thomas Schatzl 2022-11-04 18:22:55 +00:00
parent 5b7e70645b
commit b847fb6877
7 changed files with 68 additions and 66 deletions

View File

@ -1063,7 +1063,7 @@ void G1CollectedHeap::abort_refinement() {
} }
// Discard all remembered set updates and reset refinement statistics. // Discard all remembered set updates and reset refinement statistics.
G1BarrierSet::dirty_card_queue_set().abandon_logs_and_stats(); G1BarrierSet::dirty_card_queue_set().abandon_logs();
assert(G1BarrierSet::dirty_card_queue_set().num_cards() == 0, assert(G1BarrierSet::dirty_card_queue_set().num_cards() == 0,
"DCQS should be empty"); "DCQS should be empty");
concurrent_refine()->get_and_reset_refinement_stats(); concurrent_refine()->get_and_reset_refinement_stats();

View File

@ -530,13 +530,16 @@ bool G1DirtyCardQueueSet::refine_completed_buffer_concurrently(uint worker_id,
return true; return true;
} }
void G1DirtyCardQueueSet::abandon_logs_and_stats() { void G1DirtyCardQueueSet::abandon_logs() {
assert_at_safepoint(); assert_at_safepoint();
abandon_completed_buffers();
_detached_refinement_stats.reset();
// Disable mutator refinement until concurrent refinement decides otherwise. // Disable mutator refinement until concurrent refinement decides otherwise.
set_mutator_refinement_threshold(SIZE_MAX); set_mutator_refinement_threshold(SIZE_MAX);
// Iterate over all the threads, resetting per-thread queues and stats. // Since abandon is done only at safepoints, we can safely manipulate
// these queues.
struct AbandonThreadLogClosure : public ThreadClosure { struct AbandonThreadLogClosure : public ThreadClosure {
G1DirtyCardQueueSet& _qset; G1DirtyCardQueueSet& _qset;
AbandonThreadLogClosure(G1DirtyCardQueueSet& qset) : _qset(qset) {} AbandonThreadLogClosure(G1DirtyCardQueueSet& qset) : _qset(qset) {}
@ -547,16 +550,9 @@ void G1DirtyCardQueueSet::abandon_logs_and_stats() {
} }
} closure(*this); } closure(*this);
Threads::threads_do(&closure); Threads::threads_do(&closure);
enqueue_all_paused_buffers();
abandon_completed_buffers();
// Reset stats from detached threads.
MutexLocker ml(G1DetachedRefinementStats_lock, Mutex::_no_safepoint_check_flag);
_detached_refinement_stats.reset();
} }
void G1DirtyCardQueueSet::concatenate_logs_and_stats() { void G1DirtyCardQueueSet::concatenate_logs() {
assert_at_safepoint(); assert_at_safepoint();
// Disable mutator refinement until concurrent refinement decides otherwise. // Disable mutator refinement until concurrent refinement decides otherwise.
@ -566,39 +562,47 @@ void G1DirtyCardQueueSet::concatenate_logs_and_stats() {
// the global list of logs. // the global list of logs.
struct ConcatenateThreadLogClosure : public ThreadClosure { struct ConcatenateThreadLogClosure : public ThreadClosure {
G1DirtyCardQueueSet& _qset; G1DirtyCardQueueSet& _qset;
G1ConcurrentRefineStats _total_stats; ConcatenateThreadLogClosure(G1DirtyCardQueueSet& qset) : _qset(qset) {}
ConcatenateThreadLogClosure(G1DirtyCardQueueSet& qset) :
_qset{qset}, _total_stats{} {}
virtual void do_thread(Thread* t) { virtual void do_thread(Thread* t) {
G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(t); G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(t);
// Flush the buffer if non-empty. Flush before accumulating and
// resetting stats, since flushing may modify the stats.
if ((queue.buffer() != nullptr) && if ((queue.buffer() != nullptr) &&
(queue.index() != _qset.buffer_size())) { (queue.index() != _qset.buffer_size())) {
_qset.flush_queue(queue); _qset.flush_queue(queue);
} }
G1ConcurrentRefineStats& qstats = *queue.refinement_stats();
_total_stats += qstats;
qstats.reset();
} }
} closure(*this); } closure(*this);
Threads::threads_do(&closure); Threads::threads_do(&closure);
_concatenated_refinement_stats = closure._total_stats;
enqueue_all_paused_buffers(); enqueue_all_paused_buffers();
verify_num_cards(); verify_num_cards();
}
G1ConcurrentRefineStats G1DirtyCardQueueSet::get_and_reset_refinement_stats() {
assert_at_safepoint();
// Since we're at a safepoint, there aren't any races with recording of
// detached refinement stats. In particular, there's no risk of double
// counting a thread that detaches after we've examined it but before
// we've processed the detached stats.
// Collect and reset stats for attached threads.
struct CollectStats : public ThreadClosure {
G1ConcurrentRefineStats _total_stats;
virtual void do_thread(Thread* t) {
G1DirtyCardQueue& dcq = G1ThreadLocalData::dirty_card_queue(t);
G1ConcurrentRefineStats& stats = *dcq.refinement_stats();
_total_stats += stats;
stats.reset();
}
} closure;
Threads::threads_do(&closure);
// Collect and reset stats from detached threads. // Collect and reset stats from detached threads.
MutexLocker ml(G1DetachedRefinementStats_lock, Mutex::_no_safepoint_check_flag); MutexLocker ml(G1DetachedRefinementStats_lock, Mutex::_no_safepoint_check_flag);
_concatenated_refinement_stats += _detached_refinement_stats; closure._total_stats += _detached_refinement_stats;
_detached_refinement_stats.reset(); _detached_refinement_stats.reset();
}
G1ConcurrentRefineStats G1DirtyCardQueueSet::concatenated_refinement_stats() const { return closure._total_stats;
assert_at_safepoint();
return _concatenated_refinement_stats;
} }
void G1DirtyCardQueueSet::record_detached_refinement_stats(G1ConcurrentRefineStats* stats) { void G1DirtyCardQueueSet::record_detached_refinement_stats(G1ConcurrentRefineStats* stats) {

View File

@ -95,10 +95,10 @@ class G1DirtyCardQueueSet: public PtrQueueSet {
// //
// The paused buffers are conceptually an extension of the completed buffers // The paused buffers are conceptually an extension of the completed buffers
// queue, and operations which need to deal with all of the queued buffers // queue, and operations which need to deal with all of the queued buffers
// (such as concatenating or abandoning logs) also need to deal with any // (such as concatenate_logs) also need to deal with any paused buffers. In
// paused buffers. In general, if a safepoint performs a GC then the paused // general, if a safepoint performs a GC then the paused buffers will be
// buffers will be processed as part of it, and there won't be any paused // processed as part of it, and there won't be any paused buffers after a
// buffers after a GC safepoint. // GC safepoint.
class PausedBuffers { class PausedBuffers {
class PausedList : public CHeapObj<mtGC> { class PausedList : public CHeapObj<mtGC> {
BufferNode* volatile _head; BufferNode* volatile _head;
@ -175,7 +175,6 @@ class G1DirtyCardQueueSet: public PtrQueueSet {
G1FreeIdSet _free_ids; G1FreeIdSet _free_ids;
G1ConcurrentRefineStats _concatenated_refinement_stats;
G1ConcurrentRefineStats _detached_refinement_stats; G1ConcurrentRefineStats _detached_refinement_stats;
// Verify _num_cards == sum of cards in the completed queue. // Verify _num_cards == sum of cards in the completed queue.
@ -268,21 +267,17 @@ public:
size_t stop_at, size_t stop_at,
G1ConcurrentRefineStats* stats); G1ConcurrentRefineStats* stats);
// If a full collection is happening, reset per-thread refinement stats and // If a full collection is happening, reset partial logs, and release
// partial logs, and release completed logs. The full collection will make // completed ones: the full collection will make them all irrelevant.
// them all irrelevant. void abandon_logs();
// precondition: at safepoint.
void abandon_logs_and_stats();
// Collect and reset all the per-thread refinement stats. If any threads // If any threads have partial logs, add them to the global list of logs.
// have partial logs then add them to the global list. void concatenate_logs();
// precondition: at safepoint.
void concatenate_logs_and_stats();
// Return the total of mutator refinement stats for all threads. // Return the total of mutator refinement stats for all threads.
// Also resets the stats for the threads.
// precondition: at safepoint. // precondition: at safepoint.
// precondition: only call after concatenate_logs_and_stats. G1ConcurrentRefineStats get_and_reset_refinement_stats();
G1ConcurrentRefineStats concatenated_refinement_stats() const;
// Accumulate refinement stats from threads that are detaching. // Accumulate refinement stats from threads that are detaching.
void record_detached_refinement_stats(G1ConcurrentRefineStats* stats); void record_detached_refinement_stats(G1ConcurrentRefineStats* stats);

View File

@ -585,14 +585,12 @@ static void log_refinement_stats(const char* kind, const G1ConcurrentRefineStats
stats.dirtied_cards()); stats.dirtied_cards());
} }
void G1Policy::record_concurrent_refinement_stats(size_t pending_cards, void G1Policy::record_concurrent_refinement_stats() {
size_t thread_buffer_cards) { G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set();
_pending_cards_at_gc_start = pending_cards; _pending_cards_at_gc_start = dcqs.num_cards();
_analytics->report_dirtied_cards_in_thread_buffers(thread_buffer_cards);
// Collect per-thread stats, mostly from mutator activity. // Collect per-thread stats, mostly from mutator activity.
G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set(); G1ConcurrentRefineStats mut_stats = dcqs.get_and_reset_refinement_stats();
G1ConcurrentRefineStats mut_stats = dcqs.concatenated_refinement_stats();
// Collect specialized concurrent refinement thread stats. // Collect specialized concurrent refinement thread stats.
G1ConcurrentRefine* cr = _g1h->concurrent_refine(); G1ConcurrentRefine* cr = _g1h->concurrent_refine();
@ -629,6 +627,11 @@ void G1Policy::record_concurrent_refinement_stats(size_t pending_cards,
} }
} }
// Record the outcome of concatenating the per-thread dirty card logs.
//   concat_time - time spent on the concatenation.
//   num_cards   - number of cards reported as having been in thread buffers.
void G1Policy::record_concatenate_dirty_card_logs(Tickspan concat_time, size_t num_cards) {
  // Feed the thread-buffer card count into the analytics.
  _analytics->report_dirtied_cards_in_thread_buffers(num_cards);

  // Phase times are kept in milliseconds; convert before recording.
  const double concat_time_ms = concat_time.seconds() * MILLIUNITS;
  phase_times()->record_concatenate_dirty_card_logs_time_ms(concat_time_ms);
}
void G1Policy::record_young_collection_start() { void G1Policy::record_young_collection_start() {
Ticks now = Ticks::now(); Ticks now = Ticks::now();
// We only need to do this here as the policy will only be applied // We only need to do this here as the policy will only be applied
@ -643,6 +646,8 @@ void G1Policy::record_young_collection_start() {
phase_times()->record_cur_collection_start_sec(now.seconds()); phase_times()->record_cur_collection_start_sec(now.seconds());
record_concurrent_refinement_stats();
_collection_set->reset_bytes_used_before(); _collection_set->reset_bytes_used_before();
// do that for any other surv rate groups // do that for any other surv rate groups

View File

@ -278,6 +278,9 @@ private:
// Indicate that we aborted marking before doing any mixed GCs. // Indicate that we aborted marking before doing any mixed GCs.
void abort_time_to_mixed_tracking(); void abort_time_to_mixed_tracking();
// Record and log stats before not-full collection.
void record_concurrent_refinement_stats();
public: public:
G1Policy(STWGCTimer* gc_timer); G1Policy(STWGCTimer* gc_timer);
@ -296,6 +299,8 @@ public:
// This should be called after the heap is resized. // This should be called after the heap is resized.
void record_new_heap_size(uint new_number_of_regions); void record_new_heap_size(uint new_number_of_regions);
void record_concatenate_dirty_card_logs(Tickspan concat_time, size_t num_cards);
void init(G1CollectedHeap* g1h, G1CollectionSet* collection_set); void init(G1CollectedHeap* g1h, G1CollectionSet* collection_set);
// Record the start and end of the young gc pause. // Record the start and end of the young gc pause.
@ -393,12 +398,6 @@ public:
void transfer_survivors_to_cset(const G1SurvivorRegions* survivors); void transfer_survivors_to_cset(const G1SurvivorRegions* survivors);
// Record and log stats and pending cards before not-full collection.
// thread_buffer_cards is the number of cards that were in per-thread
// buffers. pending_cards includes thread_buffer_cards.
void record_concurrent_refinement_stats(size_t pending_cards,
size_t thread_buffer_cards);
private: private:
// //
// Survivor regions policy. // Survivor regions policy.

View File

@ -467,16 +467,14 @@ void G1YoungCollector::set_young_collection_default_active_worker_threads(){
log_info(gc,task)("Using %u workers of %u for evacuation", active_workers, workers()->max_workers()); log_info(gc,task)("Using %u workers of %u for evacuation", active_workers, workers()->max_workers());
} }
void G1YoungCollector::concatenate_dirty_card_logs_and_stats() { void G1YoungCollector::flush_dirty_card_queues() {
Ticks start = Ticks::now(); Ticks start = Ticks::now();
G1DirtyCardQueueSet& qset = G1BarrierSet::dirty_card_queue_set(); G1DirtyCardQueueSet& qset = G1BarrierSet::dirty_card_queue_set();
size_t old_cards = qset.num_cards(); size_t old_cards = qset.num_cards();
qset.concatenate_logs_and_stats(); qset.concatenate_logs();
size_t pending_cards = qset.num_cards(); size_t added_cards = qset.num_cards() - old_cards;
size_t thread_buffer_cards = pending_cards - old_cards; Tickspan concat_time = Ticks::now() - start;
policy()->record_concurrent_refinement_stats(pending_cards, thread_buffer_cards); policy()->record_concatenate_dirty_card_logs(concat_time, added_cards);
double concat_time = (Ticks::now() - start).seconds() * MILLIUNITS;
phase_times()->record_concatenate_dirty_card_logs_time_ms(concat_time);
} }
void G1YoungCollector::pre_evacuate_collection_set(G1EvacInfo* evacuation_info, G1ParScanThreadStateSet* per_thread_states) { void G1YoungCollector::pre_evacuate_collection_set(G1EvacInfo* evacuation_info, G1ParScanThreadStateSet* per_thread_states) {
@ -495,6 +493,10 @@ void G1YoungCollector::pre_evacuate_collection_set(G1EvacInfo* evacuation_info,
phase_times()->record_prepare_tlab_time_ms((Ticks::now() - start).seconds() * 1000.0); phase_times()->record_prepare_tlab_time_ms((Ticks::now() - start).seconds() * 1000.0);
} }
// Flush dirty card queues to qset, so later phases don't need to account
// for partially filled per-thread queues and such.
flush_dirty_card_queues();
hot_card_cache()->reset_hot_cache_claimed_index(); hot_card_cache()->reset_hot_cache_claimed_index();
// Initialize the GC alloc regions. // Initialize the GC alloc regions.
@ -1071,9 +1073,6 @@ void G1YoungCollector::collect() {
// other trivial setup above). // other trivial setup above).
policy()->record_young_collection_start(); policy()->record_young_collection_start();
// Flush early, so later phases don't need to account for per-thread stuff.
concatenate_dirty_card_logs_and_stats();
calculate_collection_set(jtm.evacuation_info(), policy()->max_pause_time_ms()); calculate_collection_set(jtm.evacuation_info(), policy()->max_pause_time_ms());
G1RedirtyCardsQueueSet rdcqs(G1BarrierSet::dirty_card_queue_set().allocator()); G1RedirtyCardsQueueSet rdcqs(G1BarrierSet::dirty_card_queue_set().allocator());

View File

@ -98,7 +98,7 @@ class G1YoungCollector {
void set_young_collection_default_active_worker_threads(); void set_young_collection_default_active_worker_threads();
void concatenate_dirty_card_logs_and_stats(); void flush_dirty_card_queues();
void pre_evacuate_collection_set(G1EvacInfo* evacuation_info, G1ParScanThreadStateSet* pss); void pre_evacuate_collection_set(G1EvacInfo* evacuation_info, G1ParScanThreadStateSet* pss);
// Actually do the work of evacuating the parts of the collection set. // Actually do the work of evacuating the parts of the collection set.