8178148: Log more detailed information about scan rs phase

Add logging about the number of cards actually scanned, claimed and skipped during the Scan RS phase

Reviewed-by: ehelin, sangheki
This commit is contained in:
Thomas Schatzl 2017-06-14 11:26:44 +02:00
parent 32859294a2
commit 8467de8bbc
11 changed files with 140 additions and 85 deletions

View File

@ -3274,7 +3274,7 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) {
// investigate this in CR 7178365.
double sample_end_time_sec = os::elapsedTime();
double pause_time_ms = (sample_end_time_sec - sample_start_time_sec) * MILLIUNITS;
size_t total_cards_scanned = per_thread_states.total_cards_scanned();
size_t total_cards_scanned = g1_policy()->phase_times()->sum_thread_work_items(G1GCPhaseTimes::ScanRS, G1GCPhaseTimes::ScannedCards);
g1_policy()->record_collection_pause_end(pause_time_ms, total_cards_scanned, heap_used_bytes_before_gc);
evacuation_info.set_collectionset_used_before(collection_set()->bytes_used_before());
@ -3464,11 +3464,9 @@ public:
// treating the nmethods visited to act as roots for concurrent marking.
// We only want to make sure that the oops in the nmethods are adjusted with regard to the
// objects copied by the current evacuation.
size_t cards_scanned = _g1h->g1_rem_set()->oops_into_collection_set_do(&push_heap_rs_cl,
pss->closures()->weak_codeblobs(),
worker_id);
_pss->add_cards_scanned(worker_id, cards_scanned);
_g1h->g1_rem_set()->oops_into_collection_set_do(&push_heap_rs_cl,
pss->closures()->weak_codeblobs(),
worker_id);
double strong_roots_sec = os::elapsedTime() - start_strong_roots_sec;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2013, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -78,6 +78,13 @@ G1GCPhaseTimes::G1GCPhaseTimes(uint max_gc_threads) :
_gc_par_phases[GCWorkerEnd] = new WorkerDataArray<double>(max_gc_threads, "GC Worker End (ms):");
_gc_par_phases[Other] = new WorkerDataArray<double>(max_gc_threads, "GC Worker Other (ms):");
_scan_rs_scanned_cards = new WorkerDataArray<size_t>(max_gc_threads, "Scanned Cards:");
_gc_par_phases[ScanRS]->link_thread_work_items(_scan_rs_scanned_cards, ScannedCards);
_scan_rs_claimed_cards = new WorkerDataArray<size_t>(max_gc_threads, "Claimed Cards:");
_gc_par_phases[ScanRS]->link_thread_work_items(_scan_rs_claimed_cards, ClaimedCards);
_scan_rs_skipped_cards = new WorkerDataArray<size_t>(max_gc_threads, "Skipped Cards:");
_gc_par_phases[ScanRS]->link_thread_work_items(_scan_rs_skipped_cards, SkippedCards);
_update_rs_processed_buffers = new WorkerDataArray<size_t>(max_gc_threads, "Processed Buffers:");
_gc_par_phases[UpdateRS]->link_thread_work_items(_update_rs_processed_buffers);
@ -210,8 +217,8 @@ void G1GCPhaseTimes::add_time_secs(GCParPhases phase, uint worker_i, double secs
_gc_par_phases[phase]->add(worker_i, secs);
}
void G1GCPhaseTimes::record_thread_work_item(GCParPhases phase, uint worker_i, size_t count) {
_gc_par_phases[phase]->set_thread_work_item(worker_i, count);
void G1GCPhaseTimes::record_thread_work_item(GCParPhases phase, uint worker_i, size_t count, uint index) {
_gc_par_phases[phase]->set_thread_work_item(worker_i, count, index);
}
// return the average time for a phase in milliseconds
@ -219,9 +226,9 @@ double G1GCPhaseTimes::average_time_ms(GCParPhases phase) {
return _gc_par_phases[phase]->average() * 1000.0;
}
size_t G1GCPhaseTimes::sum_thread_work_items(GCParPhases phase) {
assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count");
return _gc_par_phases[phase]->thread_work_items()->sum();
size_t G1GCPhaseTimes::sum_thread_work_items(GCParPhases phase, uint index) {
assert(_gc_par_phases[phase]->thread_work_items(index) != NULL, "No sub count");
return _gc_par_phases[phase]->thread_work_items(index)->sum();
}
template <class T>
@ -239,11 +246,13 @@ void G1GCPhaseTimes::log_phase(WorkerDataArray<double>* phase, uint indent, outp
phase->print_summary_on(out, print_sum);
details(phase, Indents[indent]);
WorkerDataArray<size_t>* work_items = phase->thread_work_items();
if (work_items != NULL) {
out->print("%s", Indents[indent + 1]);
work_items->print_summary_on(out, true);
details(work_items, Indents[indent + 1]);
for (uint i = 0; i < phase->MaxThreadWorkItems; i++) {
WorkerDataArray<size_t>* work_items = phase->thread_work_items(i);
if (work_items != NULL) {
out->print("%s", Indents[indent + 1]);
work_items->print_summary_on(out, true);
details(work_items, Indents[indent + 1]);
}
}
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2013, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -76,6 +76,12 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
GCParPhasesSentinel
};
enum GCScanRSWorkItems {
ScannedCards,
ClaimedCards,
SkippedCards
};
private:
// Markers for grouping the phases in the GCPhases enum above
static const int GCMainParPhasesLast = GCWorkerEnd;
@ -83,8 +89,15 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
static const int StringDedupPhasesLast = StringDedupTableFixup;
WorkerDataArray<double>* _gc_par_phases[GCParPhasesSentinel];
WorkerDataArray<size_t>* _update_rs_processed_buffers;
WorkerDataArray<size_t>* _scan_rs_scanned_cards;
WorkerDataArray<size_t>* _scan_rs_claimed_cards;
WorkerDataArray<size_t>* _scan_rs_skipped_cards;
WorkerDataArray<size_t>* _termination_attempts;
WorkerDataArray<size_t>* _redirtied_cards;
double _cur_collection_par_time_ms;
@ -170,12 +183,12 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
// add a number of seconds to a phase
void add_time_secs(GCParPhases phase, uint worker_i, double secs);
void record_thread_work_item(GCParPhases phase, uint worker_i, size_t count);
void record_thread_work_item(GCParPhases phase, uint worker_i, size_t count, uint index = 0);
// return the average time for a phase in milliseconds
double average_time_ms(GCParPhases phase);
size_t sum_thread_work_items(GCParPhases phase);
size_t sum_thread_work_items(GCParPhases phase, uint index = 0);
public:

View File

@ -337,16 +337,6 @@ G1ParScanThreadState* G1ParScanThreadStateSet::state_for_worker(uint worker_id)
return _states[worker_id];
}
void G1ParScanThreadStateSet::add_cards_scanned(uint worker_id, size_t cards_scanned) {
assert(worker_id < _n_workers, "out of bounds access");
_cards_scanned[worker_id] += cards_scanned;
}
size_t G1ParScanThreadStateSet::total_cards_scanned() const {
assert(_flushed, "thread local state from the per thread states should have been flushed");
return _total_cards_scanned;
}
const size_t* G1ParScanThreadStateSet::surviving_young_words() const {
assert(_flushed, "thread local state from the per thread states should have been flushed");
return _surviving_young_words_total;
@ -354,7 +344,6 @@ const size_t* G1ParScanThreadStateSet::surviving_young_words() const {
void G1ParScanThreadStateSet::flush() {
assert(!_flushed, "thread local state from the per thread states should be flushed once");
assert(_total_cards_scanned == 0, "should have been cleared");
for (uint worker_index = 0; worker_index < _n_workers; ++worker_index) {
G1ParScanThreadState* pss = _states[worker_index];
@ -363,8 +352,6 @@ void G1ParScanThreadStateSet::flush() {
continue;
}
_total_cards_scanned += _cards_scanned[worker_index];
pss->flush(_surviving_young_words_total);
delete pss;
_states[worker_index] = NULL;

View File

@ -198,8 +198,6 @@ class G1ParScanThreadStateSet : public StackObj {
G1CollectedHeap* _g1h;
G1ParScanThreadState** _states;
size_t* _surviving_young_words_total;
size_t* _cards_scanned;
size_t _total_cards_scanned;
size_t _young_cset_length;
uint _n_workers;
bool _flushed;
@ -209,8 +207,6 @@ class G1ParScanThreadStateSet : public StackObj {
_g1h(g1h),
_states(NEW_C_HEAP_ARRAY(G1ParScanThreadState*, n_workers, mtGC)),
_surviving_young_words_total(NEW_C_HEAP_ARRAY(size_t, young_cset_length, mtGC)),
_cards_scanned(NEW_C_HEAP_ARRAY(size_t, n_workers, mtGC)),
_total_cards_scanned(0),
_young_cset_length(young_cset_length),
_n_workers(n_workers),
_flushed(false) {
@ -218,22 +214,18 @@ class G1ParScanThreadStateSet : public StackObj {
_states[i] = NULL;
}
memset(_surviving_young_words_total, 0, young_cset_length * sizeof(size_t));
memset(_cards_scanned, 0, n_workers * sizeof(size_t));
}
~G1ParScanThreadStateSet() {
assert(_flushed, "thread local state from the per thread states should have been flushed");
FREE_C_HEAP_ARRAY(G1ParScanThreadState*, _states);
FREE_C_HEAP_ARRAY(size_t, _surviving_young_words_total);
FREE_C_HEAP_ARRAY(size_t, _cards_scanned);
}
void flush();
G1ParScanThreadState* state_for_worker(uint worker_id);
void add_cards_scanned(uint worker_id, size_t cards_scanned);
size_t total_cards_scanned() const;
const size_t* surviving_young_words() const;
private:

View File

@ -334,8 +334,9 @@ G1ScanRSClosure::G1ScanRSClosure(G1RemSetScanState* scan_state,
_push_heap_cl(push_heap_cl),
_code_root_cl(code_root_cl),
_strong_code_root_scan_time_sec(0.0),
_cards(0),
_cards_done(0),
_cards_claimed(0),
_cards_scanned(0),
_cards_skipped(0),
_worker_i(worker_i) {
_g1h = G1CollectedHeap::heap();
_bot = _g1h->bot();
@ -354,7 +355,7 @@ void G1ScanRSClosure::scan_card(size_t index, HeapWord* card_start, HeapRegion *
_ct_bs->set_card_claimed(index);
_push_heap_cl->set_region(r);
r->oops_on_card_seq_iterate_careful<true>(mr, _push_heap_cl);
_cards_done++;
_cards_scanned++;
}
}
@ -389,12 +390,13 @@ bool G1ScanRSClosure::doHeapRegion(HeapRegion* r) {
claimed_card_block = _scan_state->iter_claimed_next(region_idx, _block_size);
}
if (current_card < claimed_card_block) {
_cards_skipped++;
continue;
}
HeapWord* card_start = _g1h->bot()->address_for_index(card_index);
HeapRegion* card_region = _g1h->heap_region_containing(card_start);
_cards++;
_cards_claimed++;
_scan_state->add_dirty_region(card_region->hrm_index());
@ -411,21 +413,25 @@ bool G1ScanRSClosure::doHeapRegion(HeapRegion* r) {
return false;
}
size_t G1RemSet::scan_rem_set(G1ParPushHeapRSClosure* oops_in_heap_closure,
CodeBlobClosure* heap_region_codeblobs,
uint worker_i) {
void G1RemSet::scan_rem_set(G1ParPushHeapRSClosure* oops_in_heap_closure,
CodeBlobClosure* heap_region_codeblobs,
uint worker_i) {
double rs_time_start = os::elapsedTime();
G1ScanRSClosure cl(_scan_state, oops_in_heap_closure, heap_region_codeblobs, worker_i);
_g1->collection_set_iterate_from(&cl, worker_i);
double scan_rs_time_sec = (os::elapsedTime() - rs_time_start) -
cl.strong_code_root_scan_time_sec();
double scan_rs_time_sec = (os::elapsedTime() - rs_time_start) -
cl.strong_code_root_scan_time_sec();
_g1p->phase_times()->record_time_secs(G1GCPhaseTimes::ScanRS, worker_i, scan_rs_time_sec);
_g1p->phase_times()->record_time_secs(G1GCPhaseTimes::CodeRoots, worker_i, cl.strong_code_root_scan_time_sec());
G1GCPhaseTimes* p = _g1p->phase_times();
return cl.cards_done();
p->record_time_secs(G1GCPhaseTimes::ScanRS, worker_i, scan_rs_time_sec);
p->record_thread_work_item(G1GCPhaseTimes::ScanRS, worker_i, cl.cards_scanned(), G1GCPhaseTimes::ScannedCards);
p->record_thread_work_item(G1GCPhaseTimes::ScanRS, worker_i, cl.cards_claimed(), G1GCPhaseTimes::ClaimedCards);
p->record_thread_work_item(G1GCPhaseTimes::ScanRS, worker_i, cl.cards_skipped(), G1GCPhaseTimes::SkippedCards);
p->record_time_secs(G1GCPhaseTimes::CodeRoots, worker_i, cl.strong_code_root_scan_time_sec());
}
// Closure used for updating RSets and recording references that
@ -483,9 +489,9 @@ void G1RemSet::cleanupHRRS() {
HeapRegionRemSet::cleanup();
}
size_t G1RemSet::oops_into_collection_set_do(G1ParPushHeapRSClosure* cl,
CodeBlobClosure* heap_region_codeblobs,
uint worker_i) {
void G1RemSet::oops_into_collection_set_do(G1ParPushHeapRSClosure* cl,
CodeBlobClosure* heap_region_codeblobs,
uint worker_i) {
// A DirtyCardQueue that is used to hold cards containing references
// that point into the collection set. This DCQ is associated with a
// special DirtyCardQueueSet (see g1CollectedHeap.hpp). Under normal
@ -498,7 +504,7 @@ size_t G1RemSet::oops_into_collection_set_do(G1ParPushHeapRSClosure* cl,
DirtyCardQueue into_cset_dcq(&_into_cset_dirty_card_queue_set);
update_rem_set(&into_cset_dcq, cl, worker_i);
return scan_rem_set(cl, heap_region_codeblobs, worker_i);;
scan_rem_set(cl, heap_region_codeblobs, worker_i);;
}
void G1RemSet::prepare_for_oops_into_collection_set_do() {

View File

@ -108,12 +108,9 @@ public:
// partitioning the work to be done. It should be the same as
// the "i" passed to the calling thread's work(i) function.
// In the sequential case this param will be ignored.
//
// Returns the number of cards scanned while looking for pointers
// into the collection set.
size_t oops_into_collection_set_do(G1ParPushHeapRSClosure* cl,
CodeBlobClosure* heap_region_codeblobs,
uint worker_i);
void oops_into_collection_set_do(G1ParPushHeapRSClosure* cl,
CodeBlobClosure* heap_region_codeblobs,
uint worker_i);
// Prepare for and cleanup after an oops_into_collection_set_do
// call. Must call each of these once before and after (in sequential
@ -123,9 +120,9 @@ public:
void prepare_for_oops_into_collection_set_do();
void cleanup_after_oops_into_collection_set_do();
size_t scan_rem_set(G1ParPushHeapRSClosure* oops_in_heap_closure,
CodeBlobClosure* heap_region_codeblobs,
uint worker_i);
void scan_rem_set(G1ParPushHeapRSClosure* oops_in_heap_closure,
CodeBlobClosure* heap_region_codeblobs,
uint worker_i);
G1RemSetScanState* scan_state() const { return _scan_state; }
@ -185,8 +182,10 @@ public:
class G1ScanRSClosure : public HeapRegionClosure {
G1RemSetScanState* _scan_state;
size_t _cards_done;
size_t _cards;
size_t _cards_scanned;
size_t _cards_claimed;
size_t _cards_skipped;
G1CollectedHeap* _g1h;
G1ParPushHeapRSClosure* _push_heap_cl;
@ -213,8 +212,9 @@ public:
return _strong_code_root_scan_time_sec;
}
size_t cards_done() { return _cards_done;}
size_t cards_looked_up() { return _cards;}
size_t cards_scanned() const { return _cards_scanned; }
size_t cards_claimed() const { return _cards_claimed; }
size_t cards_skipped() const { return _cards_skipped; }
};
class UpdateRSOopClosure: public ExtendedOopClosure {

View File

@ -33,20 +33,25 @@ class outputStream;
template <class T>
class WorkerDataArray : public CHeapObj<mtGC> {
friend class WDAPrinter;
public:
static const uint MaxThreadWorkItems = 3;
private:
T* _data;
uint _length;
const char* _title;
WorkerDataArray<size_t>* _thread_work_items;
WorkerDataArray<size_t>* _thread_work_items[MaxThreadWorkItems];
public:
WorkerDataArray(uint length, const char* title);
~WorkerDataArray();
void link_thread_work_items(WorkerDataArray<size_t>* thread_work_items);
void set_thread_work_item(uint worker_i, size_t value);
WorkerDataArray<size_t>* thread_work_items() const {
return _thread_work_items;
void link_thread_work_items(WorkerDataArray<size_t>* thread_work_items, uint index = 0);
void set_thread_work_item(uint worker_i, size_t value, uint index = 0);
void add_thread_work_item(uint worker_i, size_t value, uint index = 0);
WorkerDataArray<size_t>* thread_work_items(uint index = 0) const {
assert(index < MaxThreadWorkItems, "Tried to access thread work item %u max %u", index, MaxThreadWorkItems);
return _thread_work_items[index];
}
static T uninitialized();

View File

@ -32,11 +32,13 @@
template <typename T>
WorkerDataArray<T>::WorkerDataArray(uint length, const char* title) :
_title(title),
_length(0),
_thread_work_items(NULL) {
_length(0) {
assert(length > 0, "Must have some workers to store data for");
_length = length;
_data = NEW_C_HEAP_ARRAY(T, _length, mtGC);
for (uint i = 0; i < MaxThreadWorkItems; i++) {
_thread_work_items[i] = NULL;
}
reset();
}
@ -59,14 +61,23 @@ WorkerDataArray<T>::~WorkerDataArray() {
}
template <typename T>
void WorkerDataArray<T>::link_thread_work_items(WorkerDataArray<size_t>* thread_work_items) {
_thread_work_items = thread_work_items;
void WorkerDataArray<T>::link_thread_work_items(WorkerDataArray<size_t>* thread_work_items, uint index) {
assert(index < MaxThreadWorkItems, "Tried to access thread work item %u (max %u)", index, MaxThreadWorkItems);
_thread_work_items[index] = thread_work_items;
}
template <typename T>
void WorkerDataArray<T>::set_thread_work_item(uint worker_i, size_t value) {
assert(_thread_work_items != NULL, "No sub count");
_thread_work_items->set(worker_i, value);
void WorkerDataArray<T>::set_thread_work_item(uint worker_i, size_t value, uint index) {
assert(index < MaxThreadWorkItems, "Tried to access thread work item %u (max %u)", index, MaxThreadWorkItems);
assert(_thread_work_items[index] != NULL, "No sub count");
_thread_work_items[index]->set(worker_i, value);
}
template <typename T>
void WorkerDataArray<T>::add_thread_work_item(uint worker_i, size_t value, uint index) {
assert(index < MaxThreadWorkItems, "Tried to access thread work item %u (max %u)", index, MaxThreadWorkItems);
assert(_thread_work_items[index] != NULL, "No sub count");
_thread_work_items[index]->add(worker_i, value);
}
template <typename T>
@ -148,8 +159,10 @@ void WorkerDataArray<T>::print_details_on(outputStream* out) const {
template <typename T>
void WorkerDataArray<T>::reset() {
set_all(uninitialized());
if (_thread_work_items != NULL) {
_thread_work_items->reset();
for (uint i = 0; i < MaxThreadWorkItems; i++) {
if (_thread_work_items[i] != NULL) {
_thread_work_items[i]->reset();
}
}
}

View File

@ -94,7 +94,14 @@ public class TestGCLogMessages {
new LogMessageWithLevel("Other", Level.INFO),
// Update RS
new LogMessageWithLevel("Update RS", Level.DEBUG),
new LogMessageWithLevel("Processed Buffers", Level.DEBUG),
new LogMessageWithLevel("Scan HCC", Level.TRACE),
// Scan RS
new LogMessageWithLevel("Scan RS", Level.DEBUG),
new LogMessageWithLevel("Scanned Cards", Level.DEBUG),
new LogMessageWithLevel("Claimed Cards", Level.DEBUG),
new LogMessageWithLevel("Skipped Cards", Level.DEBUG),
// Ext Root Scan
new LogMessageWithLevel("Thread Roots", Level.TRACE),
new LogMessageWithLevel("StringTable Roots", Level.TRACE),

View File

@ -34,7 +34,11 @@ class WorkerDataArrayTest : public ::testing::Test {
protected:
WorkerDataArrayTest() :
title("Test array"),
array(3, title) {
array(3, title),
sub_item_title("Sub item array"),
sub_item(3, sub_item_title) {
array.link_thread_work_items(&sub_item);
}
const char* print_summary() {
@ -65,6 +69,9 @@ class WorkerDataArrayTest : public ::testing::Test {
const char* title;
WorkerDataArray<T> array;
const char* sub_item_title;
WorkerDataArray<size_t> sub_item;
private:
virtual const char* expected_summary() = 0;
virtual const char* expected_details() = 0;
@ -111,6 +118,10 @@ class BasicWorkerDataArrayTest : public WorkerDataArrayTest<size_t> {
array.set(0, 5);
array.set(1, 3);
array.set(2, 7);
array.set_thread_work_item(0, 1);
array.set_thread_work_item(1, 2);
array.set_thread_work_item(2, 3);
}
private:
@ -125,10 +136,12 @@ class BasicWorkerDataArrayTest : public WorkerDataArrayTest<size_t> {
TEST_VM_F(BasicWorkerDataArrayTest, sum_test) {
ASSERT_EQ(15u, array.sum());
ASSERT_EQ(6u, array.thread_work_items(0)->sum());
}
TEST_VM_F(BasicWorkerDataArrayTest, average_test) {
ASSERT_NEAR(5.0, array.average(), epsilon);
ASSERT_NEAR(2.0, array.thread_work_items(0)->average(), epsilon);
}
TEST_VM_F(BasicWorkerDataArrayTest, print_summary_on_test) {
@ -149,6 +162,16 @@ class AddWorkerDataArrayTest : public WorkerDataArrayTest<size_t> {
for (uint i = 0; i < 3; i++) {
array.add(i, 1);
}
WorkerDataArray<size_t>* sub_items = array.thread_work_items(0);
sub_items->set(0, 1);
sub_items->set(1, 2);
sub_items->set(2, 3);
for (uint i = 0; i < 3; i++) {
array.add_thread_work_item(i, 1);
}
}
private:
@ -163,10 +186,12 @@ class AddWorkerDataArrayTest : public WorkerDataArrayTest<size_t> {
TEST_VM_F(AddWorkerDataArrayTest, sum_test) {
ASSERT_EQ(18u, array.sum());
ASSERT_EQ(9u, array.thread_work_items(0)->sum());
}
TEST_VM_F(AddWorkerDataArrayTest, average_test) {
ASSERT_NEAR(6.0, array.average(), epsilon);
ASSERT_NEAR(3.0, array.thread_work_items(0)->average(), epsilon);
}
TEST_VM_F(AddWorkerDataArrayTest, print_summary_on_test) {