7143511: G1: Another instance of high GC Worker Other time (50ms)

Tiered compilation has increased the number of nmethods in the code cache. This has, in turn, significantly increased the number of marked nmethods processed during the StrongRootsScope destructor. Create a specialized version of CodeBlobToOopClosure for G1 which places only those nmethods that contain pointers into the collection set on to the marked nmethods list.

Reviewed-by: iveresov, tonyp
This commit is contained in:
John Cuthbertson 2012-03-13 11:05:32 -07:00
parent a6d84bc811
commit 79c90a15fa
4 changed files with 175 additions and 79 deletions

View File

@ -553,7 +553,7 @@ public:
static void oops_do_marking_prologue();
static void oops_do_marking_epilogue();
static bool oops_do_marking_is_active() { return _oops_do_mark_nmethods != NULL; }
DEBUG_ONLY(bool test_oops_do_mark() { return _oops_do_mark_link != NULL; })
bool test_oops_do_mark() { return _oops_do_mark_link != NULL; }
// ScopeDesc for an instruction
ScopeDesc* scope_desc_at(address pc);

View File

@ -4675,6 +4675,7 @@ public:
double start_time_ms = os::elapsedTime() * 1000.0;
_g1h->g1_policy()->record_gc_worker_start_time(worker_id, start_time_ms);
{
ResourceMark rm;
HandleMark hm;
@ -4736,6 +4737,12 @@ public:
}
assert(pss.refs()->is_empty(), "should be empty");
// Close the inner scope so that the ResourceMark and HandleMark
// destructors are executed here and are included as part of the
// "GC Worker Time".
}
double end_time_ms = os::elapsedTime() * 1000.0;
_g1h->g1_policy()->record_gc_worker_end_time(worker_id, end_time_ms);
}
@ -4743,6 +4750,67 @@ public:
// *** Common G1 Evacuation Stuff
// Closures that support the filtering of CodeBlobs scanned during
// external root scanning.
// Closure applied to reference fields in code blobs (specifically nmethods)
// to determine whether an nmethod contains references that point into
// the collection set. Used as a predicate when walking code roots so
// that only nmethods that point into the collection set are added to the
// 'marked' list.
class G1FilteredCodeBlobToOopClosure : public CodeBlobToOopClosure {
class G1PointsIntoCSOopClosure : public OopClosure {
G1CollectedHeap* _g1;
bool _points_into_cs;
public:
G1PointsIntoCSOopClosure(G1CollectedHeap* g1) :
_g1(g1), _points_into_cs(false) { }
bool points_into_cs() const { return _points_into_cs; }
template <class T>
void do_oop_nv(T* p) {
if (!_points_into_cs) {
T heap_oop = oopDesc::load_heap_oop(p);
if (!oopDesc::is_null(heap_oop) &&
_g1->in_cset_fast_test(oopDesc::decode_heap_oop_not_null(heap_oop))) {
_points_into_cs = true;
}
}
}
virtual void do_oop(oop* p) { do_oop_nv(p); }
virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
};
G1CollectedHeap* _g1;
public:
G1FilteredCodeBlobToOopClosure(G1CollectedHeap* g1, OopClosure* cl) :
CodeBlobToOopClosure(cl, true), _g1(g1) { }
virtual void do_code_blob(CodeBlob* cb) {
nmethod* nm = cb->as_nmethod_or_null();
if (nm != NULL && !(nm->test_oops_do_mark())) {
G1PointsIntoCSOopClosure predicate_cl(_g1);
nm->oops_do(&predicate_cl);
if (predicate_cl.points_into_cs()) {
// At least one of the reference fields or the oop relocations
// in the nmethod points into the collection set. We have to
// 'mark' this nmethod.
// Note: Revisit the following if CodeBlobToOopClosure::do_code_blob()
// or MarkingCodeBlobClosure::do_code_blob() change.
if (!nm->test_set_oops_do_mark()) {
do_newly_marked_nmethod(nm);
}
}
}
}
};
// This method is run in a GC worker.
void
@ -4764,7 +4832,7 @@ g1_process_strong_roots(bool collecting_perm_gen,
// Walk the code cache w/o buffering, because StarTask cannot handle
// unaligned oop locations.
CodeBlobToOopClosure eager_scan_code_roots(scan_non_heap_roots, /*do_marking=*/ true);
G1FilteredCodeBlobToOopClosure eager_scan_code_roots(this, scan_non_heap_roots);
process_strong_roots(false, // no scoping; this is parallel code
collecting_perm_gen, so,
@ -5378,11 +5446,14 @@ void G1CollectedHeap::evacuate_collection_set() {
rem_set()->prepare_for_younger_refs_iterate(true);
assert(dirty_card_queue_set().completed_buffers_num() == 0, "Should be empty");
double start_par = os::elapsedTime();
double start_par_time_sec = os::elapsedTime();
double end_par_time_sec;
{
StrongRootsScope srs(this);
if (G1CollectedHeap::use_parallel_gc_threads()) {
// The individual threads will set their evac-failure closures.
StrongRootsScope srs(this);
if (ParallelGCVerbose) G1ParScanThreadState::print_termination_stats_hdr();
// These tasks use ShareHeap::_process_strong_tasks
assert(UseDynamicNumberOfGCThreads ||
@ -5390,13 +5461,24 @@ void G1CollectedHeap::evacuate_collection_set() {
"If not dynamic should be using all the workers");
workers()->run_task(&g1_par_task);
} else {
StrongRootsScope srs(this);
g1_par_task.set_for_termination(n_workers);
g1_par_task.work(0);
}
end_par_time_sec = os::elapsedTime();
double par_time = (os::elapsedTime() - start_par) * 1000.0;
g1_policy()->record_par_time(par_time);
// Closing the inner scope will execute the destructor
// for the StrongRootsScope object. We record the current
// elapsed time before closing the scope so that time
// taken for the SRS destructor is NOT included in the
// reported parallel time.
}
double par_time_ms = (end_par_time_sec - start_par_time_sec) * 1000.0;
g1_policy()->record_par_time(par_time_ms);
double code_root_fixup_time_ms =
(os::elapsedTime() - end_par_time_sec) * 1000.0;
g1_policy()->record_code_root_fixup_time(code_root_fixup_time_ms);
set_par_threads(0);

View File

@ -1261,6 +1261,9 @@ void G1CollectorPolicy::record_collection_pause_end(int no_of_gc_threads) {
other_time_ms -= known_time;
}
// Now subtract the time taken to fix up roots in generated code
other_time_ms -= _cur_collection_code_root_fixup_time_ms;
// Subtract the time taken to clean the card table from the
// current value of "other time"
other_time_ms -= _cur_clear_ct_time_ms;
@ -1401,10 +1404,10 @@ void G1CollectorPolicy::record_collection_pause_end(int no_of_gc_threads) {
print_par_stats(2, "Object Copy", _par_last_obj_copy_times_ms);
print_par_stats(2, "Termination", _par_last_termination_times_ms);
print_par_sizes(3, "Termination Attempts", _par_last_termination_attempts);
print_par_stats(2, "GC Worker End", _par_last_gc_worker_end_times_ms);
for (int i = 0; i < _parallel_gc_threads; i++) {
_par_last_gc_worker_times_ms[i] = _par_last_gc_worker_end_times_ms[i] - _par_last_gc_worker_start_times_ms[i];
_par_last_gc_worker_times_ms[i] = _par_last_gc_worker_end_times_ms[i] -
_par_last_gc_worker_start_times_ms[i];
double worker_known_time = _par_last_ext_root_scan_times_ms[i] +
_par_last_satb_filtering_times_ms[i] +
@ -1413,10 +1416,13 @@ void G1CollectorPolicy::record_collection_pause_end(int no_of_gc_threads) {
_par_last_obj_copy_times_ms[i] +
_par_last_termination_times_ms[i];
_par_last_gc_worker_other_times_ms[i] = _cur_collection_par_time_ms - worker_known_time;
_par_last_gc_worker_other_times_ms[i] = _par_last_gc_worker_times_ms[i] -
worker_known_time;
}
print_par_stats(2, "GC Worker", _par_last_gc_worker_times_ms);
print_par_stats(2, "GC Worker Other", _par_last_gc_worker_other_times_ms);
print_par_stats(2, "GC Worker Total", _par_last_gc_worker_times_ms);
print_par_stats(2, "GC Worker End", _par_last_gc_worker_end_times_ms);
} else {
print_stats(1, "Ext Root Scanning", ext_root_scan_time);
if (print_marking_info) {
@ -1427,6 +1433,7 @@ void G1CollectorPolicy::record_collection_pause_end(int no_of_gc_threads) {
print_stats(1, "Scan RS", scan_rs_time);
print_stats(1, "Object Copying", obj_copy_time);
}
print_stats(1, "Code Root Fixup", _cur_collection_code_root_fixup_time_ms);
if (print_marking_info) {
print_stats(1, "Complete CSet Marking", _mark_closure_time_ms);
}

View File

@ -179,6 +179,9 @@ private:
size_t _cur_collection_pause_used_at_start_bytes;
size_t _cur_collection_pause_used_regions_at_start;
double _cur_collection_par_time_ms;
double _cur_collection_code_root_fixup_time_ms;
double _cur_satb_drain_time_ms;
double _cur_clear_ct_time_ms;
double _cur_ref_proc_time_ms;
@ -226,8 +229,8 @@ private:
double* _par_last_gc_worker_times_ms;
// Each workers 'other' time i.e. the elapsed time of the parallel
// phase of the pause minus the sum of the individual sub-phase
// times for a given worker thread.
// code executed by a worker minus the sum of the individual sub-phase
// times for that worker thread.
double* _par_last_gc_worker_other_times_ms;
// indicates whether we are in young or mixed GC mode
@ -897,6 +900,10 @@ public:
_cur_collection_par_time_ms = ms;
}
void record_code_root_fixup_time(double ms) {
_cur_collection_code_root_fixup_time_ms = ms;
}
void record_aux_start_time(int i) {
guarantee(i < _aux_num, "should be within range");
_cur_aux_start_times_ms[i] = os::elapsedTime() * 1000.0;