6608385: G1: need to support parallel reference processing

Implement support for ParallelRefProcEnabled in the reference processing that takes place at the end of G1 concurrent marking.

Reviewed-by: tonyp, ysr
This commit is contained in:
John Cuthbertson 2011-01-25 10:56:22 -08:00
parent 14f4450d25
commit ecdb5848eb
4 changed files with 296 additions and 29 deletions

View File

@ -1055,7 +1055,12 @@ public:
do {
double start_vtime_sec = os::elapsedVTime();
double start_time_sec = os::elapsedTime();
the_task->do_marking_step(10.0);
double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
the_task->do_marking_step(mark_step_duration_ms,
true /* do_stealing */,
true /* do_termination */);
double end_time_sec = os::elapsedTime();
double end_vtime_sec = os::elapsedVTime();
double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
@ -1111,7 +1116,8 @@ void ConcurrentMark::markFromRoots() {
_restart_for_overflow = false;
set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);
size_t active_workers = MAX2((size_t) 1, parallel_marking_threads());
set_phase(active_workers, true /* concurrent */);
CMConcurrentMarkingTask markingTask(this, cmThread());
if (parallel_marking_threads() > 0)
@ -1176,6 +1182,12 @@ void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
/* silent */ false,
/* use_prev_marking */ false);
}
assert(!restart_for_overflow(), "sanity");
}
// Reset the marking state if marking completed
if (!restart_for_overflow()) {
set_non_marking_state();
}
#if VERIFY_OBJS_PROCESSED
@ -1853,6 +1865,8 @@ void ConcurrentMark::completeCleanup() {
assert(local_free_list.is_empty(), "post-condition");
}
// Support closures for reference procssing in G1
bool G1CMIsAliveClosure::do_object_b(oop obj) {
HeapWord* addr = (HeapWord*)obj;
return addr != NULL &&
@ -1873,11 +1887,17 @@ class G1CMKeepAliveClosure: public OopClosure {
virtual void do_oop( oop* p) { do_oop_work(p); }
template <class T> void do_oop_work(T* p) {
oop thisOop = oopDesc::load_decode_heap_oop(p);
HeapWord* addr = (HeapWord*)thisOop;
if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
oop obj = oopDesc::load_decode_heap_oop(p);
HeapWord* addr = (HeapWord*)obj;
if (_cm->verbose_high())
gclog_or_tty->print_cr("\t[0] we're looking at location "
"*"PTR_FORMAT" = "PTR_FORMAT,
p, (void*) obj);
if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
_bitMap->mark(addr);
_cm->mark_stack_push(thisOop);
_cm->mark_stack_push(obj);
}
}
};
@ -1899,6 +1919,199 @@ class G1CMDrainMarkingStackClosure: public VoidClosure {
}
};
// 'Keep Alive' closure used by parallel reference processing.
// An instance of this closure is used in the parallel reference processing
// code rather than an instance of G1CMKeepAliveClosure. We could have used
// the G1CMKeepAliveClosure as it is MT-safe. Also reference objects are
// placed on to discovered ref lists once so we can mark and push with no
// need to check whether the object has already been marked. Using the
// G1CMKeepAliveClosure would mean, however, having all the worker threads
// operating on the global mark stack. This means that an individual
// worker would be doing lock-free pushes while it processes its own
// discovered ref list followed by drain call. If the discovered ref lists
// are unbalanced then this could cause interference with the other
// workers. Using a CMTask (and its embedded local data structures)
// avoids that potential interference.
class G1CMParKeepAliveAndDrainClosure: public OopClosure {
ConcurrentMark* _cm;
CMTask* _task;
CMBitMap* _bitMap;
int _ref_counter_limit;
int _ref_counter;
public:
G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm,
CMTask* task,
CMBitMap* bitMap) :
_cm(cm), _task(task), _bitMap(bitMap),
_ref_counter_limit(G1RefProcDrainInterval)
{
assert(_ref_counter_limit > 0, "sanity");
_ref_counter = _ref_counter_limit;
}
virtual void do_oop(narrowOop* p) { do_oop_work(p); }
virtual void do_oop( oop* p) { do_oop_work(p); }
template <class T> void do_oop_work(T* p) {
if (!_cm->has_overflown()) {
oop obj = oopDesc::load_decode_heap_oop(p);
if (_cm->verbose_high())
gclog_or_tty->print_cr("\t[%d] we're looking at location "
"*"PTR_FORMAT" = "PTR_FORMAT,
_task->task_id(), p, (void*) obj);
_task->deal_with_reference(obj);
_ref_counter--;
if (_ref_counter == 0) {
// We have dealt with _ref_counter_limit references, pushing them and objects
// reachable from them on to the local stack (and possibly the global stack).
// Call do_marking_step() to process these entries. We call the routine in a
// loop, which we'll exit if there's nothing more to do (i.e. we're done
// with the entries that we've pushed as a result of the deal_with_reference
// calls above) or we overflow.
// Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
// while there may still be some work to do. (See the comment at the
// beginning of CMTask::do_marking_step() for those conditions - one of which
// is reaching the specified time target.) It is only when
// CMTask::do_marking_step() returns without setting the has_aborted() flag
// that the marking has completed.
do {
double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
_task->do_marking_step(mark_step_duration_ms,
false /* do_stealing */,
false /* do_termination */);
} while (_task->has_aborted() && !_cm->has_overflown());
_ref_counter = _ref_counter_limit;
}
} else {
if (_cm->verbose_high())
gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
}
}
};
class G1CMParDrainMarkingStackClosure: public VoidClosure {
ConcurrentMark* _cm;
CMTask* _task;
public:
G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
_cm(cm), _task(task)
{}
void do_void() {
do {
if (_cm->verbose_high())
gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step", _task->task_id());
// We call CMTask::do_marking_step() to completely drain the local and
// global marking stacks. The routine is called in a loop, which we'll
// exit if there's nothing more to do (i.e. we'completely drained the
// entries that were pushed as a result of applying the
// G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
// lists above) or we overflow the global marking stack.
// Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
// while there may still be some work to do. (See the comment at the
// beginning of CMTask::do_marking_step() for those conditions - one of which
// is reaching the specified time target.) It is only when
// CMTask::do_marking_step() returns without setting the has_aborted() flag
// that the marking has completed.
_task->do_marking_step(1000000000.0 /* something very large */,
true /* do_stealing */,
true /* do_termination */);
} while (_task->has_aborted() && !_cm->has_overflown());
}
};
// Implementation of AbstractRefProcTaskExecutor for G1
class G1RefProcTaskExecutor: public AbstractRefProcTaskExecutor {
private:
G1CollectedHeap* _g1h;
ConcurrentMark* _cm;
CMBitMap* _bitmap;
WorkGang* _workers;
int _active_workers;
public:
G1RefProcTaskExecutor(G1CollectedHeap* g1h,
ConcurrentMark* cm,
CMBitMap* bitmap,
WorkGang* workers,
int n_workers) :
_g1h(g1h), _cm(cm), _bitmap(bitmap),
_workers(workers), _active_workers(n_workers)
{ }
// Executes the given task using concurrent marking worker threads.
virtual void execute(ProcessTask& task);
virtual void execute(EnqueueTask& task);
};
class G1RefProcTaskProxy: public AbstractGangTask {
typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
ProcessTask& _proc_task;
G1CollectedHeap* _g1h;
ConcurrentMark* _cm;
CMBitMap* _bitmap;
public:
G1RefProcTaskProxy(ProcessTask& proc_task,
G1CollectedHeap* g1h,
ConcurrentMark* cm,
CMBitMap* bitmap) :
AbstractGangTask("Process reference objects in parallel"),
_proc_task(proc_task), _g1h(g1h), _cm(cm), _bitmap(bitmap)
{}
virtual void work(int i) {
CMTask* marking_task = _cm->task(i);
G1CMIsAliveClosure g1_is_alive(_g1h);
G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task, _bitmap);
G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
_proc_task.work(i, g1_is_alive, g1_par_keep_alive, g1_par_drain);
}
};
void G1RefProcTaskExecutor::execute(ProcessTask& proc_task) {
assert(_workers != NULL, "Need parallel worker threads.");
G1RefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm, _bitmap);
// We need to reset the phase for each task execution so that
// the termination protocol of CMTask::do_marking_step works.
_cm->set_phase(_active_workers, false /* concurrent */);
_g1h->set_par_threads(_active_workers);
_workers->run_task(&proc_task_proxy);
_g1h->set_par_threads(0);
}
class G1RefEnqueueTaskProxy: public AbstractGangTask {
typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
EnqueueTask& _enq_task;
public:
G1RefEnqueueTaskProxy(EnqueueTask& enq_task) :
AbstractGangTask("Enqueue reference objects in parallel"),
_enq_task(enq_task)
{ }
virtual void work(int i) {
_enq_task.work(i);
}
};
void G1RefProcTaskExecutor::execute(EnqueueTask& enq_task) {
assert(_workers != NULL, "Need parallel worker threads.");
G1RefEnqueueTaskProxy enq_task_proxy(enq_task);
_g1h->set_par_threads(_active_workers);
_workers->run_task(&enq_task_proxy);
_g1h->set_par_threads(0);
}
void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
ResourceMark rm;
HandleMark hm;
@ -1917,18 +2130,52 @@ void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
G1CMDrainMarkingStackClosure
g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive);
// XXXYYY Also: copy the parallel ref processing code from CMS.
rp->process_discovered_references(&g1_is_alive,
&g1_keep_alive,
&g1_drain_mark_stack,
NULL);
// We use the work gang from the G1CollectedHeap and we utilize all
// the worker threads.
int active_workers = MAX2(MIN2(g1h->workers()->total_workers(), (int)_max_task_num), 1);
G1RefProcTaskExecutor par_task_executor(g1h, this, nextMarkBitMap(),
g1h->workers(), active_workers);
if (rp->processing_is_mt()) {
// Set the degree of MT here. If the discovery is done MT, there
// may have been a different number of threads doing the discovery
// and a different number of discovered lists may have Ref objects.
// That is OK as long as the Reference lists are balanced (see
// balance_all_queues() and balance_queues()).
rp->set_mt_degree(active_workers);
rp->process_discovered_references(&g1_is_alive,
&g1_keep_alive,
&g1_drain_mark_stack,
&par_task_executor);
// The work routines of the parallel keep_alive and drain_marking_stack
// will set the has_overflown flag if we overflow the global marking
// stack.
} else {
rp->process_discovered_references(&g1_is_alive,
&g1_keep_alive,
&g1_drain_mark_stack,
NULL);
}
assert(_markStack.overflow() || _markStack.isEmpty(),
"mark stack should be empty (unless it overflowed)");
"mark stack should be empty (unless it overflowed)");
if (_markStack.overflow()) {
// Should have been done already when we tried to push an
// entry on to the global mark stack. But let's do it again.
set_has_overflown();
}
rp->enqueue_discovered_references();
if (rp->processing_is_mt()) {
assert(rp->num_q() == active_workers, "why not");
rp->enqueue_discovered_references(&par_task_executor);
} else {
rp->enqueue_discovered_references();
}
rp->verify_no_references_recorded();
assert(!rp->discovery_enabled(), "should have been disabled");
@ -1955,7 +2202,9 @@ public:
CMTask* task = _cm->task(worker_i);
task->record_start_time();
do {
task->do_marking_step(1000000000.0 /* something very large */);
task->do_marking_step(1000000000.0 /* something very large */,
true /* do_stealing */,
true /* do_termination */);
} while (task->has_aborted() && !_cm->has_overflown());
// If we overflow, then we do not want to restart. We instead
// want to abort remark and do concurrent marking again.
@ -1978,7 +2227,7 @@ void ConcurrentMark::checkpointRootsFinalWork() {
G1CollectedHeap::StrongRootsScope srs(g1h);
// this is remark, so we'll use up all available threads
int active_workers = ParallelGCThreads;
set_phase(active_workers, false);
set_phase(active_workers, false /* concurrent */);
CMRemarkTask remarkTask(this);
// We will start all available threads, even if we decide that the
@ -1992,7 +2241,7 @@ void ConcurrentMark::checkpointRootsFinalWork() {
G1CollectedHeap::StrongRootsScope srs(g1h);
// this is remark, so we'll use up all available threads
int active_workers = 1;
set_phase(active_workers, false);
set_phase(active_workers, false /* concurrent */);
CMRemarkTask remarkTask(this);
// We will start all available threads, even if we decide that the
@ -2005,9 +2254,6 @@ void ConcurrentMark::checkpointRootsFinalWork() {
print_stats();
if (!restart_for_overflow())
set_non_marking_state();
#if VERIFY_OBJS_PROCESSED
if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
@ -3124,7 +3370,7 @@ void CMTask::deal_with_reference(oop obj) {
// do nothing
}
#else // _CHECK_BOTH_FINGERS_
// we will only check the global finger
// we will only check the global finger
if (objAddr < global_finger) {
// see long comment above
@ -3249,7 +3495,7 @@ void CMTask::regular_clock_call() {
double elapsed_time_ms = curr_time_ms - _start_time_ms;
if (elapsed_time_ms > _time_target_ms) {
set_has_aborted();
_has_aborted_timed_out = true;
_has_timed_out = true;
statsOnly( ++_aborted_timed_out );
return;
}
@ -3754,7 +4000,9 @@ void CMTask::print_stats() {
*****************************************************************************/
void CMTask::do_marking_step(double time_target_ms) {
void CMTask::do_marking_step(double time_target_ms,
bool do_stealing,
bool do_termination) {
assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
assert(concurrent() == _cm->concurrent(), "they should be the same");
@ -3794,7 +4042,7 @@ void CMTask::do_marking_step(double time_target_ms) {
// clear all flags
clear_has_aborted();
_has_aborted_timed_out = false;
_has_timed_out = false;
_draining_satb_buffers = false;
++_calls;
@ -3970,7 +4218,7 @@ void CMTask::do_marking_step(double time_target_ms) {
drain_global_stack(false);
// Attempt at work stealing from other task's queues.
if (!has_aborted()) {
if (do_stealing && !has_aborted()) {
// We have not aborted. This means that we have finished all that
// we could. Let's try to do some stealing...
@ -4011,7 +4259,7 @@ void CMTask::do_marking_step(double time_target_ms) {
// We still haven't aborted. Now, let's try to get into the
// termination protocol.
if (!has_aborted()) {
if (do_termination && !has_aborted()) {
// We cannot check whether the global stack is empty, since other
// tasks might be concurrently pushing objects on it. We also cannot
// check if the region stack is empty because if a thread is aborting
@ -4087,7 +4335,7 @@ void CMTask::do_marking_step(double time_target_ms) {
statsOnly( ++_aborted );
if (_has_aborted_timed_out) {
if (_has_timed_out) {
double diff_ms = elapsed_time_ms - _time_target_ms;
// Keep statistics of how well we did with respect to hitting
// our target only if we actually timed out (if we aborted for

View File

@ -353,6 +353,10 @@ class ConcurrentMark: public CHeapObj {
friend class CMConcurrentMarkingTask;
friend class G1ParNoteEndTask;
friend class CalcLiveObjectsClosure;
friend class G1RefProcTaskProxy;
friend class G1RefProcTaskExecutor;
friend class G1CMParKeepAliveAndDrainClosure;
friend class G1CMParDrainMarkingStackClosure;
protected:
ConcurrentMarkThread* _cmThread; // the thread doing the work
@ -936,7 +940,7 @@ private:
// if this is true, then the task has aborted for some reason
bool _has_aborted;
// set when the task aborts because it has met its time quota
bool _has_aborted_timed_out;
bool _has_timed_out;
// true when we're draining SATB buffers; this avoids the task
// aborting due to SATB buffers being available (as we're already
// dealing with them)
@ -1041,7 +1045,7 @@ public:
// trying not to exceed the given duration. However, it might exit
// prematurely, according to some conditions (i.e. SATB buffers are
// available for processing).
void do_marking_step(double target_ms);
void do_marking_step(double target_ms, bool do_stealing, bool do_termination);
// These two calls start and stop the timer
void record_start_time() {
@ -1063,7 +1067,8 @@ public:
bool has_aborted() { return _has_aborted; }
void set_has_aborted() { _has_aborted = true; }
void clear_has_aborted() { _has_aborted = false; }
bool claimed() { return _claimed; }
bool has_timed_out() { return _has_timed_out; }
bool claimed() { return _claimed; }
// Support routines for the partially scanned region that may be
// recorded as a result of aborting while draining the CMRegionStack

View File

@ -81,6 +81,14 @@
product(intx, G1MarkRegionStackSize, 1024 * 1024, \
"Size of the region stack for concurrent marking.") \
\
product(double, G1ConcMarkStepDurationMillis, 10.0, \
"Target duration of individual concurrent marking steps " \
"in milliseconds.") \
\
product(intx, G1RefProcDrainInterval, 10, \
"The number of discovered reference objects to process before " \
"draining concurrent marking work queues.") \
\
develop(bool, G1SATBBarrierPrintNullPreVals, false, \
"If true, count frac of ptr writes with null pre-vals.") \
\

View File

@ -1941,10 +1941,16 @@ bool Arguments::check_vm_args_consistency() {
status = false;
}
#ifndef SERIALGC
if (UseG1GC) {
status = status && verify_percentage(InitiatingHeapOccupancyPercent,
"InitiatingHeapOccupancyPercent");
status = status && verify_min_value(G1RefProcDrainInterval, 1,
"G1RefProcDrainInterval");
status = status && verify_min_value((intx)G1ConcMarkStepDurationMillis, 1,
"G1ConcMarkStepDurationMillis");
}
#endif
status = status && verify_interval(RefDiscoveryPolicy,
ReferenceProcessor::DiscoveryPolicyMin,