8213108: Improve work distribution during remembered set scan

Before scanning the heap for roots pointing into the collection set, merge the remembered sets into a single remembered set (the card table) and distribute the scanning work by heap location, as other collectors do.
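As an illustration of that two-phase scheme, here is a minimal, self-contained sketch (not HotSpot code; all names and constants are made up for the example): phase one merges every remembered set entry for the collection set onto one card table, phase two lets workers claim contiguous chunks of that card table so each worker scans cards that are adjacent in memory.

#include <algorithm>
#include <atomic>
#include <cstdint>
#include <vector>

static const uint8_t kClean = 0xff;
static const uint8_t kDirty = 0x00;

// Phase 1 (merge): mark every card referenced by any remembered set as dirty.
void merge_heap_roots(const std::vector<std::vector<size_t> >& rem_sets,
                      std::vector<uint8_t>& card_table) {
  for (size_t r = 0; r < rem_sets.size(); r++) {
    for (size_t i = 0; i < rem_sets[r].size(); i++) {
      card_table[rem_sets[r][i]] = kDirty;
    }
  }
}

// Phase 2 (scan): workers claim fixed-size chunks of the merged card table, so
// work is distributed by heap location instead of by remembered set owner.
void scan_heap_roots(std::vector<uint8_t>& card_table,
                     std::atomic<size_t>& next_chunk, size_t cards_per_chunk) {
  size_t num_chunks = (card_table.size() + cards_per_chunk - 1) / cards_per_chunk;
  for (size_t c = next_chunk.fetch_add(1); c < num_chunks; c = next_chunk.fetch_add(1)) {
    size_t start = c * cards_per_chunk;
    size_t end = std::min(card_table.size(), start + cards_per_chunk);
    for (size_t card = start; card < end; card++) {
      if (card_table[card] == kDirty) {
        // ... scan the heap words covered by this card for collection set references ...
      }
    }
  }
}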

Reviewed-by: kbarrett, lkorinth
Thomas Schatzl 2019-06-27 11:48:32 +02:00
parent a77f50d3d1
commit d46d9318c1
35 changed files with 1803 additions and 1257 deletions

View File

@ -38,7 +38,7 @@ static double rs_length_diff_defaults[] = {
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
};
static double cost_per_card_ms_defaults[] = {
static double cost_per_log_buffer_entry_ms_defaults[] = {
0.01, 0.005, 0.005, 0.003, 0.003, 0.002, 0.002, 0.0015
};
@ -47,7 +47,7 @@ static double young_cards_per_entry_ratio_defaults[] = {
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0
};
static double cost_per_entry_ms_defaults[] = {
static double young_only_cost_per_remset_card_ms_defaults[] = {
0.015, 0.01, 0.01, 0.008, 0.008, 0.0055, 0.0055, 0.005
};
@ -77,12 +77,12 @@ G1Analytics::G1Analytics(const G1Predictions* predictor) :
_alloc_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
_prev_collection_pause_end_ms(0.0),
_rs_length_diff_seq(new TruncatedSeq(TruncatedSeqLength)),
_cost_per_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
_cost_per_log_buffer_entry_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
_cost_scan_hcc_seq(new TruncatedSeq(TruncatedSeqLength)),
_young_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)),
_mixed_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)),
_cost_per_entry_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
_mixed_cost_per_entry_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
_young_only_cost_per_remset_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
_mixed_cost_per_remset_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
_cost_per_byte_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
_constant_other_time_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
_young_other_cost_per_region_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
@ -101,10 +101,10 @@ G1Analytics::G1Analytics(const G1Predictions* predictor) :
int index = MIN2(ParallelGCThreads - 1, 7u);
_rs_length_diff_seq->add(rs_length_diff_defaults[index]);
_cost_per_card_ms_seq->add(cost_per_card_ms_defaults[index]);
_cost_per_log_buffer_entry_ms_seq->add(cost_per_log_buffer_entry_ms_defaults[index]);
_cost_scan_hcc_seq->add(0.0);
_young_cards_per_entry_ratio_seq->add(young_cards_per_entry_ratio_defaults[index]);
_cost_per_entry_ms_seq->add(cost_per_entry_ms_defaults[index]);
_young_only_cost_per_remset_card_ms_seq->add(young_only_cost_per_remset_card_ms_defaults[index]);
_cost_per_byte_ms_seq->add(cost_per_byte_ms_defaults[index]);
_constant_other_time_ms_seq->add(constant_other_time_ms_defaults[index]);
_young_other_cost_per_region_ms_seq->add(young_other_cost_per_region_ms_defaults[index]);
@ -158,19 +158,19 @@ void G1Analytics::compute_pause_time_ratio(double interval_ms, double pause_time
(pause_time_ms * _recent_prev_end_times_for_all_gcs_sec->num()) / interval_ms;
}
void G1Analytics::report_cost_per_card_ms(double cost_per_card_ms) {
_cost_per_card_ms_seq->add(cost_per_card_ms);
void G1Analytics::report_cost_per_log_buffer_entry_ms(double cost_per_log_buffer_entry_ms) {
_cost_per_log_buffer_entry_ms_seq->add(cost_per_log_buffer_entry_ms);
}
void G1Analytics::report_cost_scan_hcc(double cost_scan_hcc) {
_cost_scan_hcc_seq->add(cost_scan_hcc);
}
void G1Analytics::report_cost_per_entry_ms(double cost_per_entry_ms, bool for_young_gc) {
void G1Analytics::report_cost_per_remset_card_ms(double cost_per_remset_card_ms, bool for_young_gc) {
if (for_young_gc) {
_cost_per_entry_ms_seq->add(cost_per_entry_ms);
_young_only_cost_per_remset_card_ms_seq->add(cost_per_remset_card_ms);
} else {
_mixed_cost_per_entry_ms_seq->add(cost_per_entry_ms);
_mixed_cost_per_remset_card_ms_seq->add(cost_per_remset_card_ms);
}
}
@ -222,8 +222,8 @@ double G1Analytics::predict_alloc_rate_ms() const {
return get_new_prediction(_alloc_rate_ms_seq);
}
double G1Analytics::predict_cost_per_card_ms() const {
return get_new_prediction(_cost_per_card_ms_seq);
double G1Analytics::predict_cost_per_log_buffer_entry_ms() const {
return get_new_prediction(_cost_per_log_buffer_entry_ms_seq);
}
double G1Analytics::predict_scan_hcc_ms() const {
@ -231,7 +231,7 @@ double G1Analytics::predict_scan_hcc_ms() const {
}
double G1Analytics::predict_rs_update_time_ms(size_t pending_cards) const {
return pending_cards * predict_cost_per_card_ms() + predict_scan_hcc_ms();
return pending_cards * predict_cost_per_log_buffer_entry_ms() + predict_scan_hcc_ms();
}
double G1Analytics::predict_young_cards_per_entry_ratio() const {
@ -256,17 +256,17 @@ size_t G1Analytics::predict_card_num(size_t rs_length, bool for_young_gc) const
double G1Analytics::predict_rs_scan_time_ms(size_t card_num, bool for_young_gc) const {
if (for_young_gc) {
return card_num * get_new_prediction(_cost_per_entry_ms_seq);
return card_num * get_new_prediction(_young_only_cost_per_remset_card_ms_seq);
} else {
return predict_mixed_rs_scan_time_ms(card_num);
}
}
double G1Analytics::predict_mixed_rs_scan_time_ms(size_t card_num) const {
if (_mixed_cost_per_entry_ms_seq->num() < 3) {
return card_num * get_new_prediction(_cost_per_entry_ms_seq);
if (_mixed_cost_per_remset_card_ms_seq->num() < 3) {
return card_num * get_new_prediction(_young_only_cost_per_remset_card_ms_seq);
} else {
return card_num * get_new_prediction(_mixed_cost_per_entry_ms_seq);
return card_num * get_new_prediction(_mixed_cost_per_remset_card_ms_seq);
}
}
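For intuition, a worked example of the prediction above with purely illustrative numbers (not measured defaults): if 10,000 log buffer entries are pending, the predicted cost per entry is 0.003 ms, and the predicted hot card cache scan time is 0.5 ms, then predict_rs_update_time_ms returns 10000 * 0.003 + 0.5 = 30.5 ms.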

View File

@ -46,12 +46,12 @@ class G1Analytics: public CHeapObj<mtGC> {
double _prev_collection_pause_end_ms;
TruncatedSeq* _rs_length_diff_seq;
TruncatedSeq* _cost_per_card_ms_seq;
TruncatedSeq* _cost_per_log_buffer_entry_ms_seq;
TruncatedSeq* _cost_scan_hcc_seq;
TruncatedSeq* _young_cards_per_entry_ratio_seq;
TruncatedSeq* _mixed_cards_per_entry_ratio_seq;
TruncatedSeq* _cost_per_entry_ms_seq;
TruncatedSeq* _mixed_cost_per_entry_ms_seq;
TruncatedSeq* _young_only_cost_per_remset_card_ms_seq;
TruncatedSeq* _mixed_cost_per_remset_card_ms_seq;
TruncatedSeq* _cost_per_byte_ms_seq;
TruncatedSeq* _constant_other_time_ms_seq;
TruncatedSeq* _young_other_cost_per_region_ms_seq;
@ -99,9 +99,9 @@ public:
void report_concurrent_mark_remark_times_ms(double ms);
void report_concurrent_mark_cleanup_times_ms(double ms);
void report_alloc_rate_ms(double alloc_rate);
void report_cost_per_card_ms(double cost_per_card_ms);
void report_cost_per_log_buffer_entry_ms(double cost_per_log_buffer_entry_ms);
void report_cost_scan_hcc(double cost_scan_hcc);
void report_cost_per_entry_ms(double cost_per_entry_ms, bool for_young_gc);
void report_cost_per_remset_card_ms(double cost_per_remset_card_ms, bool for_young_gc);
void report_cards_per_entry_ratio(double cards_per_entry_ratio, bool for_young_gc);
void report_rs_length_diff(double rs_length_diff);
void report_cost_per_byte_ms(double cost_per_byte_ms, bool mark_or_rebuild_in_progress);
@ -116,7 +116,7 @@ public:
double predict_alloc_rate_ms() const;
int num_alloc_rate_ms() const;
double predict_cost_per_card_ms() const;
double predict_cost_per_log_buffer_entry_ms() const;
double predict_scan_hcc_ms() const;

View File

@ -30,28 +30,6 @@
#include "runtime/atomic.hpp"
#include "runtime/orderAccess.hpp"
bool G1CardTable::mark_card_deferred(size_t card_index) {
CardValue val = _byte_map[card_index];
// It's already processed
if ((val & (clean_card_mask_val() | deferred_card_val())) == deferred_card_val()) {
return false;
}
// Cached bit can be installed either on a clean card or on a claimed card.
CardValue new_val = val;
if (val == clean_card_val()) {
new_val = deferred_card_val();
} else {
if (val & claimed_card_val()) {
new_val = val | deferred_card_val();
}
}
if (new_val != val) {
Atomic::cmpxchg(new_val, &_byte_map[card_index], val);
}
return true;
}
void G1CardTable::g1_mark_as_young(const MemRegion& mr) {
CardValue *const first = byte_for(mr.start());
CardValue *const last = byte_after(mr.last());

View File

@ -44,56 +44,66 @@ class G1CardTableChangedListener : public G1MappingChangedListener {
virtual void on_commit(uint start_idx, size_t num_regions, bool zero_filled);
};
class G1CardTable: public CardTable {
class G1CardTable : public CardTable {
friend class VMStructs;
friend class G1CardTableChangedListener;
G1CardTableChangedListener _listener;
public:
enum G1CardValues {
g1_young_gen = CT_MR_BS_last_reserved << 1
g1_young_gen = CT_MR_BS_last_reserved << 1,
// During evacuation we use the card table to consolidate the cards we need to
// scan for roots onto the card table from the various sources. Further it is
// used to record already completely scanned cards to avoid re-scanning them
// when incrementally evacuating the old gen regions of a collection set.
// This means that already scanned cards should be preserved.
//
// The merge at the start of each evacuation round simply sets cards to dirty
// that are clean; scanned cards are set to 0x1.
//
// This means that the LSB determines what to do with the card during evacuation
// given the following possible values:
//
// 11111111 - clean, do not scan
// 00000001 - already scanned, do not scan
// 00000000 - dirty, needs to be scanned.
//
g1_card_already_scanned = 0x1
};
public:
static const size_t WordAllClean = SIZE_MAX;
static const size_t WordAllDirty = 0;
STATIC_ASSERT(BitsPerByte == 8);
static const size_t WordAlreadyScanned = (SIZE_MAX / 255) * g1_card_already_scanned;
G1CardTable(MemRegion whole_heap): CardTable(whole_heap, /* scanned concurrently */ true), _listener() {
_listener.set_card_table(this);
}
bool is_card_dirty(size_t card_index) {
return _byte_map[card_index] == dirty_card_val();
}
static CardValue g1_young_card_val() { return g1_young_gen; }
/*
Claimed and deferred bits are used together in G1 during the evacuation
pause. These bits can have the following state transitions:
1. The claimed bit can be put over any other card state. Except that
the "dirty -> dirty and claimed" transition is checked for in
G1 code and is not used.
2. Deferred bit can be set only if the previous state of the card
was either clean or claimed. mark_card_deferred() is wait-free.
We do not care if the operation is be successful because if
it does not it will only result in duplicate entry in the update
buffer because of the "cache-miss". So it's not worth spinning.
*/
bool is_card_claimed(size_t card_index) {
CardValue val = _byte_map[card_index];
return (val & (clean_card_mask_val() | claimed_card_val())) == claimed_card_val();
}
inline void set_card_claimed(size_t card_index);
void verify_g1_young_region(MemRegion mr) PRODUCT_RETURN;
void g1_mark_as_young(const MemRegion& mr);
bool mark_card_deferred(size_t card_index);
bool is_card_deferred(size_t card_index) {
CardValue val = _byte_map[card_index];
return (val & (clean_card_mask_val() | deferred_card_val())) == deferred_card_val();
size_t index_for_cardvalue(CardValue const* p) const {
return pointer_delta(p, _byte_map, sizeof(CardValue));
}
// Mark the given card as Dirty if it is Clean.
inline void mark_clean_as_dirty(size_t card_index);
// Change Clean cards in a (large) area on the card table as Dirty, preserving
// already scanned cards. Assumes that most cards in that area are Clean.
inline void mark_region_dirty(size_t start_card_index, size_t num_cards);
// Mark the given range of cards as Scanned. All of these cards must be Dirty.
inline void mark_as_scanned(size_t start_card_index, size_t num_cards);
inline uint region_idx_for(CardValue* p);
static size_t compute_size(size_t mem_region_size_in_words) {
size_t number_of_slots = (mem_region_size_in_words / card_size_in_words);
return ReservedSpace::allocation_align_size_up(number_of_slots);

View File

@ -26,15 +26,58 @@
#define SHARE_GC_G1_G1CARDTABLE_INLINE_HPP
#include "gc/g1/g1CardTable.hpp"
#include "gc/g1/heapRegion.hpp"
void G1CardTable::set_card_claimed(size_t card_index) {
jbyte val = _byte_map[card_index];
if (val == clean_card_val()) {
val = (jbyte)claimed_card_val();
} else {
val |= (jbyte)claimed_card_val();
}
_byte_map[card_index] = val;
inline uint G1CardTable::region_idx_for(CardValue* p) {
size_t const card_idx = pointer_delta(p, _byte_map, sizeof(CardValue));
return (uint)(card_idx >> (HeapRegion::LogOfHRGrainBytes - card_shift));
}
#endif // SHARE_GC_G1_G1CARDTABLE_INLINE_HPP
inline void G1CardTable::mark_clean_as_dirty(size_t card_index) {
CardValue value = _byte_map[card_index];
if (value == clean_card_val()) {
_byte_map[card_index] = dirty_card_val();
}
}
inline void G1CardTable::mark_region_dirty(size_t start_card_index, size_t num_cards) {
assert(is_aligned(start_card_index, sizeof(size_t)), "Start card index must be aligned.");
assert(is_aligned(num_cards, sizeof(size_t)), "Number of cards to change must be evenly divisible.");
size_t const num_chunks = num_cards / sizeof(size_t);
size_t* cur_word = (size_t*)&_byte_map[start_card_index];
size_t* const end_word_map = cur_word + num_chunks;
while (cur_word < end_word_map) {
size_t value = *cur_word;
if (value == WordAllClean) {
*cur_word = WordAllDirty;
} else if (value == WordAllDirty) {
// do nothing.
} else {
// There is a mix of cards in there. Tread slowly.
CardValue* cur = (CardValue*)cur_word;
for (size_t i = 0; i < sizeof(size_t); i++) {
CardValue value = *cur;
if (value == clean_card_val()) {
*cur = dirty_card_val();
}
cur++;
}
}
cur_word++;
}
}
inline void G1CardTable::mark_as_scanned(size_t start_card_index, size_t num_cards) {
CardValue* start = &_byte_map[start_card_index];
CardValue* const end = start + num_cards;
while (start < end) {
CardValue value = *start;
assert(value == dirty_card_val(),
"Must have been dirty %d start " PTR_FORMAT " " PTR_FORMAT, value, p2i(start), p2i(end));
*start++ = g1_card_already_scanned;
}
}
#endif /* SHARE_GC_G1_G1CARDTABLE_INLINE_HPP */
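To make the card state encoding described in G1CardTable above concrete, here is a small self-contained illustration (not HotSpot code) using the documented byte values, 0xff clean, 0x00 dirty, 0x01 already scanned, where the least significant bit decides whether a card still needs scanning.

#include <cassert>
#include <cstdint>

static const uint8_t kClean   = 0xff;
static const uint8_t kDirty   = 0x00;
static const uint8_t kScanned = 0x01;

// Merge: only clean cards become dirty; cards already scanned in an earlier
// evacuation round keep their value and are not re-scanned.
uint8_t merge(uint8_t card) { return card == kClean ? kDirty : card; }

// Scan decision: only cards with a clear least significant bit (dirty, 0x00)
// need to be scanned; clean (0xff) and scanned (0x01) cards are skipped.
bool needs_scan(uint8_t card) { return (card & 0x01) == 0; }

int main() {
  uint8_t card = merge(kClean);      // clean -> dirty
  assert(needs_scan(card));
  card = kScanned;                   // scanning marks the card as scanned
  assert(!needs_scan(merge(card)));  // a later merge round preserves that
  return 0;
}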

View File

@ -1954,7 +1954,7 @@ void G1CollectedHeap::iterate_dirty_card_closure(G1CardTableEntryClosure* cl, ui
n_completed_buffers++;
}
assert(dcqs.completed_buffers_num() == 0, "Completed buffers exist!");
phase_times()->record_thread_work_item(G1GCPhaseTimes::UpdateRS, worker_i, n_completed_buffers, G1GCPhaseTimes::UpdateRSProcessedBuffers);
phase_times()->record_thread_work_item(G1GCPhaseTimes::MergeLB, worker_i, n_completed_buffers, G1GCPhaseTimes::MergeLBProcessedBuffers);
}
// Computes the sum of the storage used by the various regions.
@ -2238,8 +2238,8 @@ void G1CollectedHeap::collection_set_iterate_all(HeapRegionClosure* cl) {
_collection_set.iterate(cl);
}
void G1CollectedHeap::collection_set_iterate_increment_from(HeapRegionClosure *cl, uint worker_id) {
_collection_set.iterate_incremental_part_from(cl, worker_id, workers()->active_workers());
void G1CollectedHeap::collection_set_iterate_increment_from(HeapRegionClosure *cl, HeapRegionClaimer* hr_claimer, uint worker_id) {
_collection_set.iterate_incremental_part_from(cl, hr_claimer, worker_id, workers()->active_workers());
}
HeapWord* G1CollectedHeap::block_start(const void* addr) const {
@ -2630,8 +2630,6 @@ class RegisterRegionsWithRegionAttrTableClosure : public HeapRegionClosure {
size_t _total_humongous;
size_t _candidate_humongous;
G1DirtyCardQueue _dcq;
bool humongous_region_is_candidate(G1CollectedHeap* g1h, HeapRegion* region) const {
assert(region->is_starts_humongous(), "Must start a humongous object");
@ -2691,8 +2689,7 @@ class RegisterRegionsWithRegionAttrTableClosure : public HeapRegionClosure {
public:
RegisterRegionsWithRegionAttrTableClosure()
: _total_humongous(0),
_candidate_humongous(0),
_dcq(&G1BarrierSet::dirty_card_queue_set()) {
_candidate_humongous(0) {
}
virtual bool do_heap_region(HeapRegion* r) {
@ -2707,49 +2704,9 @@ class RegisterRegionsWithRegionAttrTableClosure : public HeapRegionClosure {
uint rindex = r->hrm_index();
g1h->set_humongous_reclaim_candidate(rindex, is_candidate);
if (is_candidate) {
_candidate_humongous++;
g1h->register_humongous_region_with_region_attr(rindex);
// Is_candidate already filters out humongous object with large remembered sets.
// If we have a humongous object with a few remembered sets, we simply flush these
// remembered set entries into the DCQS. That will result in automatic
// re-evaluation of their remembered set entries during the following evacuation
// phase.
if (!r->rem_set()->is_empty()) {
guarantee(r->rem_set()->occupancy_less_or_equal_than(G1RSetSparseRegionEntries),
"Found a not-small remembered set here. This is inconsistent with previous assumptions.");
G1CardTable* ct = g1h->card_table();
HeapRegionRemSetIterator hrrs(r->rem_set());
size_t card_index;
while (hrrs.has_next(card_index)) {
CardTable::CardValue* card_ptr = ct->byte_for_index(card_index);
// The remembered set might contain references to already freed
// regions. Filter out such entries to avoid failing card table
// verification.
if (g1h->is_in(ct->addr_for(card_ptr))) {
if (*card_ptr != G1CardTable::dirty_card_val()) {
*card_ptr = G1CardTable::dirty_card_val();
_dcq.enqueue(card_ptr);
}
}
}
assert(hrrs.n_yielded() == r->rem_set()->occupied(),
"Remembered set hash maps out of sync, cur: " SIZE_FORMAT " entries, next: " SIZE_FORMAT " entries",
hrrs.n_yielded(), r->rem_set()->occupied());
// We should only clear the card based remembered set here as we will not
// implicitly rebuild anything else during eager reclaim. Note that at the moment
// (and probably never) we do not enter this path if there are other kind of
// remembered sets for this region.
r->rem_set()->clear_locked(true /* only_cardset */);
// Clear_locked() above sets the state to Empty. However we want to continue
// collecting remembered set entries for humongous regions that were not
// reclaimed.
r->rem_set()->set_state_complete();
#ifdef ASSERT
G1HeapRegionAttr region_attr = g1h->region_attr(oop(r->bottom()));
assert(region_attr.needs_remset_update(), "must be");
#endif
}
assert(r->rem_set()->is_empty(), "At this point any humongous candidate remembered set must be empty.");
_candidate_humongous++;
// We will later handle the remembered sets of these regions.
} else {
g1h->register_region_with_region_attr(r);
}
@ -2760,8 +2717,6 @@ class RegisterRegionsWithRegionAttrTableClosure : public HeapRegionClosure {
size_t total_humongous() const { return _total_humongous; }
size_t candidate_humongous() const { return _candidate_humongous; }
void flush_rem_set_entries() { _dcq.flush(); }
};
void G1CollectedHeap::register_regions_with_region_attr() {
@ -2774,9 +2729,6 @@ void G1CollectedHeap::register_regions_with_region_attr() {
cl.total_humongous(),
cl.candidate_humongous());
_has_humongous_reclaim_candidates = cl.candidate_humongous() > 0;
// Finally flush all remembered set entries to re-check into the global DCQS.
cl.flush_rem_set_entries();
}
#ifndef PRODUCT
@ -3071,7 +3023,7 @@ bool G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_
workers()->active_workers(),
collection_set()->young_region_length(),
collection_set()->optional_region_length());
pre_evacuate_collection_set(evacuation_info);
pre_evacuate_collection_set(evacuation_info, &per_thread_states);
// Actually do the work...
evacuate_initial_collection_set(&per_thread_states);
@ -3104,9 +3056,7 @@ bool G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_
double sample_end_time_sec = os::elapsedTime();
double pause_time_ms = (sample_end_time_sec - sample_start_time_sec) * MILLIUNITS;
size_t total_cards_scanned = phase_times()->sum_thread_work_items(G1GCPhaseTimes::ScanRS, G1GCPhaseTimes::ScanRSScannedCards) +
phase_times()->sum_thread_work_items(G1GCPhaseTimes::OptScanRS, G1GCPhaseTimes::ScanRSScannedCards);
policy()->record_collection_pause_end(pause_time_ms, total_cards_scanned, heap_used_bytes_before_gc);
policy()->record_collection_pause_end(pause_time_ms, heap_used_bytes_before_gc);
}
verify_after_young_collection(verify_type);
@ -3580,7 +3530,7 @@ void G1CollectedHeap::merge_per_thread_state_info(G1ParScanThreadStateSet* per_t
phase_times()->record_merge_pss_time_ms((os::elapsedTime() - merge_pss_time_start) * 1000.0);
}
void G1CollectedHeap::pre_evacuate_collection_set(G1EvacuationInfo& evacuation_info) {
void G1CollectedHeap::pre_evacuate_collection_set(G1EvacuationInfo& evacuation_info, G1ParScanThreadStateSet* per_thread_states) {
_expand_heap_after_alloc_failure = true;
_evacuation_failed = false;
@ -3591,10 +3541,15 @@ void G1CollectedHeap::pre_evacuate_collection_set(G1EvacuationInfo& evacuation_i
// Initialize the GC alloc regions.
_allocator->init_gc_alloc_regions(evacuation_info);
{
Ticks start = Ticks::now();
rem_set()->prepare_for_scan_heap_roots();
phase_times()->record_prepare_heap_roots_time_ms((Ticks::now() - start).seconds() * 1000.0);
}
register_regions_with_region_attr();
assert(_verifier->check_region_attr_table(), "Inconsistency in the region attributes table.");
rem_set()->prepare_for_scan_rem_set();
_preserved_marks_set.assert_empty();
#if COMPILER2_OR_JVMCI
@ -3696,8 +3651,8 @@ class G1EvacuateRegionsTask : public G1EvacuateRegionsBaseTask {
void scan_roots(G1ParScanThreadState* pss, uint worker_id) {
_root_processor->evacuate_roots(pss, worker_id);
_g1h->rem_set()->update_rem_set(pss, worker_id);
_g1h->rem_set()->scan_rem_set(pss, worker_id, G1GCPhaseTimes::ScanRS, G1GCPhaseTimes::ObjCopy, G1GCPhaseTimes::CodeRoots);
_g1h->rem_set()->scan_heap_roots(pss, worker_id, G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ObjCopy);
_g1h->rem_set()->scan_collection_set_regions(pss, worker_id, G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::CodeRoots, G1GCPhaseTimes::ObjCopy);
}
void evacuate_live_objects(G1ParScanThreadState* pss, uint worker_id) {
@ -3724,6 +3679,14 @@ public:
};
void G1CollectedHeap::evacuate_initial_collection_set(G1ParScanThreadStateSet* per_thread_states) {
G1GCPhaseTimes* p = phase_times();
{
Ticks start = Ticks::now();
rem_set()->merge_heap_roots(false /* remset_only */, G1GCPhaseTimes::MergeRS);
p->record_merge_heap_roots_time((Ticks::now() - start).seconds() * 1000.0);
}
Tickspan task_time;
const uint num_workers = workers()->active_workers();
@ -3738,7 +3701,6 @@ void G1CollectedHeap::evacuate_initial_collection_set(G1ParScanThreadStateSet* p
}
Tickspan total_processing = Ticks::now() - start_processing;
G1GCPhaseTimes* p = phase_times();
p->record_initial_evac_time(task_time.seconds() * 1000.0);
p->record_or_add_code_root_fixup_time((total_processing - task_time).seconds() * 1000.0);
}
@ -3746,7 +3708,8 @@ void G1CollectedHeap::evacuate_initial_collection_set(G1ParScanThreadStateSet* p
class G1EvacuateOptionalRegionsTask : public G1EvacuateRegionsBaseTask {
void scan_roots(G1ParScanThreadState* pss, uint worker_id) {
_g1h->rem_set()->scan_rem_set(pss, worker_id, G1GCPhaseTimes::OptScanRS, G1GCPhaseTimes::OptObjCopy, G1GCPhaseTimes::OptCodeRoots);
_g1h->rem_set()->scan_heap_roots(pss, worker_id, G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::OptObjCopy);
_g1h->rem_set()->scan_collection_set_regions(pss, worker_id, G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::OptCodeRoots, G1GCPhaseTimes::OptObjCopy);
}
void evacuate_live_objects(G1ParScanThreadState* pss, uint worker_id) {
@ -3782,8 +3745,6 @@ void G1CollectedHeap::evacuate_next_optional_regions(G1ParScanThreadStateSet* pe
void G1CollectedHeap::evacuate_optional_collection_set(G1ParScanThreadStateSet* per_thread_states) {
const double gc_start_time_ms = phase_times()->cur_collection_start_sec() * 1000.0;
Ticks start = Ticks::now();
while (!evacuation_failed() && _collection_set.optional_region_length() > 0) {
double time_used_ms = os::elapsedTime() * 1000.0 - gc_start_time_ms;
@ -3796,18 +3757,24 @@ void G1CollectedHeap::evacuate_optional_collection_set(G1ParScanThreadStateSet*
break;
}
evacuate_next_optional_regions(per_thread_states);
{
Ticks start = Ticks::now();
rem_set()->merge_heap_roots(true /* remset_only */, G1GCPhaseTimes::OptMergeRS);
phase_times()->record_or_add_optional_merge_heap_roots_time((Ticks::now() - start).seconds() * 1000.0);
}
{
Ticks start = Ticks::now();
evacuate_next_optional_regions(per_thread_states);
phase_times()->record_or_add_optional_evac_time((Ticks::now() - start).seconds() * 1000.0);
}
}
_collection_set.abandon_optional_collection_set(per_thread_states);
phase_times()->record_or_add_optional_evac_time((Ticks::now() - start).seconds() * 1000.0);
}
void G1CollectedHeap::post_evacuate_collection_set(G1EvacuationInfo& evacuation_info, G1ParScanThreadStateSet* per_thread_states) {
// Also cleans the card table from temporary duplicate detection information used
// during UpdateRS/ScanRS.
rem_set()->cleanup_after_scan_rem_set();
rem_set()->cleanup_after_scan_heap_roots();
// Process any discovered reference objects - we have
// to do this _before_ we retire the GC alloc regions

View File

@ -78,7 +78,6 @@ class G1Policy;
class G1HotCardCache;
class G1RemSet;
class G1YoungRemSetSamplingThread;
class HeapRegionRemSetIterator;
class G1ConcurrentMark;
class G1ConcurrentMarkThread;
class G1ConcurrentRefine;
@ -757,7 +756,7 @@ private:
void evacuate_next_optional_regions(G1ParScanThreadStateSet* per_thread_states);
public:
void pre_evacuate_collection_set(G1EvacuationInfo& evacuation_info);
void pre_evacuate_collection_set(G1EvacuationInfo& evacuation_info, G1ParScanThreadStateSet* pss);
void post_evacuate_collection_set(G1EvacuationInfo& evacuation_info, G1ParScanThreadStateSet* pss);
void expand_heap_after_young_collection();
@ -1115,7 +1114,8 @@ public:
public:
inline G1HeapRegionAttr region_attr(const void* obj);
inline G1HeapRegionAttr region_attr(const void* obj) const;
inline G1HeapRegionAttr region_attr(uint idx) const;
// Return "TRUE" iff the given object address is in the reserved
// region of g1.
@ -1182,7 +1182,12 @@ public:
// Starts the iteration so that the start regions of a given worker id over the
// set active_workers are evenly spread across the set of collection set regions
// to be iterated.
void collection_set_iterate_increment_from(HeapRegionClosure *blk, uint worker_id);
// The variant with the HeapRegionClaimer guarantees that the closure will be
// applied to a particular region exactly once.
void collection_set_iterate_increment_from(HeapRegionClosure *blk, uint worker_id) {
collection_set_iterate_increment_from(blk, NULL, worker_id);
}
void collection_set_iterate_increment_from(HeapRegionClosure *blk, HeapRegionClaimer* hr_claimer, uint worker_id);
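A self-contained sketch of the exactly-once guarantee mentioned above (hypothetical, simplified types; not the HotSpot implementation): every worker walks the whole increment starting from its own offset, but with a shared claimer a region is handed to the closure only by the worker that claims it first.

#include <atomic>
#include <cstdio>
#include <memory>
#include <vector>

// Simplified stand-in for HeapRegionClaimer: one flag per region index,
// claim_region() returns true for exactly one caller per region.
class SimpleClaimer {
  std::unique_ptr<std::atomic<bool>[]> _claimed;
public:
  explicit SimpleClaimer(size_t num_regions)
    : _claimed(new std::atomic<bool>[num_regions]) {
    for (size_t i = 0; i < num_regions; i++) _claimed[i] = false;
  }
  bool claim_region(size_t idx) {
    bool expected = false;
    return _claimed[idx].compare_exchange_strong(expected, true);
  }
};

// Every worker starts at an offset proportional to its id and walks the whole
// increment; regions it fails to claim were already taken by another worker.
void iterate_increment(const std::vector<size_t>& regions, size_t worker_id,
                       size_t num_workers, SimpleClaimer* claimer) {
  size_t len = regions.size();
  size_t start = worker_id * len / num_workers;
  for (size_t i = 0; i < len; i++) {
    size_t region_idx = regions[(start + i) % len];
    if (claimer == nullptr || claimer->claim_region(region_idx)) {
      std::printf("worker %zu processes region %zu\n", worker_id, region_idx);
    }
  }
}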
// Returns the HeapRegion that contains addr. addr must not be NULL.
template <class T>

View File

@ -163,10 +163,14 @@ bool G1CollectedHeap::is_in_cset_or_humongous(const oop obj) {
return _region_attr.is_in_cset_or_humongous((HeapWord*)obj);
}
G1HeapRegionAttr G1CollectedHeap::region_attr(const void* addr) {
G1HeapRegionAttr G1CollectedHeap::region_attr(const void* addr) const {
return _region_attr.at((HeapWord*)addr);
}
G1HeapRegionAttr G1CollectedHeap::region_attr(uint idx) const {
return _region_attr.get_by_index(idx);
}
void G1CollectedHeap::register_humongous_region_with_region_attr(uint index) {
_region_attr.set_humongous(index, region_at(index)->rem_set()->is_tracked());
}
@ -177,7 +181,7 @@ void G1CollectedHeap::register_region_with_region_attr(HeapRegion* r) {
void G1CollectedHeap::register_old_region_with_region_attr(HeapRegion* r) {
_region_attr.set_in_old(r->hrm_index(), r->rem_set()->is_tracked());
_rem_set->prepare_for_scan_rem_set(r->hrm_index());
_rem_set->prepare_for_scan_heap_roots(r->hrm_index());
}
void G1CollectedHeap::register_optional_region_with_region_attr(HeapRegion* r) {

View File

@ -217,10 +217,13 @@ void G1CollectionSet::iterate_optional(HeapRegionClosure* cl) const {
}
}
void G1CollectionSet::iterate_incremental_part_from(HeapRegionClosure* cl, uint worker_id, uint total_workers) const {
void G1CollectionSet::iterate_incremental_part_from(HeapRegionClosure* cl,
HeapRegionClaimer* hr_claimer,
uint worker_id,
uint total_workers) const {
assert_at_safepoint();
size_t len = _collection_set_cur_length - _inc_part_start;
size_t len = increment_length();
if (len == 0) {
return;
}
@ -229,9 +232,12 @@ void G1CollectionSet::iterate_incremental_part_from(HeapRegionClosure* cl, uint
size_t cur_pos = start_pos;
do {
HeapRegion* r = _g1h->region_at(_collection_set_regions[cur_pos + _inc_part_start]);
bool result = cl->do_heap_region(r);
guarantee(!result, "Must not cancel iteration");
uint region_idx = _collection_set_regions[cur_pos + _inc_part_start];
if (hr_claimer == NULL || hr_claimer->claim_region(region_idx)) {
HeapRegion* r = _g1h->region_at(region_idx);
bool result = cl->do_heap_region(r);
guarantee(!result, "Must not cancel iteration");
}
cur_pos++;
if (cur_pos == len) {

View File

@ -36,6 +36,7 @@ class G1ParScanThreadStateSet;
class G1Policy;
class G1SurvivorRegions;
class HeapRegion;
class HeapRegionClaimer;
class HeapRegionClosure;
// The collection set.
@ -279,7 +280,12 @@ public:
// Iterate over the current collection set increment applying the given HeapRegionClosure
// from a starting position determined by the given worker id.
void iterate_incremental_part_from(HeapRegionClosure* cl, uint worker_id, uint total_workers) const;
void iterate_incremental_part_from(HeapRegionClosure* cl, HeapRegionClaimer* hr_claimer, uint worker_id, uint total_workers) const;
// Returns the length of the current increment in number of regions.
size_t increment_length() const { return _collection_set_cur_length - _inc_part_start; }
// Returns the length of the whole current collection set in number of regions
size_t cur_length() const { return _collection_set_cur_length; }
// Iterate over the entire collection set (all increments calculated so far), applying
// the given HeapRegionClosure on all of them.

View File

@ -206,7 +206,7 @@ static Thresholds calc_thresholds(size_t green_zone,
// available buffers near green_zone value. When yellow_size is
// large we don't want to allow a full step to accumulate before
// doing any processing, as that might lead to significantly more
// than green_zone buffers to be processed by update_rs.
// than green_zone buffers to be processed during scanning.
step = MIN2(step, ParallelGCThreads / 2.0);
}
size_t activate_offset = static_cast<size_t>(ceil(step * (worker_i + 1)));
@ -322,18 +322,18 @@ void G1ConcurrentRefine::print_threads_on(outputStream* st) const {
}
static size_t calc_new_green_zone(size_t green,
double update_rs_time,
size_t update_rs_processed_buffers,
double log_buffer_scan_time,
size_t processed_log_buffers,
double goal_ms) {
// Adjust green zone based on whether we're meeting the time goal.
// Limit to max_green_zone.
const double inc_k = 1.1, dec_k = 0.9;
if (update_rs_time > goal_ms) {
if (log_buffer_scan_time > goal_ms) {
if (green > 0) {
green = static_cast<size_t>(green * dec_k);
}
} else if (update_rs_time < goal_ms &&
update_rs_processed_buffers > green) {
} else if (log_buffer_scan_time < goal_ms &&
processed_log_buffers > green) {
green = static_cast<size_t>(MAX2(green * inc_k, green + 1.0));
green = MIN2(green, max_green_zone);
}
@ -350,20 +350,20 @@ static size_t calc_new_red_zone(size_t green, size_t yellow) {
return MIN2(yellow + (yellow - green), max_red_zone);
}
void G1ConcurrentRefine::update_zones(double update_rs_time,
size_t update_rs_processed_buffers,
void G1ConcurrentRefine::update_zones(double log_buffer_scan_time,
size_t processed_log_buffers,
double goal_ms) {
log_trace( CTRL_TAGS )("Updating Refinement Zones: "
"update_rs time: %.3fms, "
"update_rs buffers: " SIZE_FORMAT ", "
"update_rs goal time: %.3fms",
update_rs_time,
update_rs_processed_buffers,
"log buffer scan time: %.3fms, "
"processed buffers: " SIZE_FORMAT ", "
"goal time: %.3fms",
log_buffer_scan_time,
processed_log_buffers,
goal_ms);
_green_zone = calc_new_green_zone(_green_zone,
update_rs_time,
update_rs_processed_buffers,
log_buffer_scan_time,
processed_log_buffers,
goal_ms);
_yellow_zone = calc_new_yellow_zone(_green_zone, _min_yellow_zone_size);
_red_zone = calc_new_red_zone(_green_zone, _yellow_zone);
@ -376,13 +376,13 @@ void G1ConcurrentRefine::update_zones(double update_rs_time,
_green_zone, _yellow_zone, _red_zone);
}
void G1ConcurrentRefine::adjust(double update_rs_time,
size_t update_rs_processed_buffers,
void G1ConcurrentRefine::adjust(double log_buffer_scan_time,
size_t processed_log_buffers,
double goal_ms) {
G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set();
if (G1UseAdaptiveConcRefinement) {
update_zones(update_rs_time, update_rs_processed_buffers, goal_ms);
update_zones(log_buffer_scan_time, processed_log_buffers, goal_ms);
// Change the barrier params
if (max_num_threads() == 0) {

View File

@ -97,8 +97,8 @@ class G1ConcurrentRefine : public CHeapObj<mtGC> {
size_t min_yellow_zone_size);
// Update green/yellow/red zone values based on how well goals are being met.
void update_zones(double update_rs_time,
size_t update_rs_processed_buffers,
void update_zones(double log_buffer_scan_time,
size_t processed_log_buffers,
double goal_ms);
static uint worker_id_offset();
@ -115,7 +115,7 @@ public:
void stop();
// Adjust refinement thresholds based on work done during the pause and the goal time.
void adjust(double update_rs_time, size_t update_rs_processed_buffers, double goal_ms);
void adjust(double log_buffer_scan_time, size_t processed_log_buffers, double goal_ms);
size_t activation_threshold(uint worker_id) const;
size_t deactivation_threshold(uint worker_id) const;

View File

@ -37,15 +37,19 @@
#include "oops/compressedOops.inline.hpp"
#include "oops/oop.inline.hpp"
class UpdateRSetDeferred : public BasicOopIterateClosure {
class UpdateLogBuffersDeferred : public BasicOopIterateClosure {
private:
G1CollectedHeap* _g1h;
G1DirtyCardQueue* _dcq;
G1CardTable* _ct;
// Remember the last enqueued card to avoid enqueuing the same card over and over;
// since we only ever handle a card once, this is sufficient.
size_t _last_enqueued_card;
public:
UpdateRSetDeferred(G1DirtyCardQueue* dcq) :
_g1h(G1CollectedHeap::heap()), _dcq(dcq), _ct(_g1h->card_table()) {}
UpdateLogBuffersDeferred(G1DirtyCardQueue* dcq) :
_g1h(G1CollectedHeap::heap()), _dcq(dcq), _ct(_g1h->card_table()), _last_enqueued_card(SIZE_MAX) {}
virtual void do_oop(narrowOop* p) { do_oop_work(p); }
virtual void do_oop( oop* p) { do_oop_work(p); }
@ -62,8 +66,9 @@ public:
return;
}
size_t card_index = _ct->index_for(p);
if (_ct->mark_card_deferred(card_index)) {
if (card_index != _last_enqueued_card) {
_dcq->enqueue(_ct->byte_for_index(card_index));
_last_enqueued_card = card_index;
}
}
};
@ -73,21 +78,21 @@ class RemoveSelfForwardPtrObjClosure: public ObjectClosure {
G1ConcurrentMark* _cm;
HeapRegion* _hr;
size_t _marked_bytes;
UpdateRSetDeferred* _update_rset_cl;
UpdateLogBuffersDeferred* _log_buffer_cl;
bool _during_initial_mark;
uint _worker_id;
HeapWord* _last_forwarded_object_end;
public:
RemoveSelfForwardPtrObjClosure(HeapRegion* hr,
UpdateRSetDeferred* update_rset_cl,
UpdateLogBuffersDeferred* log_buffer_cl,
bool during_initial_mark,
uint worker_id) :
_g1h(G1CollectedHeap::heap()),
_cm(_g1h->concurrent_mark()),
_hr(hr),
_marked_bytes(0),
_update_rset_cl(update_rset_cl),
_log_buffer_cl(log_buffer_cl),
_during_initial_mark(during_initial_mark),
_worker_id(worker_id),
_last_forwarded_object_end(hr->bottom()) { }
@ -144,7 +149,7 @@ public:
// The problem is that, if evacuation fails, we might have
// remembered set entries missing given that we skipped cards on
// the collection set. So, we'll recreate such entries now.
obj->oop_iterate(_update_rset_cl);
obj->oop_iterate(_log_buffer_cl);
HeapWord* obj_end = obj_addr + obj_size;
_last_forwarded_object_end = obj_end;
@ -193,25 +198,22 @@ public:
class RemoveSelfForwardPtrHRClosure: public HeapRegionClosure {
G1CollectedHeap* _g1h;
uint _worker_id;
HeapRegionClaimer* _hrclaimer;
G1DirtyCardQueue _dcq;
UpdateRSetDeferred _update_rset_cl;
UpdateLogBuffersDeferred _log_buffer_cl;
public:
RemoveSelfForwardPtrHRClosure(uint worker_id,
HeapRegionClaimer* hrclaimer) :
RemoveSelfForwardPtrHRClosure(uint worker_id) :
_g1h(G1CollectedHeap::heap()),
_worker_id(worker_id),
_hrclaimer(hrclaimer),
_dcq(&_g1h->dirty_card_queue_set()),
_update_rset_cl(&_dcq){
_log_buffer_cl(&_dcq) {
}
size_t remove_self_forward_ptr_by_walking_hr(HeapRegion* hr,
bool during_initial_mark) {
RemoveSelfForwardPtrObjClosure rspc(hr,
&_update_rset_cl,
&_log_buffer_cl,
during_initial_mark,
_worker_id);
hr->object_iterate(&rspc);
@ -225,26 +227,24 @@ public:
assert(!hr->is_pinned(), "Unexpected pinned region at index %u", hr->hrm_index());
assert(hr->in_collection_set(), "bad CS");
if (_hrclaimer->claim_region(hr->hrm_index())) {
if (hr->evacuation_failed()) {
hr->clear_index_in_opt_cset();
if (hr->evacuation_failed()) {
hr->clear_index_in_opt_cset();
bool during_initial_mark = _g1h->collector_state()->in_initial_mark_gc();
bool during_conc_mark = _g1h->collector_state()->mark_or_rebuild_in_progress();
bool during_initial_mark = _g1h->collector_state()->in_initial_mark_gc();
bool during_conc_mark = _g1h->collector_state()->mark_or_rebuild_in_progress();
hr->note_self_forwarding_removal_start(during_initial_mark,
hr->note_self_forwarding_removal_start(during_initial_mark,
during_conc_mark);
_g1h->verifier()->check_bitmaps("Self-Forwarding Ptr Removal", hr);
_g1h->verifier()->check_bitmaps("Self-Forwarding Ptr Removal", hr);
hr->reset_bot();
hr->reset_bot();
size_t live_bytes = remove_self_forward_ptr_by_walking_hr(hr, during_initial_mark);
size_t live_bytes = remove_self_forward_ptr_by_walking_hr(hr, during_initial_mark);
hr->rem_set()->clean_strong_code_roots(hr);
hr->rem_set()->clear_locked(true);
hr->rem_set()->clean_strong_code_roots(hr);
hr->rem_set()->clear_locked(true);
hr->note_self_forwarding_removal_end(live_bytes);
}
hr->note_self_forwarding_removal_end(live_bytes);
}
return false;
}
@ -256,7 +256,7 @@ G1ParRemoveSelfForwardPtrsTask::G1ParRemoveSelfForwardPtrsTask() :
_hrclaimer(_g1h->workers()->active_workers()) { }
void G1ParRemoveSelfForwardPtrsTask::work(uint worker_id) {
RemoveSelfForwardPtrHRClosure rsfp_cl(worker_id, &_hrclaimer);
RemoveSelfForwardPtrHRClosure rsfp_cl(worker_id);
_g1h->collection_set_iterate_increment_from(&rsfp_cl, worker_id);
_g1h->collection_set_iterate_increment_from(&rsfp_cl, &_hrclaimer, worker_id);
}

View File

@ -66,14 +66,30 @@ G1GCPhaseTimes::G1GCPhaseTimes(STWGCTimer* gc_timer, uint max_gc_threads) :
_gc_par_phases[WaitForStrongCLD] = new WorkerDataArray<double>(max_gc_threads, "Wait For Strong CLD (ms):");
_gc_par_phases[WeakCLDRoots] = new WorkerDataArray<double>(max_gc_threads, "Weak CLD Roots (ms):");
_gc_par_phases[UpdateRS] = new WorkerDataArray<double>(max_gc_threads, "Update RS (ms):");
_gc_par_phases[MergeRS] = new WorkerDataArray<double>(max_gc_threads, "Remembered Sets (ms):");
_merge_rs_merged_sparse = new WorkerDataArray<size_t>(max_gc_threads, "Merged Sparse:");
_gc_par_phases[MergeRS]->link_thread_work_items(_merge_rs_merged_sparse, MergeRSMergedSparse);
_merge_rs_merged_fine = new WorkerDataArray<size_t>(max_gc_threads, "Merged Fine:");
_gc_par_phases[MergeRS]->link_thread_work_items(_merge_rs_merged_fine, MergeRSMergedFine);
_merge_rs_merged_coarse = new WorkerDataArray<size_t>(max_gc_threads, "Merged Coarse:");
_gc_par_phases[MergeRS]->link_thread_work_items(_merge_rs_merged_coarse, MergeRSMergedCoarse);
_gc_par_phases[OptMergeRS] = new WorkerDataArray<double>(max_gc_threads, "Optional Remembered Sets (ms):");
_opt_merge_rs_merged_sparse = new WorkerDataArray<size_t>(max_gc_threads, "Merged Sparse:");
_gc_par_phases[OptMergeRS]->link_thread_work_items(_opt_merge_rs_merged_sparse, MergeRSMergedSparse);
_opt_merge_rs_merged_fine = new WorkerDataArray<size_t>(max_gc_threads, "Merged Fine:");
_gc_par_phases[OptMergeRS]->link_thread_work_items(_opt_merge_rs_merged_fine, MergeRSMergedFine);
_opt_merge_rs_merged_coarse = new WorkerDataArray<size_t>(max_gc_threads, "Merged Coarse:");
_gc_par_phases[OptMergeRS]->link_thread_work_items(_opt_merge_rs_merged_coarse, MergeRSMergedCoarse);
_gc_par_phases[MergeLB] = new WorkerDataArray<double>(max_gc_threads, "Log Buffers (ms):");
if (G1HotCardCache::default_use_cache()) {
_gc_par_phases[ScanHCC] = new WorkerDataArray<double>(max_gc_threads, "Scan HCC (ms):");
_gc_par_phases[MergeHCC] = new WorkerDataArray<double>(max_gc_threads, "Hot Card Cache (ms):");
} else {
_gc_par_phases[ScanHCC] = NULL;
_gc_par_phases[MergeHCC] = NULL;
}
_gc_par_phases[ScanRS] = new WorkerDataArray<double>(max_gc_threads, "Scan RS (ms):");
_gc_par_phases[OptScanRS] = new WorkerDataArray<double>(max_gc_threads, "Optional Scan RS (ms):");
_gc_par_phases[ScanHR] = new WorkerDataArray<double>(max_gc_threads, "Scan Heap Roots (ms):");
_gc_par_phases[OptScanHR] = new WorkerDataArray<double>(max_gc_threads, "Optional Scan Heap Roots (ms):");
_gc_par_phases[CodeRoots] = new WorkerDataArray<double>(max_gc_threads, "Code Root Scan (ms):");
_gc_par_phases[OptCodeRoots] = new WorkerDataArray<double>(max_gc_threads, "Optional Code Root Scan (ms):");
_gc_par_phases[ObjCopy] = new WorkerDataArray<double>(max_gc_threads, "Object Copy (ms):");
@ -84,30 +100,30 @@ G1GCPhaseTimes::G1GCPhaseTimes(STWGCTimer* gc_timer, uint max_gc_threads) :
_gc_par_phases[GCWorkerEnd] = new WorkerDataArray<double>(max_gc_threads, "GC Worker End (ms):");
_gc_par_phases[Other] = new WorkerDataArray<double>(max_gc_threads, "GC Worker Other (ms):");
_scan_rs_scanned_cards = new WorkerDataArray<size_t>(max_gc_threads, "Scanned Cards:");
_gc_par_phases[ScanRS]->link_thread_work_items(_scan_rs_scanned_cards, ScanRSScannedCards);
_scan_rs_claimed_cards = new WorkerDataArray<size_t>(max_gc_threads, "Claimed Cards:");
_gc_par_phases[ScanRS]->link_thread_work_items(_scan_rs_claimed_cards, ScanRSClaimedCards);
_scan_rs_skipped_cards = new WorkerDataArray<size_t>(max_gc_threads, "Skipped Cards:");
_gc_par_phases[ScanRS]->link_thread_work_items(_scan_rs_skipped_cards, ScanRSSkippedCards);
_scan_hr_scanned_cards = new WorkerDataArray<size_t>(max_gc_threads, "Scanned Cards:");
_gc_par_phases[ScanHR]->link_thread_work_items(_scan_hr_scanned_cards, ScanHRScannedCards);
_scan_hr_scanned_blocks = new WorkerDataArray<size_t>(max_gc_threads, "Scanned Blocks:");
_gc_par_phases[ScanHR]->link_thread_work_items(_scan_hr_scanned_blocks, ScanHRScannedBlocks);
_scan_hr_claimed_chunks = new WorkerDataArray<size_t>(max_gc_threads, "Claimed Chunks:");
_gc_par_phases[ScanHR]->link_thread_work_items(_scan_hr_claimed_chunks, ScanHRClaimedChunks);
_opt_scan_rs_scanned_cards = new WorkerDataArray<size_t>(max_gc_threads, "Scanned Cards:");
_gc_par_phases[OptScanRS]->link_thread_work_items(_opt_scan_rs_scanned_cards, ScanRSScannedCards);
_opt_scan_rs_claimed_cards = new WorkerDataArray<size_t>(max_gc_threads, "Claimed Cards:");
_gc_par_phases[OptScanRS]->link_thread_work_items(_opt_scan_rs_claimed_cards, ScanRSClaimedCards);
_opt_scan_rs_skipped_cards = new WorkerDataArray<size_t>(max_gc_threads, "Skipped Cards:");
_gc_par_phases[OptScanRS]->link_thread_work_items(_opt_scan_rs_skipped_cards, ScanRSSkippedCards);
_opt_scan_rs_scanned_opt_refs = new WorkerDataArray<size_t>(max_gc_threads, "Scanned Refs:");
_gc_par_phases[OptScanRS]->link_thread_work_items(_opt_scan_rs_scanned_opt_refs, ScanRSScannedOptRefs);
_opt_scan_rs_used_memory = new WorkerDataArray<size_t>(max_gc_threads, "Used Memory:");
_gc_par_phases[OptScanRS]->link_thread_work_items(_opt_scan_rs_used_memory, ScanRSUsedMemory);
_opt_scan_hr_scanned_cards = new WorkerDataArray<size_t>(max_gc_threads, "Scanned Cards:");
_gc_par_phases[OptScanHR]->link_thread_work_items(_opt_scan_hr_scanned_cards, ScanHRScannedCards);
_opt_scan_hr_scanned_blocks = new WorkerDataArray<size_t>(max_gc_threads, "Scanned Blocks:");
_gc_par_phases[OptScanHR]->link_thread_work_items(_opt_scan_hr_scanned_blocks, ScanHRScannedBlocks);
_opt_scan_hr_claimed_chunks = new WorkerDataArray<size_t>(max_gc_threads, "Claimed Chunks:");
_gc_par_phases[OptScanHR]->link_thread_work_items(_opt_scan_hr_claimed_chunks, ScanHRClaimedChunks);
_opt_scan_hr_scanned_opt_refs = new WorkerDataArray<size_t>(max_gc_threads, "Scanned Refs:");
_gc_par_phases[OptScanHR]->link_thread_work_items(_opt_scan_hr_scanned_opt_refs, ScanHRScannedOptRefs);
_opt_scan_hr_used_memory = new WorkerDataArray<size_t>(max_gc_threads, "Used Memory:");
_gc_par_phases[OptScanHR]->link_thread_work_items(_opt_scan_hr_used_memory, ScanHRUsedMemory);
_update_rs_processed_buffers = new WorkerDataArray<size_t>(max_gc_threads, "Processed Buffers:");
_gc_par_phases[UpdateRS]->link_thread_work_items(_update_rs_processed_buffers, UpdateRSProcessedBuffers);
_update_rs_scanned_cards = new WorkerDataArray<size_t>(max_gc_threads, "Scanned Cards:");
_gc_par_phases[UpdateRS]->link_thread_work_items(_update_rs_scanned_cards, UpdateRSScannedCards);
_update_rs_skipped_cards = new WorkerDataArray<size_t>(max_gc_threads, "Skipped Cards:");
_gc_par_phases[UpdateRS]->link_thread_work_items(_update_rs_skipped_cards, UpdateRSSkippedCards);
_merge_lb_processed_buffers = new WorkerDataArray<size_t>(max_gc_threads, "Processed Buffers:");
_gc_par_phases[MergeLB]->link_thread_work_items(_merge_lb_processed_buffers, MergeLBProcessedBuffers);
_merge_lb_dirty_cards = new WorkerDataArray<size_t>(max_gc_threads, "Dirty Cards:");
_gc_par_phases[MergeLB]->link_thread_work_items(_merge_lb_dirty_cards, MergeLBDirtyCards);
_merge_lb_skipped_cards = new WorkerDataArray<size_t>(max_gc_threads, "Skipped Cards:");
_gc_par_phases[MergeLB]->link_thread_work_items(_merge_lb_skipped_cards, MergeLBSkippedCards);
_obj_copy_lab_waste = new WorkerDataArray<size_t>(max_gc_threads, "LAB Waste");
_gc_par_phases[ObjCopy]->link_thread_work_items(_obj_copy_lab_waste, ObjCopyLABWaste);
@ -148,6 +164,8 @@ void G1GCPhaseTimes::reset() {
_cur_optional_evac_ms = 0.0;
_cur_collection_code_root_fixup_time_ms = 0.0;
_cur_strong_code_root_purge_time_ms = 0.0;
_cur_merge_heap_roots_time_ms = 0.0;
_cur_optional_merge_heap_roots_time_ms = 0.0;
_cur_evac_fail_recalc_used = 0.0;
_cur_evac_fail_remove_self_forwards = 0.0;
_cur_string_deduplication_time_ms = 0.0;
@ -160,6 +178,7 @@ void G1GCPhaseTimes::reset() {
_cur_collection_start_sec = 0.0;
_root_region_scan_wait_time_ms = 0.0;
_external_accounted_time_ms = 0.0;
_recorded_prepare_heap_roots_time_ms = 0.0;
_recorded_clear_claimed_marks_time_ms = 0.0;
_recorded_young_cset_choice_time_ms = 0.0;
_recorded_non_young_cset_choice_time_ms = 0.0;
@ -219,9 +238,7 @@ void G1GCPhaseTimes::note_gc_end() {
record_time_secs(GCWorkerTotal, i , total_worker_time);
double worker_known_time = worker_time(ExtRootScan, i) +
worker_time(ScanHCC, i) +
worker_time(UpdateRS, i) +
worker_time(ScanRS, i) +
worker_time(ScanHR, i) +
worker_time(CodeRoots, i) +
worker_time(ObjCopy, i) +
worker_time(Termination, i);
@ -231,11 +248,15 @@ void G1GCPhaseTimes::note_gc_end() {
// Make sure all slots are uninitialized since this thread did not seem to have been started
ASSERT_PHASE_UNINITIALIZED(GCWorkerEnd);
ASSERT_PHASE_UNINITIALIZED(ExtRootScan);
ASSERT_PHASE_UNINITIALIZED(ScanHCC);
ASSERT_PHASE_UNINITIALIZED(UpdateRS);
ASSERT_PHASE_UNINITIALIZED(ScanRS);
ASSERT_PHASE_UNINITIALIZED(MergeHCC);
ASSERT_PHASE_UNINITIALIZED(MergeRS);
ASSERT_PHASE_UNINITIALIZED(OptMergeRS);
ASSERT_PHASE_UNINITIALIZED(MergeLB);
ASSERT_PHASE_UNINITIALIZED(ScanHR);
ASSERT_PHASE_UNINITIALIZED(CodeRoots);
ASSERT_PHASE_UNINITIALIZED(OptCodeRoots);
ASSERT_PHASE_UNINITIALIZED(ObjCopy);
ASSERT_PHASE_UNINITIALIZED(OptObjCopy);
ASSERT_PHASE_UNINITIALIZED(Termination);
}
}
@ -365,6 +386,7 @@ double G1GCPhaseTimes::print_pre_evacuate_collection_set() const {
_recorded_young_cset_choice_time_ms +
_recorded_non_young_cset_choice_time_ms +
_cur_region_register_time +
_recorded_prepare_heap_roots_time_ms +
_recorded_clear_claimed_marks_time_ms;
info_time("Pre Evacuate Collection Set", sum_ms);
@ -380,6 +402,7 @@ double G1GCPhaseTimes::print_pre_evacuate_collection_set() const {
trace_count("Humongous Candidate", _cur_fast_reclaim_humongous_candidates);
}
debug_time("Prepare Heap Roots", _recorded_prepare_heap_roots_time_ms);
if (_recorded_clear_claimed_marks_time_ms > 0.0) {
debug_time("Clear Claimed Marks", _recorded_clear_claimed_marks_time_ms);
}
@ -387,10 +410,13 @@ double G1GCPhaseTimes::print_pre_evacuate_collection_set() const {
}
double G1GCPhaseTimes::print_evacuate_optional_collection_set() const {
const double sum_ms = _cur_optional_evac_ms;
const double sum_ms = _cur_optional_evac_ms + _cur_optional_merge_heap_roots_time_ms;
if (sum_ms > 0) {
info_time("Evacuate Optional Collection Set", sum_ms);
debug_phase(_gc_par_phases[OptScanRS]);
info_time("Merge Optional Heap Roots", _cur_optional_merge_heap_roots_time_ms);
debug_phase(_gc_par_phases[OptMergeRS]);
info_time("Evacuate Optional Collection Set", _cur_optional_evac_ms);
debug_phase(_gc_par_phases[OptScanHR]);
debug_phase(_gc_par_phases[OptObjCopy]);
debug_phase(_gc_par_phases[OptCodeRoots]);
debug_phase(_gc_par_phases[OptTermination]);
@ -398,21 +424,23 @@ double G1GCPhaseTimes::print_evacuate_optional_collection_set() const {
return sum_ms;
}
double G1GCPhaseTimes::print_evacuate_collection_set() const {
const double sum_ms = _cur_collection_initial_evac_time_ms;
double G1GCPhaseTimes::print_evacuate_initial_collection_set() const {
info_time("Merge Heap Roots", _cur_merge_heap_roots_time_ms);
info_time("Evacuate Collection Set", sum_ms);
debug_phase(_gc_par_phases[MergeRS]);
if (G1HotCardCache::default_use_cache()) {
debug_phase(_gc_par_phases[MergeHCC]);
}
debug_phase(_gc_par_phases[MergeLB]);
info_time("Evacuate Collection Set", _cur_collection_initial_evac_time_ms);
trace_phase(_gc_par_phases[GCWorkerStart], false);
debug_phase(_gc_par_phases[ExtRootScan]);
for (int i = ExtRootScanSubPhasesFirst; i <= ExtRootScanSubPhasesLast; i++) {
trace_phase(_gc_par_phases[i]);
}
if (G1HotCardCache::default_use_cache()) {
debug_phase(_gc_par_phases[ScanHCC]);
}
debug_phase(_gc_par_phases[UpdateRS]);
debug_phase(_gc_par_phases[ScanRS]);
debug_phase(_gc_par_phases[ScanHR]);
debug_phase(_gc_par_phases[CodeRoots]);
debug_phase(_gc_par_phases[ObjCopy]);
debug_phase(_gc_par_phases[Termination]);
@ -420,7 +448,7 @@ double G1GCPhaseTimes::print_evacuate_collection_set() const {
debug_phase(_gc_par_phases[GCWorkerTotal]);
trace_phase(_gc_par_phases[GCWorkerEnd], false);
return sum_ms;
return _cur_collection_initial_evac_time_ms + _cur_merge_heap_roots_time_ms;
}
double G1GCPhaseTimes::print_post_evacuate_collection_set() const {
@ -503,7 +531,7 @@ void G1GCPhaseTimes::print() {
double accounted_ms = 0.0;
accounted_ms += print_pre_evacuate_collection_set();
accounted_ms += print_evacuate_collection_set();
accounted_ms += print_evacuate_initial_collection_set();
accounted_ms += print_evacuate_optional_collection_set();
accounted_ms += print_post_evacuate_collection_set();
print_other(accounted_ms);
@ -530,10 +558,12 @@ const char* G1GCPhaseTimes::phase_name(GCParPhases phase) {
"CMRefRoots",
"WaitForStrongCLD",
"WeakCLDRoots",
"UpdateRS",
"ScanHCC",
"ScanRS",
"OptScanRS",
"MergeRS",
"OptMergeRS",
"MergeLB",
"MergeHCC",
"ScanHR",
"OptScanHR",
"CodeRoots",
"OptCodeRoots",
"ObjCopy",
@ -580,8 +610,8 @@ void G1EvacPhaseWithTrimTimeTracker::stop() {
_stopped = true;
}
G1GCParPhaseTimesTracker::G1GCParPhaseTimesTracker(G1GCPhaseTimes* phase_times, G1GCPhaseTimes::GCParPhases phase, uint worker_id) :
_start_time(), _phase(phase), _phase_times(phase_times), _worker_id(worker_id), _event() {
G1GCParPhaseTimesTracker::G1GCParPhaseTimesTracker(G1GCPhaseTimes* phase_times, G1GCPhaseTimes::GCParPhases phase, uint worker_id, bool must_record) :
_start_time(), _phase(phase), _phase_times(phase_times), _worker_id(worker_id), _event(), _must_record(must_record) {
if (_phase_times != NULL) {
_start_time = Ticks::now();
}
@ -589,7 +619,11 @@ G1GCParPhaseTimesTracker::G1GCParPhaseTimesTracker(G1GCPhaseTimes* phase_times,
G1GCParPhaseTimesTracker::~G1GCParPhaseTimesTracker() {
if (_phase_times != NULL) {
_phase_times->record_time_secs(_phase, _worker_id, (Ticks::now() - _start_time).seconds());
if (_must_record) {
_phase_times->record_time_secs(_phase, _worker_id, (Ticks::now() - _start_time).seconds());
} else {
_phase_times->record_or_add_time_secs(_phase, _worker_id, (Ticks::now() - _start_time).seconds());
}
_event.commit(GCId::current(), _worker_id, G1GCPhaseTimes::phase_name(_phase));
}
}

View File

@ -60,10 +60,12 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
CMRefRoots,
WaitForStrongCLD,
WeakCLDRoots,
UpdateRS,
ScanHCC,
ScanRS,
OptScanRS,
MergeRS,
OptMergeRS,
MergeLB,
MergeHCC,
ScanHR,
OptScanHR,
CodeRoots,
OptCodeRoots,
ObjCopy,
@ -84,18 +86,24 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
static const GCParPhases ExtRootScanSubPhasesFirst = ThreadRoots;
static const GCParPhases ExtRootScanSubPhasesLast = WeakCLDRoots;
enum GCScanRSWorkItems {
ScanRSScannedCards,
ScanRSClaimedCards,
ScanRSSkippedCards,
ScanRSScannedOptRefs,
ScanRSUsedMemory
enum GCMergeRSWorkTimes {
MergeRSMergedSparse,
MergeRSMergedFine,
MergeRSMergedCoarse
};
enum GCUpdateRSWorkItems {
UpdateRSProcessedBuffers,
UpdateRSScannedCards,
UpdateRSSkippedCards
enum GCScanHRWorkItems {
ScanHRScannedCards,
ScanHRScannedBlocks,
ScanHRClaimedChunks,
ScanHRScannedOptRefs,
ScanHRUsedMemory
};
enum GCMergeLBWorkItems {
MergeLBProcessedBuffers,
MergeLBDirtyCards,
MergeLBSkippedCards
};
enum GCObjCopyWorkItems {
@ -109,19 +117,27 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
WorkerDataArray<double>* _gc_par_phases[GCParPhasesSentinel];
WorkerDataArray<size_t>* _update_rs_processed_buffers;
WorkerDataArray<size_t>* _update_rs_scanned_cards;
WorkerDataArray<size_t>* _update_rs_skipped_cards;
WorkerDataArray<size_t>* _merge_rs_merged_sparse;
WorkerDataArray<size_t>* _merge_rs_merged_fine;
WorkerDataArray<size_t>* _merge_rs_merged_coarse;
WorkerDataArray<size_t>* _scan_rs_scanned_cards;
WorkerDataArray<size_t>* _scan_rs_claimed_cards;
WorkerDataArray<size_t>* _scan_rs_skipped_cards;
WorkerDataArray<size_t>* _merge_lb_processed_buffers;
WorkerDataArray<size_t>* _merge_lb_dirty_cards;
WorkerDataArray<size_t>* _merge_lb_skipped_cards;
WorkerDataArray<size_t>* _opt_scan_rs_scanned_cards;
WorkerDataArray<size_t>* _opt_scan_rs_claimed_cards;
WorkerDataArray<size_t>* _opt_scan_rs_skipped_cards;
WorkerDataArray<size_t>* _opt_scan_rs_scanned_opt_refs;
WorkerDataArray<size_t>* _opt_scan_rs_used_memory;
WorkerDataArray<size_t>* _scan_hr_scanned_cards;
WorkerDataArray<size_t>* _scan_hr_scanned_blocks;
WorkerDataArray<size_t>* _scan_hr_claimed_chunks;
WorkerDataArray<size_t>* _opt_merge_rs_merged_sparse;
WorkerDataArray<size_t>* _opt_merge_rs_merged_fine;
WorkerDataArray<size_t>* _opt_merge_rs_merged_coarse;
WorkerDataArray<size_t>* _opt_scan_hr_scanned_cards;
WorkerDataArray<size_t>* _opt_scan_hr_scanned_blocks;
WorkerDataArray<size_t>* _opt_scan_hr_claimed_chunks;
WorkerDataArray<size_t>* _opt_scan_hr_scanned_opt_refs;
WorkerDataArray<size_t>* _opt_scan_hr_used_memory;
WorkerDataArray<size_t>* _obj_copy_lab_waste;
WorkerDataArray<size_t>* _obj_copy_lab_undo_waste;
@ -145,6 +161,9 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
double _cur_string_deduplication_time_ms;
double _cur_merge_heap_roots_time_ms;
double _cur_optional_merge_heap_roots_time_ms;
double _cur_prepare_tlab_time_ms;
double _cur_resize_tlab_time_ms;
@ -159,6 +178,8 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
double _external_accounted_time_ms;
double _recorded_prepare_heap_roots_time_ms;
double _recorded_clear_claimed_marks_time_ms;
double _recorded_young_cset_choice_time_ms;
@ -208,7 +229,8 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
void trace_count(const char* name, size_t value) const;
double print_pre_evacuate_collection_set() const;
double print_evacuate_collection_set() const;
double print_merge_heap_roots_time() const;
double print_evacuate_initial_collection_set() const;
double print_evacuate_optional_collection_set() const;
double print_post_evacuate_collection_set() const;
void print_other(double accounted_ms) const;
@ -278,6 +300,14 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
_cur_strong_code_root_purge_time_ms = ms;
}
void record_merge_heap_roots_time(double ms) {
_cur_merge_heap_roots_time_ms += ms;
}
void record_or_add_optional_merge_heap_roots_time(double ms) {
_cur_optional_merge_heap_roots_time_ms += ms;
}
void record_evac_fail_recalc_used_time(double ms) {
_cur_evac_fail_recalc_used = ms;
}
@ -357,6 +387,10 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
_external_accounted_time_ms += time_ms;
}
void record_prepare_heap_roots_time_ms(double recorded_prepare_heap_roots_time_ms) {
_recorded_prepare_heap_roots_time_ms = recorded_prepare_heap_roots_time_ms;
}
void record_clear_claimed_marks_time_ms(double recorded_clear_claimed_marks_time_ms) {
_recorded_clear_claimed_marks_time_ms = recorded_clear_claimed_marks_time_ms;
}
@ -397,6 +431,10 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
return _cur_fast_reclaim_humongous_time_ms;
}
size_t fast_reclaim_humongous_candidates() const {
return _cur_fast_reclaim_humongous_candidates;
}
ReferenceProcessorPhaseTimes* ref_phase_times() { return &_ref_phase_times; }
WeakProcessorPhaseTimes* weak_phase_times() { return &_weak_phase_times; }
@ -424,8 +462,10 @@ protected:
G1GCPhaseTimes* _phase_times;
uint _worker_id;
EventGCPhaseParallel _event;
bool _must_record;
public:
G1GCParPhaseTimesTracker(G1GCPhaseTimes* phase_times, G1GCPhaseTimes::GCParPhases phase, uint worker_id);
G1GCParPhaseTimesTracker(G1GCPhaseTimes* phase_times, G1GCPhaseTimes::GCParPhases phase, uint worker_id, bool must_record = true);
virtual ~G1GCParPhaseTimesTracker();
};

View File

@ -39,8 +39,8 @@ void G1HeterogeneousHeapPolicy::init(G1CollectedHeap* g1h, G1CollectionSet* coll
}
// After a collection pause, young list target length is updated. So we need to make sure we have enough regions in dram for young gen.
void G1HeterogeneousHeapPolicy::record_collection_pause_end(double pause_time_ms, size_t cards_scanned, size_t heap_used_bytes_before_gc) {
G1Policy::record_collection_pause_end(pause_time_ms, cards_scanned, heap_used_bytes_before_gc);
void G1HeterogeneousHeapPolicy::record_collection_pause_end(double pause_time_ms, size_t heap_used_bytes_before_gc) {
G1Policy::record_collection_pause_end(pause_time_ms, heap_used_bytes_before_gc);
_manager->adjust_dram_regions((uint)young_list_target_length(), G1CollectedHeap::heap()->workers());
}

View File

@ -38,7 +38,7 @@ public:
// initialize policy
virtual void init(G1CollectedHeap* g1h, G1CollectionSet* collection_set);
// Record end of an evacuation pause.
virtual void record_collection_pause_end(double pause_time_ms, size_t cards_scanned, size_t heap_used_bytes_before_gc);
virtual void record_collection_pause_end(double pause_time_ms, size_t heap_used_bytes_before_gc);
// Record the end of full collection.
virtual void record_full_collection_end();

View File

@ -51,6 +51,7 @@ G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h,
_tenuring_threshold(g1h->policy()->tenuring_threshold()),
_scanner(g1h, this),
_worker_id(worker_id),
_last_enqueued_card(SIZE_MAX),
_stack_trim_upper_threshold(GCDrainStackTargetSize * 2 + 1),
_stack_trim_lower_threshold(GCDrainStackTargetSize),
_trim_ticks(),
@ -371,7 +372,7 @@ void G1ParScanThreadStateSet::record_unused_optional_region(HeapRegion* hr) {
}
size_t used_memory = pss->oops_into_optional_region(hr)->used_memory();
_g1h->phase_times()->record_or_add_thread_work_item(G1GCPhaseTimes::OptScanRS, worker_index, used_memory, G1GCPhaseTimes::ScanRSUsedMemory);
_g1h->phase_times()->record_or_add_thread_work_item(G1GCPhaseTimes::OptScanHR, worker_index, used_memory, G1GCPhaseTimes::ScanHRUsedMemory);
}
}

View File

@ -60,6 +60,10 @@ class G1ParScanThreadState : public CHeapObj<mtGC> {
uint _worker_id;
// Remember the last enqueued card to avoid enqueuing the same card over and over;
// since we only ever scan a card once, this is sufficient.
size_t _last_enqueued_card;
// Upper and lower threshold to start and end work queue draining.
uint const _stack_trim_upper_threshold;
uint const _stack_trim_lower_threshold;
@ -128,8 +132,9 @@ public:
}
size_t card_index = ct()->index_for(p);
// If the card hasn't been added to the buffer, do it.
if (ct()->mark_card_deferred(card_index)) {
if (_last_enqueued_card != card_index) {
dirty_card_queue().enqueue(ct()->byte_for_index(card_index));
_last_enqueued_card = card_index;
}
}
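
The check above replaces the card table's per-card deferred mark with a per-thread comparison against the most recently enqueued card. A minimal standalone sketch of that filter (plain C++ with stand-in types, not HotSpot code; the queue is just a vector here):

#include <cstddef>
#include <cstdint>
#include <vector>

// Because a thread scans the fields of one object/card at a time, consecutive
// cross-region references tend to hit the same destination card, so remembering
// only the last enqueued card index already filters most duplicates cheaply.
class DirtyCardEnqueuer {
  std::vector<size_t> _queue;            // stand-in for the per-thread dirty card queue
  size_t _last_enqueued_card = SIZE_MAX; // "no card enqueued yet"
public:
  void enqueue(size_t card_index) {
    if (_last_enqueued_card != card_index) { // skip immediate duplicates
      _queue.push_back(card_index);
      _last_enqueued_card = card_index;
    }
  }
  size_t enqueued() const { return _queue.size(); }
};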

View File

@ -572,10 +572,24 @@ bool G1Policy::need_to_start_conc_mark(const char* source, size_t alloc_word_siz
return result;
}
double G1Policy::log_buffer_processing_time() const {
double all_cards_processing_time = average_time_ms(G1GCPhaseTimes::ScanHR) + average_time_ms(G1GCPhaseTimes::OptScanHR);
size_t log_buffer_dirty_cards = phase_times()->sum_thread_work_items(G1GCPhaseTimes::MergeLB, G1GCPhaseTimes::MergeLBDirtyCards);
size_t scan_heap_roots_cards = phase_times()->sum_thread_work_items(G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ScanHRScannedCards) +
phase_times()->sum_thread_work_items(G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::ScanHRScannedCards);
// This may happen if there are duplicate cards in different log buffers.
if (log_buffer_dirty_cards > scan_heap_roots_cards) {
return all_cards_processing_time + average_time_ms(G1GCPhaseTimes::MergeLB);
}
return (all_cards_processing_time * log_buffer_dirty_cards / scan_heap_roots_cards) + average_time_ms(G1GCPhaseTimes::MergeLB);
}
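
For orientation, the proportional split above can be reproduced with a small standalone calculation (illustrative numbers only; the function below is a model of log_buffer_processing_time(), not the HotSpot code):

#include <cstdio>

// Scan Heap Roots time is attributed to log buffer cards by card count; the
// Merge Log Buffers time is always charged fully to log buffer processing.
double log_buffer_processing_time(double scan_hr_ms, double merge_lb_ms,
                                  double lb_dirty_cards, double scanned_cards) {
  if (lb_dirty_cards > scanned_cards) { // duplicates across buffers: charge everything
    return scan_hr_ms + merge_lb_ms;
  }
  return scan_hr_ms * lb_dirty_cards / scanned_cards + merge_lb_ms;
}

int main() {
  // 8 ms of heap root scanning over 40000 scanned cards, 10000 of which came
  // from log buffers, plus 1.5 ms of Merge Log Buffers work:
  std::printf("%.2f ms\n", log_buffer_processing_time(8.0, 1.5, 10000.0, 40000.0));
  // Prints "3.50 ms": a quarter of the scan time plus the merge time.
  return 0;
}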
// Anything below that is considered to be zero
#define MIN_TIMER_GRANULARITY 0.0000001
void G1Policy::record_collection_pause_end(double pause_time_ms, size_t cards_scanned, size_t heap_used_bytes_before_gc) {
void G1Policy::record_collection_pause_end(double pause_time_ms, size_t heap_used_bytes_before_gc) {
G1GCPhaseTimes* p = phase_times();
double end_time_sec = os::elapsedTime();
assert_used_and_recalculate_used_equal(_g1h);
@ -645,29 +659,40 @@ void G1Policy::record_collection_pause_end(double pause_time_ms, size_t cards_sc
_short_lived_surv_rate_group->start_adding_regions();
// Do that for any other surv rate groups
double scan_hcc_time_ms = G1HotCardCache::default_use_cache() ? average_time_ms(G1GCPhaseTimes::ScanHCC) : 0.0;
double scan_hcc_time_ms = G1HotCardCache::default_use_cache() ? average_time_ms(G1GCPhaseTimes::MergeHCC) : 0.0;
if (update_stats) {
double cost_per_card_ms = 0.0;
if (_pending_cards > 0) {
cost_per_card_ms = (average_time_ms(G1GCPhaseTimes::UpdateRS)) / (double) _pending_cards;
_analytics->report_cost_per_card_ms(cost_per_card_ms);
double cost_per_log_buffer_entry = 0.0;
size_t const pending_log_buffer_entries = p->sum_thread_work_items(G1GCPhaseTimes::MergeLB, G1GCPhaseTimes::MergeLBDirtyCards);
if (pending_log_buffer_entries > 0) {
cost_per_log_buffer_entry = log_buffer_processing_time() / pending_log_buffer_entries;
_analytics->report_cost_per_log_buffer_entry_ms(cost_per_log_buffer_entry);
}
_analytics->report_cost_scan_hcc(scan_hcc_time_ms);
double cost_per_entry_ms = 0.0;
if (cards_scanned > 10) {
double avg_time_scan_rs = average_time_ms(G1GCPhaseTimes::ScanRS);
if (this_pause_was_young_only) {
avg_time_scan_rs += average_time_ms(G1GCPhaseTimes::OptScanRS);
}
cost_per_entry_ms = avg_time_scan_rs / cards_scanned;
_analytics->report_cost_per_entry_ms(cost_per_entry_ms, this_pause_was_young_only);
size_t const total_cards_scanned = p->sum_thread_work_items(G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ScanHRScannedCards) +
p->sum_thread_work_items(G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::ScanHRScannedCards);
size_t remset_cards_scanned = 0;
// There might have been duplicate log buffer entries in the queues which could
// increase this value beyond the cards scanned. In this case attribute all cards
// to the log buffers.
if (pending_log_buffer_entries <= total_cards_scanned) {
remset_cards_scanned = total_cards_scanned - pending_log_buffer_entries;
}
double cost_per_remset_card_ms = 0.0;
if (remset_cards_scanned > 10) {
double avg_time_remset_scan = ((average_time_ms(G1GCPhaseTimes::ScanHR) + average_time_ms(G1GCPhaseTimes::OptScanHR)) *
remset_cards_scanned / total_cards_scanned) +
average_time_ms(G1GCPhaseTimes::MergeRS);
cost_per_remset_card_ms = avg_time_remset_scan / remset_cards_scanned;
_analytics->report_cost_per_remset_card_ms(cost_per_remset_card_ms, this_pause_was_young_only);
}
if (_max_rs_lengths > 0) {
double cards_per_entry_ratio =
(double) cards_scanned / (double) _max_rs_lengths;
(double) remset_cards_scanned / (double) _max_rs_lengths;
_analytics->report_cards_per_entry_ratio(cards_per_entry_ratio, this_pause_was_young_only);
}
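
Continuing the illustrative numbers from the sketch above: with 40000 cards scanned in total and 10000 of them originating from log buffers, the remembered set share is 40000 - 10000 = 30000 cards. Assuming 8 ms of combined (initial plus optional) Scan Heap Roots time and 2 ms of Merge Remembered Sets time, the reported cost per remembered set card would be (8 * 30000 / 40000 + 2) / 30000 ≈ 0.00027 ms.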
@ -759,20 +784,26 @@ void G1Policy::record_collection_pause_end(double pause_time_ms, size_t cards_sc
}
// Note that _mmu_tracker->max_gc_time() returns the time in seconds.
double update_rs_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSetUpdatingPauseTimePercent / 100.0;
double scan_log_buffer_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSetUpdatingPauseTimePercent / 100.0;
if (update_rs_time_goal_ms < scan_hcc_time_ms) {
if (scan_log_buffer_time_goal_ms < scan_hcc_time_ms) {
log_debug(gc, ergo, refine)("Adjust concurrent refinement thresholds (scanning the HCC expected to take longer than Update RS time goal)."
"Update RS time goal: %1.2fms Scan HCC time: %1.2fms",
update_rs_time_goal_ms, scan_hcc_time_ms);
"Log Buffer Scan time goal: %1.2fms Scan HCC time: %1.2fms",
scan_log_buffer_time_goal_ms, scan_hcc_time_ms);
update_rs_time_goal_ms = 0;
scan_log_buffer_time_goal_ms = 0;
} else {
update_rs_time_goal_ms -= scan_hcc_time_ms;
scan_log_buffer_time_goal_ms -= scan_hcc_time_ms;
}
_g1h->concurrent_refine()->adjust(average_time_ms(G1GCPhaseTimes::UpdateRS),
phase_times()->sum_thread_work_items(G1GCPhaseTimes::UpdateRS),
update_rs_time_goal_ms);
double const log_buffer_time = log_buffer_processing_time();
log_debug(gc, ergo, refine)("Concurrent refinement times: Log Buffer Scan time goal: %1.2fms Log Buffer Scan time: %1.2fms HCC time: %1.2fms",
scan_log_buffer_time_goal_ms, log_buffer_time, scan_hcc_time_ms);
_g1h->concurrent_refine()->adjust(log_buffer_time,
phase_times()->sum_thread_work_items(G1GCPhaseTimes::MergeLB, G1GCPhaseTimes::MergeLBProcessedBuffers),
scan_log_buffer_time_goal_ms);
}
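
As a hedged numeric example of the goal computation above: with the default 200 ms pause target (so _mmu_tracker->max_gc_time() yields 0.2 s) and the default G1RSetUpdatingPauseTimePercent of 10, the raw goal is 20 ms; after subtracting, say, 0.3 ms of hot card cache scan time, the refinement controller is asked to keep log buffer processing under 19.7 ms per pause.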
G1IHOPControl* G1Policy::create_ihop_control(const G1Predictions* predictor){

View File

@ -111,6 +111,8 @@ class G1Policy: public CHeapObj<mtGC> {
bool should_update_surv_rate_group_predictors() {
return collector_state()->in_young_only_phase() && !collector_state()->mark_or_rebuild_in_progress();
}
double log_buffer_processing_time() const;
public:
const G1Predictions& predictor() const { return _predictor; }
const G1Analytics* analytics() const { return const_cast<const G1Analytics*>(_analytics); }
@ -311,7 +313,7 @@ public:
// Record the start and end of an evacuation pause.
void record_collection_pause_start(double start_time_sec);
virtual void record_collection_pause_end(double pause_time_ms, size_t cards_scanned, size_t heap_used_bytes_before_gc);
virtual void record_collection_pause_end(double pause_time_ms, size_t heap_used_bytes_before_gc);
// Record the start and end of a full collection.
void record_full_collection_start();

File diff suppressed because it is too large

View File

@ -46,6 +46,7 @@ class G1CMBitMap;
class G1HotCardCache;
class G1RemSetScanState;
class G1ParScanThreadState;
class G1ParScanThreadStateSet;
class G1Policy;
class G1ScanCardClosure;
class HeapRegionClaimer;
@ -84,39 +85,39 @@ public:
G1HotCardCache* hot_card_cache);
~G1RemSet();
// Scan all remembered sets of the collection set for references into the collection
// set.
// Further applies heap_region_codeblobs on the oops of the unmarked nmethods on the strong code
// roots list for each region in the collection set.
void scan_rem_set(G1ParScanThreadState* pss,
uint worker_i,
G1GCPhaseTimes::GCParPhases scan_phase,
G1GCPhaseTimes::GCParPhases objcopy_phase,
G1GCPhaseTimes::GCParPhases coderoots_phase);
// Scan all cards in the non-collection set regions that potentially contain
// references into the current whole collection set.
void scan_heap_roots(G1ParScanThreadState* pss,
uint worker_id,
G1GCPhaseTimes::GCParPhases scan_phase,
G1GCPhaseTimes::GCParPhases objcopy_phase);
// Flush remaining refinement buffers for cross-region references to either evacuate references
// into the collection set or update the remembered set.
void update_rem_set(G1ParScanThreadState* pss, uint worker_i);
// Merge cards from various sources (remembered sets, hot card cache, log buffers)
// and calculate the cards that need to be scanned later (via scan_heap_roots()).
// If remembered_set_only is set, only merge remembered set cards.
void merge_heap_roots(bool remembered_set_only, G1GCPhaseTimes::GCParPhases merge_phase);
// Prepare for and cleanup after scanning the remembered sets. Must be called
// Prepare for and cleanup after scanning the heap roots. Must be called
// once before and after in sequential code.
void prepare_for_scan_rem_set();
void cleanup_after_scan_rem_set();
// Prepares the given region for remembered set scanning.
void prepare_for_scan_rem_set(uint region_idx);
void prepare_for_scan_heap_roots();
// Cleans the card table from temporary duplicate detection information.
void cleanup_after_scan_heap_roots();
// Prepares the given region for heap root scanning.
void prepare_for_scan_heap_roots(uint region_idx);
G1RemSetScanState* scan_state() const { return _scan_state; }
// Do work for regions in the current increment of the collection set, scanning
// non-card based (heap) roots.
void scan_collection_set_regions(G1ParScanThreadState* pss,
uint worker_id,
G1GCPhaseTimes::GCParPhases scan_phase,
G1GCPhaseTimes::GCParPhases coderoots_phase,
G1GCPhaseTimes::GCParPhases objcopy_phase);
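
Taken together, these declarations replace the old per-region "update then scan the remembered set" split with a merge-then-scan model. The following is a rough, standalone sketch of the intended call order during a pause (stub functions only; the real call sites live in the evacuation code, and the remembered_set_only distinction between the initial and optional collection set is an assumption drawn from the comment above):

#include <cstdio>

// Standalone model of the pause structure implied by the interface above.
static void prepare_for_scan_heap_roots()      { std::puts("prepare scan state / card table"); }
static void merge_heap_roots(bool remset_only) {
  std::printf("merge %s into the card table\n",
              remset_only ? "remembered sets only" : "remembered sets, hot card cache and log buffers");
}
static void scan_heap_roots()                  { std::puts("scan merged cards outside the collection set"); }
static void scan_collection_set_regions()      { std::puts("scan non-card heap roots of collection set regions"); }
static void cleanup_after_scan_heap_roots()    { std::puts("clear duplicate-detection information"); }

int main() {
  prepare_for_scan_heap_roots();
  // Initial collection set: merge all card sources, then scan.
  merge_heap_roots(/* remembered_set_only */ false);
  scan_heap_roots();
  scan_collection_set_regions();
  // An optional collection set increment would repeat the merge (remembered
  // sets only, since log buffers were already merged) and the two scans.
  cleanup_after_scan_heap_roots();
  return 0;
}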
// Refine the card corresponding to "card_ptr". Safe to be called concurrently
// to the mutator.
void refine_card_concurrently(CardValue* card_ptr,
uint worker_i);
// Refine the card corresponding to "card_ptr", applying the given closure to
// all references found. Must only be called during gc.
// Returns whether the card has been scanned.
bool refine_card_during_gc(CardValue* card_ptr, G1ScanCardClosure* update_rs_cl);
// Print accumulated summary info from the start of the VM.
void print_summary_info();

View File

@ -49,6 +49,7 @@
int HeapRegion::LogOfHRGrainBytes = 0;
int HeapRegion::LogOfHRGrainWords = 0;
int HeapRegion::LogCardsPerRegion = 0;
size_t HeapRegion::GrainBytes = 0;
size_t HeapRegion::GrainWords = 0;
size_t HeapRegion::CardsPerRegion = 0;
@ -105,6 +106,8 @@ void HeapRegion::setup_heap_region_size(size_t initial_heap_size, size_t max_hea
guarantee(CardsPerRegion == 0, "we should only set it once");
CardsPerRegion = GrainBytes >> G1CardTable::card_shift;
LogCardsPerRegion = log2_long((jlong) CardsPerRegion);
if (G1HeapRegionSize != GrainBytes) {
FLAG_SET_ERGO(G1HeapRegionSize, GrainBytes);
}

View File

@ -60,7 +60,6 @@ class G1CollectedHeap;
class G1CMBitMap;
class G1IsAliveAndApplyClosure;
class HeapRegionRemSet;
class HeapRegionRemSetIterator;
class HeapRegion;
class HeapRegionSetBase;
class nmethod;
@ -315,6 +314,7 @@ class HeapRegion: public G1ContiguousSpace {
static int LogOfHRGrainBytes;
static int LogOfHRGrainWords;
static int LogCardsPerRegion;
static size_t GrainBytes;
static size_t GrainWords;

View File

@ -27,7 +27,7 @@
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1ConcurrentRefine.hpp"
#include "gc/g1/heapRegionManager.inline.hpp"
#include "gc/g1/heapRegionRemSet.hpp"
#include "gc/g1/heapRegionRemSet.inline.hpp"
#include "gc/shared/space.inline.hpp"
#include "memory/allocation.hpp"
#include "memory/padded.inline.hpp"
@ -42,195 +42,21 @@
const char* HeapRegionRemSet::_state_strings[] = {"Untracked", "Updating", "Complete"};
const char* HeapRegionRemSet::_short_state_strings[] = {"UNTRA", "UPDAT", "CMPLT"};
class PerRegionTable: public CHeapObj<mtGC> {
friend class OtherRegionsTable;
friend class HeapRegionRemSetIterator;
HeapRegion* _hr;
CHeapBitMap _bm;
jint _occupied;
// next pointer for free/allocated 'all' list
PerRegionTable* _next;
// prev pointer for the allocated 'all' list
PerRegionTable* _prev;
// next pointer in collision list
PerRegionTable * _collision_list_next;
// Global free list of PRTs
static PerRegionTable* volatile _free_list;
protected:
// We need access in order to union things into the base table.
BitMap* bm() { return &_bm; }
PerRegionTable(HeapRegion* hr) :
_hr(hr),
_bm(HeapRegion::CardsPerRegion, mtGC),
_occupied(0),
_next(NULL), _prev(NULL),
_collision_list_next(NULL)
{}
void add_card_work(CardIdx_t from_card, bool par) {
if (!_bm.at(from_card)) {
if (par) {
if (_bm.par_at_put(from_card, 1)) {
Atomic::inc(&_occupied);
}
} else {
_bm.at_put(from_card, 1);
_occupied++;
}
PerRegionTable* PerRegionTable::alloc(HeapRegion* hr) {
PerRegionTable* fl = _free_list;
while (fl != NULL) {
PerRegionTable* nxt = fl->next();
PerRegionTable* res = Atomic::cmpxchg(nxt, &_free_list, fl);
if (res == fl) {
fl->init(hr, true);
return fl;
} else {
fl = _free_list;
}
}
void add_reference_work(OopOrNarrowOopStar from, bool par) {
// Must make this robust in case "from" is not in "_hr", because of
// concurrency.
HeapRegion* loc_hr = hr();
// If the test below fails, then this table was reused concurrently
// with this operation. This is OK, since the old table was coarsened,
// and adding a bit to the new table is never incorrect.
if (loc_hr->is_in_reserved(from)) {
CardIdx_t from_card = OtherRegionsTable::card_within_region(from, loc_hr);
add_card_work(from_card, par);
}
}
public:
HeapRegion* hr() const { return OrderAccess::load_acquire(&_hr); }
jint occupied() const {
// Overkill, but if we ever need it...
// guarantee(_occupied == _bm.count_one_bits(), "Check");
return _occupied;
}
void init(HeapRegion* hr, bool clear_links_to_all_list) {
if (clear_links_to_all_list) {
set_next(NULL);
set_prev(NULL);
}
_collision_list_next = NULL;
_occupied = 0;
_bm.clear();
// Make sure that the bitmap clearing above has been finished before publishing
// this PRT to concurrent threads.
OrderAccess::release_store(&_hr, hr);
}
void add_reference(OopOrNarrowOopStar from) {
add_reference_work(from, /*parallel*/ true);
}
void seq_add_reference(OopOrNarrowOopStar from) {
add_reference_work(from, /*parallel*/ false);
}
void add_card(CardIdx_t from_card_index) {
add_card_work(from_card_index, /*parallel*/ true);
}
void seq_add_card(CardIdx_t from_card_index) {
add_card_work(from_card_index, /*parallel*/ false);
}
// (Destructively) union the bitmap of the current table into the given
// bitmap (which is assumed to be of the same size.)
void union_bitmap_into(BitMap* bm) {
bm->set_union(_bm);
}
// Mem size in bytes.
size_t mem_size() const {
return sizeof(PerRegionTable) + _bm.size_in_words() * HeapWordSize;
}
// Requires "from" to be in "hr()".
bool contains_reference(OopOrNarrowOopStar from) const {
assert(hr()->is_in_reserved(from), "Precondition.");
size_t card_ind = pointer_delta(from, hr()->bottom(),
G1CardTable::card_size);
return _bm.at(card_ind);
}
// Bulk-free the PRTs from prt to last, assumes that they are
// linked together using their _next field.
static void bulk_free(PerRegionTable* prt, PerRegionTable* last) {
while (true) {
PerRegionTable* fl = _free_list;
last->set_next(fl);
PerRegionTable* res = Atomic::cmpxchg(prt, &_free_list, fl);
if (res == fl) {
return;
}
}
ShouldNotReachHere();
}
static void free(PerRegionTable* prt) {
bulk_free(prt, prt);
}
// Returns an initialized PerRegionTable instance.
static PerRegionTable* alloc(HeapRegion* hr) {
PerRegionTable* fl = _free_list;
while (fl != NULL) {
PerRegionTable* nxt = fl->next();
PerRegionTable* res = Atomic::cmpxchg(nxt, &_free_list, fl);
if (res == fl) {
fl->init(hr, true);
return fl;
} else {
fl = _free_list;
}
}
assert(fl == NULL, "Loop condition.");
return new PerRegionTable(hr);
}
PerRegionTable* next() const { return _next; }
void set_next(PerRegionTable* next) { _next = next; }
PerRegionTable* prev() const { return _prev; }
void set_prev(PerRegionTable* prev) { _prev = prev; }
// Accessor and Modification routines for the pointer for the
// singly linked collision list that links the PRTs within the
// OtherRegionsTable::_fine_grain_regions hash table.
//
// It might be useful to also make the collision list doubly linked
// to avoid iteration over the collisions list during scrubbing/deletion.
// OTOH there might not be many collisions.
PerRegionTable* collision_list_next() const {
return _collision_list_next;
}
void set_collision_list_next(PerRegionTable* next) {
_collision_list_next = next;
}
PerRegionTable** collision_list_next_addr() {
return &_collision_list_next;
}
static size_t fl_mem_size() {
PerRegionTable* cur = _free_list;
size_t res = 0;
while (cur != NULL) {
res += cur->mem_size();
cur = cur->next();
}
return res;
}
static void test_fl_mem_size();
};
assert(fl == NULL, "Loop condition.");
return new PerRegionTable(hr);
}
PerRegionTable* volatile PerRegionTable::_free_list = NULL;
@ -696,175 +522,3 @@ void HeapRegionRemSet::clean_strong_code_roots(HeapRegion* hr) {
size_t HeapRegionRemSet::strong_code_roots_mem_size() {
return _code_roots.mem_size();
}
HeapRegionRemSetIterator:: HeapRegionRemSetIterator(HeapRegionRemSet* hrrs) :
_hrrs(hrrs),
_coarse_map(&hrrs->_other_regions._coarse_map),
_bot(hrrs->_bot),
_g1h(G1CollectedHeap::heap()),
_n_yielded_fine(0),
_n_yielded_coarse(0),
_n_yielded_sparse(0),
_is(Sparse),
_cur_region_card_offset(0),
// Set these values so that we increment to the first region.
_coarse_cur_region_index(-1),
_coarse_cur_region_cur_card(HeapRegion::CardsPerRegion-1),
_fine_cur_prt(NULL),
_cur_card_in_prt(HeapRegion::CardsPerRegion),
_sparse_iter(&hrrs->_other_regions._sparse_table) {}
bool HeapRegionRemSetIterator::coarse_has_next(size_t& card_index) {
if (_hrrs->_other_regions._n_coarse_entries == 0) return false;
// Go to the next card.
_coarse_cur_region_cur_card++;
// Was that the last card in the current region?
if (_coarse_cur_region_cur_card == HeapRegion::CardsPerRegion) {
// Yes: find the next region. This may leave _coarse_cur_region_index
// Set to the last index, in which case there are no more coarse
// regions.
_coarse_cur_region_index =
(int) _coarse_map->get_next_one_offset(_coarse_cur_region_index + 1);
if ((size_t)_coarse_cur_region_index < _coarse_map->size()) {
_coarse_cur_region_cur_card = 0;
HeapWord* r_bot =
_g1h->region_at((uint) _coarse_cur_region_index)->bottom();
_cur_region_card_offset = _bot->index_for_raw(r_bot);
} else {
return false;
}
}
// If we didn't return false above, then we can yield a card.
card_index = _cur_region_card_offset + _coarse_cur_region_cur_card;
return true;
}
bool HeapRegionRemSetIterator::fine_has_next(size_t& card_index) {
if (fine_has_next()) {
_cur_card_in_prt =
_fine_cur_prt->_bm.get_next_one_offset(_cur_card_in_prt + 1);
}
if (_cur_card_in_prt == HeapRegion::CardsPerRegion) {
// _fine_cur_prt may still be NULL in case there are no PRTs at all for
// the remembered set.
if (_fine_cur_prt == NULL || _fine_cur_prt->next() == NULL) {
return false;
}
PerRegionTable* next_prt = _fine_cur_prt->next();
switch_to_prt(next_prt);
_cur_card_in_prt = _fine_cur_prt->_bm.get_next_one_offset(_cur_card_in_prt + 1);
}
card_index = _cur_region_card_offset + _cur_card_in_prt;
guarantee(_cur_card_in_prt < HeapRegion::CardsPerRegion,
"Card index " SIZE_FORMAT " must be within the region", _cur_card_in_prt);
return true;
}
bool HeapRegionRemSetIterator::fine_has_next() {
return _cur_card_in_prt != HeapRegion::CardsPerRegion;
}
void HeapRegionRemSetIterator::switch_to_prt(PerRegionTable* prt) {
assert(prt != NULL, "Cannot switch to NULL prt");
_fine_cur_prt = prt;
HeapWord* r_bot = _fine_cur_prt->hr()->bottom();
_cur_region_card_offset = _bot->index_for_raw(r_bot);
// The bitmap scan for the PRT always scans from _cur_region_cur_card + 1.
// To avoid special-casing this start case, and not miss the first bitmap
// entry, initialize _cur_region_cur_card with -1 instead of 0.
_cur_card_in_prt = (size_t)-1;
}
bool HeapRegionRemSetIterator::has_next(size_t& card_index) {
switch (_is) {
case Sparse: {
if (_sparse_iter.has_next(card_index)) {
_n_yielded_sparse++;
return true;
}
// Otherwise, deliberate fall-through
_is = Fine;
PerRegionTable* initial_fine_prt = _hrrs->_other_regions._first_all_fine_prts;
if (initial_fine_prt != NULL) {
switch_to_prt(_hrrs->_other_regions._first_all_fine_prts);
}
}
case Fine:
if (fine_has_next(card_index)) {
_n_yielded_fine++;
return true;
}
// Otherwise, deliberate fall-through
_is = Coarse;
case Coarse:
if (coarse_has_next(card_index)) {
_n_yielded_coarse++;
return true;
}
// Otherwise...
break;
}
return false;
}
#ifndef PRODUCT
void HeapRegionRemSet::test() {
os::sleep(Thread::current(), (jlong)5000, false);
G1CollectedHeap* g1h = G1CollectedHeap::heap();
// Run with "-XX:G1LogRSetRegionEntries=2", so that 1 and 5 end up in same
// hash bucket.
HeapRegion* hr0 = g1h->region_at(0);
HeapRegion* hr1 = g1h->region_at(1);
HeapRegion* hr2 = g1h->region_at(5);
HeapRegion* hr3 = g1h->region_at(6);
HeapRegion* hr4 = g1h->region_at(7);
HeapRegion* hr5 = g1h->region_at(8);
HeapWord* hr1_start = hr1->bottom();
HeapWord* hr1_mid = hr1_start + HeapRegion::GrainWords/2;
HeapWord* hr1_last = hr1->end() - 1;
HeapWord* hr2_start = hr2->bottom();
HeapWord* hr2_mid = hr2_start + HeapRegion::GrainWords/2;
HeapWord* hr2_last = hr2->end() - 1;
HeapWord* hr3_start = hr3->bottom();
HeapWord* hr3_mid = hr3_start + HeapRegion::GrainWords/2;
HeapWord* hr3_last = hr3->end() - 1;
HeapRegionRemSet* hrrs = hr0->rem_set();
// Make three references from region 0x101...
hrrs->add_reference((OopOrNarrowOopStar)hr1_start);
hrrs->add_reference((OopOrNarrowOopStar)hr1_mid);
hrrs->add_reference((OopOrNarrowOopStar)hr1_last);
hrrs->add_reference((OopOrNarrowOopStar)hr2_start);
hrrs->add_reference((OopOrNarrowOopStar)hr2_mid);
hrrs->add_reference((OopOrNarrowOopStar)hr2_last);
hrrs->add_reference((OopOrNarrowOopStar)hr3_start);
hrrs->add_reference((OopOrNarrowOopStar)hr3_mid);
hrrs->add_reference((OopOrNarrowOopStar)hr3_last);
// Now cause a coarsening.
hrrs->add_reference((OopOrNarrowOopStar)hr4->bottom());
hrrs->add_reference((OopOrNarrowOopStar)hr5->bottom());
// Now, does iteration yield these three?
HeapRegionRemSetIterator iter(hrrs);
size_t sum = 0;
size_t card_index;
while (iter.has_next(card_index)) {
HeapWord* card_start = g1h->bot()->address_for_index(card_index);
tty->print_cr(" Card " PTR_FORMAT ".", p2i(card_start));
sum++;
}
guarantee(sum == 11 - 3 + 2048, "Failure");
guarantee(sum == hrrs->occupied(), "Failure");
}
#endif

View File

@ -28,6 +28,7 @@
#include "gc/g1/g1CodeCacheRemSet.hpp"
#include "gc/g1/g1FromCardCache.hpp"
#include "gc/g1/sparsePRT.hpp"
#include "utilities/bitMap.hpp"
// Remembered set for a heap region. Represent a set of "cards" that
// contain pointers into the owner heap region. Cards are defined somewhat
@ -37,7 +38,6 @@ class G1CollectedHeap;
class G1BlockOffsetTable;
class G1CardLiveData;
class HeapRegion;
class HeapRegionRemSetIterator;
class PerRegionTable;
class SparsePRT;
class nmethod;
@ -67,8 +67,6 @@ class nmethod;
// thinking the PRT is for a different region, does no harm.
class OtherRegionsTable {
friend class HeapRegionRemSetIterator;
G1CollectedHeap* _g1h;
Mutex* _m;
@ -125,6 +123,9 @@ public:
// Create a new remembered set. The given mutex is used to ensure consistency.
OtherRegionsTable(Mutex* m);
template <class Closure>
void iterate(Closure& v);
// Returns the card index of the given within_region pointer relative to the bottom
// of the given heap region.
static CardIdx_t card_within_region(OopOrNarrowOopStar within_region, HeapRegion* hr);
@ -157,9 +158,140 @@ public:
void clear();
};
class PerRegionTable: public CHeapObj<mtGC> {
friend class OtherRegionsTable;
HeapRegion* _hr;
CHeapBitMap _bm;
jint _occupied;
// next pointer for free/allocated 'all' list
PerRegionTable* _next;
// prev pointer for the allocated 'all' list
PerRegionTable* _prev;
// next pointer in collision list
PerRegionTable * _collision_list_next;
// Global free list of PRTs
static PerRegionTable* volatile _free_list;
protected:
PerRegionTable(HeapRegion* hr) :
_hr(hr),
_bm(HeapRegion::CardsPerRegion, mtGC),
_occupied(0),
_next(NULL), _prev(NULL),
_collision_list_next(NULL)
{}
inline void add_card_work(CardIdx_t from_card, bool par);
inline void add_reference_work(OopOrNarrowOopStar from, bool par);
public:
// We need access in order to union things into the base table.
BitMap* bm() { return &_bm; }
HeapRegion* hr() const { return OrderAccess::load_acquire(&_hr); }
jint occupied() const {
// Overkill, but if we ever need it...
// guarantee(_occupied == _bm.count_one_bits(), "Check");
return _occupied;
}
void init(HeapRegion* hr, bool clear_links_to_all_list);
inline void add_reference(OopOrNarrowOopStar from);
inline void seq_add_reference(OopOrNarrowOopStar from);
inline void add_card(CardIdx_t from_card_index);
void seq_add_card(CardIdx_t from_card_index);
// (Destructively) union the bitmap of the current table into the given
// bitmap (which is assumed to be of the same size.)
void union_bitmap_into(BitMap* bm) {
bm->set_union(_bm);
}
// Mem size in bytes.
size_t mem_size() const {
return sizeof(PerRegionTable) + _bm.size_in_words() * HeapWordSize;
}
// Requires "from" to be in "hr()".
bool contains_reference(OopOrNarrowOopStar from) const {
assert(hr()->is_in_reserved(from), "Precondition.");
size_t card_ind = pointer_delta(from, hr()->bottom(),
G1CardTable::card_size);
return _bm.at(card_ind);
}
// Bulk-free the PRTs from prt to last, assumes that they are
// linked together using their _next field.
static void bulk_free(PerRegionTable* prt, PerRegionTable* last) {
while (true) {
PerRegionTable* fl = _free_list;
last->set_next(fl);
PerRegionTable* res = Atomic::cmpxchg(prt, &_free_list, fl);
if (res == fl) {
return;
}
}
ShouldNotReachHere();
}
static void free(PerRegionTable* prt) {
bulk_free(prt, prt);
}
// Returns an initialized PerRegionTable instance.
static PerRegionTable* alloc(HeapRegion* hr);
PerRegionTable* next() const { return _next; }
void set_next(PerRegionTable* next) { _next = next; }
PerRegionTable* prev() const { return _prev; }
void set_prev(PerRegionTable* prev) { _prev = prev; }
// Accessor and Modification routines for the pointer for the
// singly linked collision list that links the PRTs within the
// OtherRegionsTable::_fine_grain_regions hash table.
//
// It might be useful to also make the collision list doubly linked
// to avoid iteration over the collisions list during scrubbing/deletion.
// OTOH there might not be many collisions.
PerRegionTable* collision_list_next() const {
return _collision_list_next;
}
void set_collision_list_next(PerRegionTable* next) {
_collision_list_next = next;
}
PerRegionTable** collision_list_next_addr() {
return &_collision_list_next;
}
static size_t fl_mem_size() {
PerRegionTable* cur = _free_list;
size_t res = 0;
while (cur != NULL) {
res += cur->mem_size();
cur = cur->next();
}
return res;
}
static void test_fl_mem_size();
};
class HeapRegionRemSet : public CHeapObj<mtGC> {
friend class VMStructs;
friend class HeapRegionRemSetIterator;
private:
G1BlockOffsetTable* _bot;
@ -182,18 +314,23 @@ public:
// Setup sparse and fine-grain tables sizes.
static void setup_remset_size();
bool cardset_is_empty() const {
return _other_regions.is_empty();
}
bool is_empty() const {
return (strong_code_roots_list_length() == 0) && cardset_is_empty();
return (strong_code_roots_list_length() == 0) && _other_regions.is_empty();
}
bool occupancy_less_or_equal_than(size_t occ) const {
return (strong_code_roots_list_length() == 0) && _other_regions.occupancy_less_or_equal_than(occ);
}
// For each PRT in the card (remembered) set call one of the following methods
// of the given closure:
//
// next_coarse_prt(uint region_idx) - pass the region index for coarse PRTs
// next_fine_prt(uint region_idx, BitMap* bitmap) - pass the region index and bitmap for fine PRTs
// next_sparse_prt(uint region_idx, elem_t* cards, uint num_cards) - pass region index and cards for sparse PRTs
template <class Closure>
inline void iterate_prts(Closure& cl);
size_t occupied() {
MutexLocker x(&_m, Mutex::_no_safepoint_check_flag);
return occupied_locked();
@ -339,70 +476,4 @@ public:
#endif
};
class HeapRegionRemSetIterator : public StackObj {
private:
// The region RSet over which we are iterating.
HeapRegionRemSet* _hrrs;
// Local caching of HRRS fields.
const BitMap* _coarse_map;
G1BlockOffsetTable* _bot;
G1CollectedHeap* _g1h;
// The number of cards yielded since initialization.
size_t _n_yielded_fine;
size_t _n_yielded_coarse;
size_t _n_yielded_sparse;
// Indicates what granularity of table that we are currently iterating over.
// We start iterating over the sparse table, progress to the fine grain
// table, and then finish with the coarse table.
enum IterState {
Sparse,
Fine,
Coarse
};
IterState _is;
// For both Coarse and Fine remembered set iteration this contains the
// first card number of the heap region we currently iterate over.
size_t _cur_region_card_offset;
// Current region index for the Coarse remembered set iteration.
int _coarse_cur_region_index;
size_t _coarse_cur_region_cur_card;
bool coarse_has_next(size_t& card_index);
// The PRT we are currently iterating over.
PerRegionTable* _fine_cur_prt;
// Card offset within the current PRT.
size_t _cur_card_in_prt;
// Update internal variables when switching to the given PRT.
void switch_to_prt(PerRegionTable* prt);
bool fine_has_next();
bool fine_has_next(size_t& card_index);
// The Sparse remembered set iterator.
SparsePRTIter _sparse_iter;
public:
HeapRegionRemSetIterator(HeapRegionRemSet* hrrs);
// If there remains one or more cards to be yielded, returns true and
// sets "card_index" to one of those cards (which is then considered
// yielded.) Otherwise, returns false (and leaves "card_index"
// undefined.)
bool has_next(size_t& card_index);
size_t n_yielded_fine() { return _n_yielded_fine; }
size_t n_yielded_coarse() { return _n_yielded_coarse; }
size_t n_yielded_sparse() { return _n_yielded_sparse; }
size_t n_yielded() {
return n_yielded_fine() + n_yielded_coarse() + n_yielded_sparse();
}
};
#endif // SHARE_GC_G1_HEAPREGIONREMSET_HPP

View File

@ -0,0 +1,119 @@
/*
* Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_VM_GC_G1_HEAPREGIONREMSET_INLINE_HPP
#define SHARE_VM_GC_G1_HEAPREGIONREMSET_INLINE_HPP
#include "gc/g1/heapRegion.inline.hpp"
#include "gc/g1/heapRegionRemSet.hpp"
#include "gc/g1/sparsePRT.hpp"
#include "utilities/bitMap.inline.hpp"
template <class Closure>
inline void HeapRegionRemSet::iterate_prts(Closure& cl) {
_other_regions.iterate(cl);
}
inline void PerRegionTable::add_card_work(CardIdx_t from_card, bool par) {
if (!_bm.at(from_card)) {
if (par) {
if (_bm.par_set_bit(from_card)) {
Atomic::inc(&_occupied);
}
} else {
_bm.set_bit(from_card);
_occupied++;
}
}
}
inline void PerRegionTable::add_reference_work(OopOrNarrowOopStar from, bool par) {
// Must make this robust in case "from" is not in "_hr", because of
// concurrency.
HeapRegion* loc_hr = hr();
// If the test below fails, then this table was reused concurrently
// with this operation. This is OK, since the old table was coarsened,
// and adding a bit to the new table is never incorrect.
if (loc_hr->is_in_reserved(from)) {
CardIdx_t from_card = OtherRegionsTable::card_within_region(from, loc_hr);
add_card_work(from_card, par);
}
}
inline void PerRegionTable::add_card(CardIdx_t from_card_index) {
add_card_work(from_card_index, /*parallel*/ true);
}
inline void PerRegionTable::seq_add_card(CardIdx_t from_card_index) {
add_card_work(from_card_index, /*parallel*/ false);
}
inline void PerRegionTable::add_reference(OopOrNarrowOopStar from) {
add_reference_work(from, /*parallel*/ true);
}
inline void PerRegionTable::seq_add_reference(OopOrNarrowOopStar from) {
add_reference_work(from, /*parallel*/ false);
}
inline void PerRegionTable::init(HeapRegion* hr, bool clear_links_to_all_list) {
if (clear_links_to_all_list) {
set_next(NULL);
set_prev(NULL);
}
_collision_list_next = NULL;
_occupied = 0;
_bm.clear();
// Make sure that the bitmap clearing above has been finished before publishing
// this PRT to concurrent threads.
OrderAccess::release_store(&_hr, hr);
}
template <class Closure>
void OtherRegionsTable::iterate(Closure& cl) {
if (_n_coarse_entries > 0) {
BitMap::idx_t cur = _coarse_map.get_next_one_offset(0);
while (cur != _coarse_map.size()) {
cl.next_coarse_prt((uint)cur);
cur = _coarse_map.get_next_one_offset(cur + 1);
}
}
{
PerRegionTable* cur = _first_all_fine_prts;
while (cur != NULL) {
cl.next_fine_prt(cur->hr()->hrm_index(), cur->bm());
cur = cur->next();
}
}
{
SparsePRTBucketIter iter(&_sparse_table);
SparsePRTEntry* cur;
while (iter.has_next(cur)) {
cl.next_sparse_prt(cur->r_ind(), cur->cards(), cur->num_valid_cards());
}
}
}
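
A hedged sketch of the closure shape this template expects: it must provide the three next_*_prt callbacks invoked above. The types and the counting logic below are stand-ins for illustration, not the merge closure G1 actually uses:

#include <cstdint>
#include <cstdio>

// Minimal stand-ins for the HotSpot types seen by the closure.
struct BitMap { size_t count_one_bits() const { return 0; } };
typedef uint16_t card_elem_t;

// One callback per remembered set granularity; here we only count cards.
class CountCardsClosure {
  size_t _coarse_regions = 0;
  size_t _fine_cards = 0;
  size_t _sparse_cards = 0;
public:
  // Coarse entry: every card of the given region is remembered.
  void next_coarse_prt(uint32_t region_idx) { (void)region_idx; _coarse_regions++; }
  // Fine entry: the region's cards are described by a bitmap.
  void next_fine_prt(uint32_t region_idx, BitMap* bm) { (void)region_idx; _fine_cards += bm->count_one_bits(); }
  // Sparse entry: the cards are given as an explicit array.
  void next_sparse_prt(uint32_t region_idx, card_elem_t* cards, uint32_t num_cards) {
    (void)region_idx; (void)cards; _sparse_cards += num_cards;
  }
  void print() const {
    std::printf("coarse regions: %zu, fine cards: %zu, sparse cards: %zu\n",
                _coarse_regions, _fine_cards, _sparse_cards);
  }
};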
#endif // SHARE_VM_GC_G1_HEAPREGIONREMSET_INLINE_HPP

View File

@ -275,6 +275,19 @@ bool RSHashTableIter::has_next(size_t& card_index) {
return false;
}
bool RSHashTableBucketIter::has_next(SparsePRTEntry*& entry) {
while (_bl_ind == RSHashTable::NullEntry) {
if (_tbl_ind == (int)_rsht->capacity() - 1) {
return false;
}
_tbl_ind++;
_bl_ind = _rsht->_buckets[_tbl_ind];
}
entry = _rsht->entry(_bl_ind);
_bl_ind = entry->next_index();
return true;
}
bool RSHashTable::contains_card(RegionIdx_t region_index, CardIdx_t card_index) const {
SparsePRTEntry* e = get_entry(region_index);
return (e != NULL && e->contains_card(card_index));

View File

@ -38,10 +38,11 @@
// that might contain pointers into the owner region.
class SparsePRTEntry: public CHeapObj<mtGC> {
private:
public:
// The type of a card entry.
typedef uint16_t card_elem_t;
private:
// We need to make sizeof(SparsePRTEntry) an even multiple of maximum member size,
// in order to force correct alignment that could otherwise cause SIGBUS errors
// when reading the member variables. This calculates the minimum number of card
@ -96,6 +97,8 @@ public:
// Copy the current entry's cards into the "_card" array of "e."
inline void copy_cards(SparsePRTEntry* e) const;
card_elem_t* cards() { return _cards; }
inline CardIdx_t card(int i) const {
assert(i >= 0, "must be nonnegative");
assert(i < cards_num(), "range checking");
@ -106,7 +109,7 @@ public:
class RSHashTable : public CHeapObj<mtGC> {
friend class RSHashTableIter;
friend class RSHashTableBucketIter;
// Inverse maximum hash table occupancy used.
static float TableOccupancyFactor;
@ -209,12 +212,29 @@ public:
bool has_next(size_t& card_index);
};
// This is embedded in HRRS iterator.
class RSHashTableBucketIter {
int _tbl_ind; // [-1, 0.._rsht->_capacity)
int _bl_ind; // [-1, 0.._rsht->_capacity)
RSHashTable* _rsht;
public:
RSHashTableBucketIter(RSHashTable* rsht) :
_tbl_ind(0),
_bl_ind(rsht->_buckets[_tbl_ind]),
_rsht(rsht) { }
bool has_next(SparsePRTEntry*& entry);
};
// Concurrent access to a SparsePRT must be serialized by some external mutex.
class SparsePRTIter;
class SparsePRT {
friend class SparsePRTIter;
friend class SparsePRTBucketIter;
RSHashTable* _table;
@ -262,4 +282,14 @@ public:
}
};
class SparsePRTBucketIter: public RSHashTableBucketIter {
public:
SparsePRTBucketIter(const SparsePRT* sprt) :
RSHashTableBucketIter(sprt->_table) {}
bool has_next(SparsePRTEntry*& entry) {
return RSHashTableBucketIter::has_next(entry);
}
};
#endif // SHARE_GC_G1_SPARSEPRT_HPP

View File

@ -103,15 +103,11 @@ protected:
enum CardValues {
clean_card = (CardValue)-1,
// The mask contains zeros in places for all other values.
clean_card_mask = clean_card - 31,
dirty_card = 0,
precleaned_card = 1,
claimed_card = 2,
deferred_card = 4,
last_card = 8,
CT_MR_BS_last_reserved = 16
last_card = 2,
CT_MR_BS_last_reserved = 4
};
// a word's worth (row) of clean card values
@ -242,11 +238,8 @@ public:
};
static CardValue clean_card_val() { return clean_card; }
static CardValue clean_card_mask_val() { return clean_card_mask; }
static CardValue dirty_card_val() { return dirty_card; }
static CardValue claimed_card_val() { return claimed_card; }
static CardValue precleaned_card_val() { return precleaned_card; }
static CardValue deferred_card_val() { return deferred_card; }
static intptr_t clean_card_row_val() { return clean_card_row; }
// Card marking array base (adjusted for heap low boundary)

View File

@ -34,7 +34,7 @@ template <class T>
class WorkerDataArray : public CHeapObj<mtGC> {
friend class WDAPrinter;
public:
static const uint MaxThreadWorkItems = 5;
static const uint MaxThreadWorkItems = 6;
private:
T* _data;
uint _length;

View File

@ -101,7 +101,7 @@ size_t WorkerDataArray<T>::get_thread_work_item(uint worker_i, uint index) {
template <typename T>
void WorkerDataArray<T>::add(uint worker_i, T value) {
assert(worker_i < _length, "Worker %d is greater than max: %d", worker_i, _length);
assert(_data[worker_i] != uninitialized(), "No data to add to for worker %d", worker_i);
assert(_data[worker_i] != uninitialized(), "No data to add to %s for worker %d", _title, worker_i);
_data[worker_i] += value;
}

View File

@ -95,21 +95,28 @@ public class TestGCLogMessages {
new LogMessageWithLevel("Post Evacuate Collection Set", Level.INFO),
new LogMessageWithLevel("Other", Level.INFO),
// Update RS
new LogMessageWithLevel("Update RS", Level.DEBUG),
// Merge Heap Roots
new LogMessageWithLevel("Merge Heap Roots", Level.INFO),
new LogMessageWithLevel("Remembered Sets", Level.DEBUG),
new LogMessageWithLevel("Merged Sparse", Level.DEBUG),
new LogMessageWithLevel("Merged Fine", Level.DEBUG),
new LogMessageWithLevel("Merged Coarse", Level.DEBUG),
new LogMessageWithLevel("Hot Card Cache", Level.DEBUG),
new LogMessageWithLevel("Log Buffers", Level.DEBUG),
new LogMessageWithLevel("Processed Buffers", Level.DEBUG),
new LogMessageWithLevel("Scanned Cards", Level.DEBUG),
new LogMessageWithLevel("Dirty Cards", Level.DEBUG),
new LogMessageWithLevel("Skipped Cards", Level.DEBUG),
new LogMessageWithLevel("Scan HCC", Level.DEBUG),
// Scan RS
new LogMessageWithLevel("Scan RS", Level.DEBUG),
// Scan Heap Roots
new LogMessageWithLevel("Scan Heap Roots", Level.DEBUG),
new LogMessageWithLevel("Scanned Cards", Level.DEBUG),
new LogMessageWithLevel("Claimed Cards", Level.DEBUG),
new LogMessageWithLevel("Skipped Cards", Level.DEBUG),
new LogMessageWithLevel("Scanned Blocks", Level.DEBUG),
new LogMessageWithLevel("Claimed Chunks", Level.DEBUG),
// Code Roots Scan
new LogMessageWithLevel("Code Root Scan", Level.DEBUG),
// Object Copy
new LogMessageWithLevel("Object Copy", Level.DEBUG),
new LogMessageWithLevel("Scanned Cards", Level.DEBUG),
new LogMessageWithLevel("Claimed Cards", Level.DEBUG),
new LogMessageWithLevel("LAB Waste", Level.DEBUG),
new LogMessageWithLevel("LAB Undo Waste", Level.DEBUG),
// Ext Root Scan
new LogMessageWithLevel("Thread Roots", Level.TRACE),
new LogMessageWithLevel("Universe Roots", Level.TRACE),
@ -133,6 +140,7 @@ public class TestGCLogMessages {
new LogMessageWithLevel("Table Fixup", Level.DEBUG),
new LogMessageWithLevel("Expand Heap After Collection", Level.DEBUG),
new LogMessageWithLevel("Region Register", Level.DEBUG),
new LogMessageWithLevel("Prepare Heap Roots", Level.DEBUG),
// Free CSet
new LogMessageWithLevel("Free Collection Set", Level.DEBUG),
new LogMessageWithLevel("Free Collection Set Serial", Level.TRACE),

View File

@ -100,20 +100,30 @@ public class TestG1ParallelPhases {
"CMRefRoots",
"WaitForStrongCLD",
"WeakCLDRoots",
"UpdateRS",
"ScanHCC",
"ScanRS",
"MergeHCC",
"MergeRS",
"MergeLB",
"ScanHR",
"CodeRoots",
"ObjCopy",
"Termination",
"StringDedupQueueFixup",
"StringDedupTableFixup",
"RedirtyCards",
// "PreserveCMReferents",
"NonYoungFreeCSet",
"YoungFreeCSet"
);
// Some GC phases may or may not occur depending on environment. Filter them out
// since we can not reliably guarantee that they occur (or not).
Set<String> optPhases = of(
"OptScanHR",
"OptMergeRS",
"OptCodeRoots",
"OptObjCopy"
);
usedPhases.removeAll(optPhases);
assertTrue(usedPhases.equals(allPhases), "Compare events expected and received"
+ ", Not found phases: " + allPhases.stream().filter(p -> !usedPhases.contains(p)).collect(joining(", "))
+ ", Not expected phases: " + usedPhases.stream().filter(p -> !allPhases.contains(p)).collect(joining(", ")));