8325553: Parallel: Use per-marker cache for marking stats during Full GC

Reviewed-by: sjohanss, tschatzl
Albert Mingkun Yang 2024-02-26 15:34:10 +00:00
parent 20c71ceacd
commit 2d3c9c5e67
5 changed files with 119 additions and 2 deletions
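
At a high level, the change replaces one update of a shared per-region counter
per marked object with a worker-local, direct-mapped cache that batches
live-word counts and is flushed once after marking. A minimal standalone
sketch of the idea (the names, sizes, and std::atomic counters here are
illustrative, not the JDK code):

    #include <atomic>
    #include <cstddef>

    constexpr size_t kNumRegions = 1 << 16;   // illustrative region count
    constexpr size_t kEntries    = 1024;      // power of two, as in the patch

    // Shared per-region live-word counters (the contended state).
    std::atomic<size_t> global_live_words[kNumRegions];

    // Per-worker, direct-mapped cache: one slot per low bits of the region id.
    struct LocalCache {
      struct { size_t region_id; size_t words; } slot[kEntries] = {};

      void add(size_t region_id, size_t words) {
        size_t i = region_id & (kEntries - 1);
        if (slot[i].words != 0 && slot[i].region_id != region_id) {
          flush_slot(i);                      // conflict: evict the old entry
        }
        slot[i].region_id = region_id;
        slot[i].words += words;
      }

      void flush_slot(size_t i) {
        global_live_words[slot[i].region_id]
            .fetch_add(slot[i].words, std::memory_order_relaxed);
        slot[i].words = 0;
      }

      void flush_all() {                      // called once after marking
        for (size_t i = 0; i < kEntries; ++i) {
          if (slot[i].words != 0) flush_slot(i);
        }
      }
    };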

@@ -61,6 +61,7 @@ ParCompactionManager::ParCompactionManager() {
reset_bitmap_query_cache();
_deferred_obj_array = new (mtGC) GrowableArray<HeapWord*>(10, mtGC);
_marking_stats_cache = nullptr;
}
void ParCompactionManager::initialize(ParMarkBitMap* mbm) {

@@ -111,7 +111,35 @@ class ParCompactionManager : public CHeapObj<mtGC> {
static RegionTaskQueueSet* region_task_queues() { return _region_task_queues; }
OopTaskQueue* oop_stack() { return &_oop_stack; }
public:
// To collect per-region live-words in a worker-local cache in order to
// reduce thread contention.
class MarkingStatsCache : public CHeapObj<mtGC> {
constexpr static size_t num_entries = 1024;
static_assert(is_power_of_2(num_entries), "inv");
static_assert(num_entries > 0, "inv");
constexpr static size_t entry_mask = num_entries - 1;
struct CacheEntry {
size_t region_id;
size_t live_words;
};
CacheEntry entries[num_entries] = {};
inline void push(size_t region_id, size_t live_words);
public:
inline void push(oop obj, size_t live_words);
inline void evict(size_t index);
inline void evict_all();
};
MarkingStatsCache* _marking_stats_cache;
public:
static const size_t InvalidShadow = ~0;
static size_t pop_shadow_region_mt_safe(PSParallelCompact::RegionData* region_ptr);
static void push_shadow_region_mt_safe(size_t shadow_region);
@@ -198,6 +226,10 @@ class ParCompactionManager : public CHeapObj<mtGC> {
virtual void do_void();
};
inline void create_marking_stats_cache();
inline void flush_and_destroy_marking_stats_cache();
// Called after marking.
static void verify_all_marking_stack_empty() NOT_DEBUG_RETURN;

@@ -107,6 +107,8 @@ inline void ParCompactionManager::mark_and_push(T* p) {
assert(ParallelScavengeHeap::heap()->is_in(obj), "should be in heap");
if (mark_bitmap()->is_unmarked(obj) && PSParallelCompact::mark_obj(obj)) {
assert(_marking_stats_cache != nullptr, "inv");
_marking_stats_cache->push(obj, obj->size());
push(obj);
if (StringDedup::is_enabled() &&
@@ -174,4 +176,73 @@ inline void ParCompactionManager::follow_contents(oop obj) {
}
}
inline void ParCompactionManager::MarkingStatsCache::push(size_t region_id, size_t live_words) {
size_t index = (region_id & entry_mask);
if (entries[index].region_id == region_id) {
// Hit
entries[index].live_words += live_words;
return;
}
// Miss
if (entries[index].live_words != 0) {
evict(index);
}
entries[index].region_id = region_id;
entries[index].live_words = live_words;
}
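// A hypothetical walk-through of the direct-mapped policy above, with
// num_entries == 1024 (so entry_mask == 1023):
//   push(5,    10)  -> slot 5 is empty           : install {5, 10}
//   push(5,     7)  -> slot 5 already holds id 5 : hit, slot becomes {5, 17}
//   push(1029,  3)  -> 1029 & 1023 == 5          : conflict; evict {5, 17} to
//                      the global region table, then install {1029, 3}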
inline void ParCompactionManager::MarkingStatsCache::push(oop obj, size_t live_words) {
ParallelCompactData& data = PSParallelCompact::summary_data();
const size_t region_size = ParallelCompactData::RegionSize;
HeapWord* addr = cast_from_oop<HeapWord*>(obj);
const size_t start_region_id = data.addr_to_region_idx(addr);
const size_t end_region_id = data.addr_to_region_idx(addr + live_words - 1);
if (start_region_id == end_region_id) {
// Completely inside this region
push(start_region_id, live_words);
return;
}
// First region
push(start_region_id, region_size - data.region_offset(addr));
// Middle regions; bypass cache
for (size_t i = start_region_id + 1; i < end_region_id; ++i) {
data.region(i)->set_partial_obj_size(region_size);
data.region(i)->set_partial_obj_addr(addr);
}
// Last region; bypass cache
const size_t end_offset = data.region_offset(addr + live_words - 1);
data.region(end_region_id)->set_partial_obj_size(end_offset + 1);
data.region(end_region_id)->set_partial_obj_addr(addr);
}
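// A worked example of the split above (a RegionSize of 512 words is assumed
// purely for the arithmetic): an object of 1100 live words starting at
// offset 500 in region 7 covers regions 7..10.
//   first region : push(7, 512 - 500) accounts 12 words through the cache
//   middle       : regions 8 and 9 each get set_partial_obj_size(512)
//   last region  : end_offset == 63, so region 10 gets 63 + 1 = 64 words
// Total: 12 + 512 + 512 + 64 == 1100 live words, with only the first
// region's count going through the per-worker cache.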
inline void ParCompactionManager::MarkingStatsCache::evict(size_t index) {
ParallelCompactData& data = PSParallelCompact::summary_data();
// flush to global data
data.region(entries[index].region_id)->add_live_obj(entries[index].live_words);
}
inline void ParCompactionManager::MarkingStatsCache::evict_all() {
for (size_t i = 0; i < num_entries; ++i) {
if (entries[i].live_words != 0) {
evict(i);
entries[i].live_words = 0;
}
}
}
inline void ParCompactionManager::create_marking_stats_cache() {
assert(_marking_stats_cache == nullptr, "precondition");
_marking_stats_cache = new MarkingStatsCache();
}
inline void ParCompactionManager::flush_and_destroy_marking_stats_cache() {
_marking_stats_cache->evict_all();
delete _marking_stats_cache;
_marking_stats_cache = nullptr;
}
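// Taken together, the intended per-worker lifecycle (wired up in the marking
// task and the post-marking flush shown in the hunks below) is roughly:
//
//   cm->create_marking_stats_cache();               // before marking starts
//   ... mark_and_push() populates the cache ...     // during marking
//   cm->flush_and_destroy_marking_stats_cache();    // once marking completes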
#endif // SHARE_GC_PARALLEL_PSCOMPACTIONMANAGER_INLINE_HPP

@@ -1969,6 +1969,7 @@ public:
virtual void work(uint worker_id) {
ParCompactionManager* cm = ParCompactionManager::gc_thread_compaction_manager(worker_id);
cm->create_marking_stats_cache();
PCMarkAndPushClosure mark_and_push_closure(cm);
{
@@ -2017,6 +2018,13 @@ public:
}
};
static void flush_marking_stats_cache(const uint num_workers) {
for (uint i = 0; i < num_workers; ++i) {
ParCompactionManager* cm = ParCompactionManager::gc_thread_compaction_manager(i);
cm->flush_and_destroy_marking_stats_cache();
}
}
void PSParallelCompact::marking_phase(ParallelOldTracer *gc_tracer) {
// Recursively traverse all live objects and mark them
GCTraceTime(Info, gc, phases) tm("Marking Phase", &_gc_timer);
@@ -2046,6 +2054,12 @@ void PSParallelCompact::marking_phase(ParallelOldTracer *gc_tracer) {
pt.print_all_references();
}
{
GCTraceTime(Debug, gc, phases) tm("Flush Marking Stats", &_gc_timer);
flush_marking_stats_cache(active_gc_threads);
}
// This is the point where the entire marking should have completed.
ParCompactionManager::verify_all_marking_stack_empty();
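// The flush gets its own timed scope and runs before
// verify_all_marking_stack_empty() and the subsequent summary phase: the
// summary phase consumes the per-region live-word totals, so every worker's
// cache must be drained into the global table first.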

@@ -100,7 +100,6 @@ inline void PSParallelCompact::check_new_location(HeapWord* old_addr, HeapWord*
inline bool PSParallelCompact::mark_obj(oop obj) {
const size_t obj_size = obj->size();
if (mark_bitmap()->mark_obj(obj, obj_size)) {
_summary_data.add_obj(obj, obj_size);
ContinuationGCSupport::transform_stack_chunk(obj);
return true;
} else {
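// Note on this final hunk: the line counts (-100,7 +100,6) show one line
// removed, the call to _summary_data.add_obj(obj, obj_size). That call
// updated the shared per-region live-word data on every marked object; after
// this change the same accounting flows through the per-worker
// MarkingStatsCache in mark_and_push() and reaches the global table only on
// eviction or at the post-marking flush.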