8337709: Use allocated states for chunking large array processing
Reviewed-by: iwalulya, tschatzl
This commit is contained in:
parent
358d77dafb
commit
6a3d045221
@ -35,6 +35,7 @@
|
|||||||
#include "gc/g1/g1Trace.hpp"
|
#include "gc/g1/g1Trace.hpp"
|
||||||
#include "gc/g1/g1YoungGCAllocationFailureInjector.inline.hpp"
|
#include "gc/g1/g1YoungGCAllocationFailureInjector.inline.hpp"
|
||||||
#include "gc/shared/continuationGCSupport.inline.hpp"
|
#include "gc/shared/continuationGCSupport.inline.hpp"
|
||||||
|
#include "gc/shared/partialArrayState.hpp"
|
||||||
#include "gc/shared/partialArrayTaskStepper.inline.hpp"
|
#include "gc/shared/partialArrayTaskStepper.inline.hpp"
|
||||||
#include "gc/shared/preservedMarks.inline.hpp"
|
#include "gc/shared/preservedMarks.inline.hpp"
|
||||||
#include "gc/shared/stringdedup/stringDedup.hpp"
|
#include "gc/shared/stringdedup/stringDedup.hpp"
|
||||||
@ -43,6 +44,7 @@
|
|||||||
#include "oops/access.inline.hpp"
|
#include "oops/access.inline.hpp"
|
||||||
#include "oops/oop.inline.hpp"
|
#include "oops/oop.inline.hpp"
|
||||||
#include "runtime/atomic.hpp"
|
#include "runtime/atomic.hpp"
|
||||||
|
#include "runtime/mutexLocker.hpp"
|
||||||
#include "runtime/prefetch.inline.hpp"
|
#include "runtime/prefetch.inline.hpp"
|
||||||
#include "utilities/globalDefinitions.hpp"
|
#include "utilities/globalDefinitions.hpp"
|
||||||
#include "utilities/macros.hpp"
|
#include "utilities/macros.hpp"
|
||||||
@ -61,7 +63,8 @@ G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h,
|
|||||||
uint worker_id,
|
uint worker_id,
|
||||||
uint num_workers,
|
uint num_workers,
|
||||||
G1CollectionSet* collection_set,
|
G1CollectionSet* collection_set,
|
||||||
G1EvacFailureRegions* evac_failure_regions)
|
G1EvacFailureRegions* evac_failure_regions,
|
||||||
|
PartialArrayStateAllocator* pas_allocator)
|
||||||
: _g1h(g1h),
|
: _g1h(g1h),
|
||||||
_task_queue(g1h->task_queue(worker_id)),
|
_task_queue(g1h->task_queue(worker_id)),
|
||||||
_rdc_local_qset(rdcqs),
|
_rdc_local_qset(rdcqs),
|
||||||
@ -80,8 +83,8 @@ G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h,
|
|||||||
_surviving_young_words(nullptr),
|
_surviving_young_words(nullptr),
|
||||||
_surviving_words_length(collection_set->young_region_length() + 1),
|
_surviving_words_length(collection_set->young_region_length() + 1),
|
||||||
_old_gen_is_full(false),
|
_old_gen_is_full(false),
|
||||||
_partial_objarray_chunk_size(ParGCArrayScanChunk),
|
_partial_array_state_allocator(pas_allocator),
|
||||||
_partial_array_stepper(num_workers),
|
_partial_array_stepper(num_workers, ParGCArrayScanChunk),
|
||||||
_string_dedup_requests(),
|
_string_dedup_requests(),
|
||||||
_max_num_optional_regions(collection_set->optional_region_length()),
|
_max_num_optional_regions(collection_set->optional_region_length()),
|
||||||
_numa(g1h->numa()),
|
_numa(g1h->numa()),
|
||||||
@ -169,9 +172,9 @@ void G1ParScanThreadState::verify_task(oop* task) const {
|
|||||||
"task=" PTR_FORMAT " p=" PTR_FORMAT, p2i(task), p2i(p));
|
"task=" PTR_FORMAT " p=" PTR_FORMAT, p2i(task), p2i(p));
|
||||||
}
|
}
|
||||||
|
|
||||||
void G1ParScanThreadState::verify_task(PartialArrayScanTask task) const {
|
void G1ParScanThreadState::verify_task(PartialArrayState* task) const {
|
||||||
// Must be in the collection set--it's already been copied.
|
// Must be in the collection set--it's already been copied.
|
||||||
oop p = task.to_source_array();
|
oop p = task->source();
|
||||||
assert(_g1h->is_in_cset(p), "p=" PTR_FORMAT, p2i(p));
|
assert(_g1h->is_in_cset(p), "p=" PTR_FORMAT, p2i(p));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -180,8 +183,8 @@ void G1ParScanThreadState::verify_task(ScannerTask task) const {
|
|||||||
verify_task(task.to_narrow_oop_ptr());
|
verify_task(task.to_narrow_oop_ptr());
|
||||||
} else if (task.is_oop_ptr()) {
|
} else if (task.is_oop_ptr()) {
|
||||||
verify_task(task.to_oop_ptr());
|
verify_task(task.to_oop_ptr());
|
||||||
} else if (task.is_partial_array_task()) {
|
} else if (task.is_partial_array_state()) {
|
||||||
verify_task(task.to_partial_array_task());
|
verify_task(task.to_partial_array_state());
|
||||||
} else {
|
} else {
|
||||||
ShouldNotReachHere();
|
ShouldNotReachHere();
|
||||||
}
|
}
|
||||||
@ -223,34 +226,39 @@ void G1ParScanThreadState::do_oop_evac(T* p) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
MAYBE_INLINE_EVACUATION
|
MAYBE_INLINE_EVACUATION
|
||||||
void G1ParScanThreadState::do_partial_array(PartialArrayScanTask task) {
|
void G1ParScanThreadState::do_partial_array(PartialArrayState* state) {
|
||||||
oop from_obj = task.to_source_array();
|
oop to_obj = state->destination();
|
||||||
|
|
||||||
|
#ifdef ASSERT
|
||||||
|
oop from_obj = state->source();
|
||||||
assert(_g1h->is_in_reserved(from_obj), "must be in heap.");
|
assert(_g1h->is_in_reserved(from_obj), "must be in heap.");
|
||||||
assert(from_obj->is_objArray(), "must be obj array");
|
assert(from_obj->is_objArray(), "must be obj array");
|
||||||
assert(from_obj->is_forwarded(), "must be forwarded");
|
assert(from_obj->is_forwarded(), "must be forwarded");
|
||||||
|
|
||||||
oop to_obj = from_obj->forwardee();
|
|
||||||
assert(from_obj != to_obj, "should not be chunking self-forwarded objects");
|
assert(from_obj != to_obj, "should not be chunking self-forwarded objects");
|
||||||
assert(to_obj->is_objArray(), "must be obj array");
|
assert(to_obj->is_objArray(), "must be obj array");
|
||||||
|
#endif // ASSERT
|
||||||
|
|
||||||
objArrayOop to_array = objArrayOop(to_obj);
|
objArrayOop to_array = objArrayOop(to_obj);
|
||||||
|
|
||||||
PartialArrayTaskStepper::Step step
|
// Claim a chunk and get number of additional tasks to enqueue.
|
||||||
= _partial_array_stepper.next(objArrayOop(from_obj),
|
PartialArrayTaskStepper::Step step = _partial_array_stepper.next(state);
|
||||||
to_array,
|
// Push any additional partial scan tasks needed. Pushed before processing
|
||||||
_partial_objarray_chunk_size);
|
// the claimed chunk to allow other workers to steal while we're processing.
|
||||||
for (uint i = 0; i < step._ncreate; ++i) {
|
if (step._ncreate > 0) {
|
||||||
push_on_queue(ScannerTask(PartialArrayScanTask(from_obj)));
|
state->add_references(step._ncreate);
|
||||||
|
for (uint i = 0; i < step._ncreate; ++i) {
|
||||||
|
push_on_queue(ScannerTask(state));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
G1HeapRegionAttr dest_attr = _g1h->region_attr(to_array);
|
G1HeapRegionAttr dest_attr = _g1h->region_attr(to_array);
|
||||||
G1SkipCardEnqueueSetter x(&_scanner, dest_attr.is_new_survivor());
|
G1SkipCardEnqueueSetter x(&_scanner, dest_attr.is_new_survivor());
|
||||||
// Process claimed task. The length of to_array is not correct, but
|
// Process claimed task.
|
||||||
// fortunately the iteration ignores the length field and just relies
|
|
||||||
// on start/end.
|
|
||||||
to_array->oop_iterate_range(&_scanner,
|
to_array->oop_iterate_range(&_scanner,
|
||||||
step._index,
|
checked_cast<int>(step._index),
|
||||||
step._index + _partial_objarray_chunk_size);
|
checked_cast<int>(step._index + _partial_array_stepper.chunk_size()));
|
||||||
|
// Release reference to the state, now that we're done with it.
|
||||||
|
_partial_array_state_allocator->release(_worker_id, state);
|
||||||
}
|
}
|
||||||
|
|
||||||
MAYBE_INLINE_EVACUATION
|
MAYBE_INLINE_EVACUATION
|
||||||
@ -260,20 +268,30 @@ void G1ParScanThreadState::start_partial_objarray(G1HeapRegionAttr dest_attr,
|
|||||||
assert(from_obj->is_objArray(), "precondition");
|
assert(from_obj->is_objArray(), "precondition");
|
||||||
assert(from_obj->is_forwarded(), "precondition");
|
assert(from_obj->is_forwarded(), "precondition");
|
||||||
assert(from_obj->forwardee() == to_obj, "precondition");
|
assert(from_obj->forwardee() == to_obj, "precondition");
|
||||||
assert(from_obj != to_obj, "should not be scanning self-forwarded objects");
|
|
||||||
assert(to_obj->is_objArray(), "precondition");
|
assert(to_obj->is_objArray(), "precondition");
|
||||||
|
|
||||||
objArrayOop to_array = objArrayOop(to_obj);
|
objArrayOop to_array = objArrayOop(to_obj);
|
||||||
|
|
||||||
PartialArrayTaskStepper::Step step
|
size_t array_length = to_array->length();
|
||||||
= _partial_array_stepper.start(objArrayOop(from_obj),
|
PartialArrayTaskStepper::Step step = _partial_array_stepper.start(array_length);
|
||||||
to_array,
|
|
||||||
_partial_objarray_chunk_size);
|
|
||||||
|
|
||||||
// Push any needed partial scan tasks. Pushed before processing the
|
// Push any needed partial scan tasks. Pushed before processing the
|
||||||
// initial chunk to allow other workers to steal while we're processing.
|
// initial chunk to allow other workers to steal while we're processing.
|
||||||
for (uint i = 0; i < step._ncreate; ++i) {
|
if (step._ncreate > 0) {
|
||||||
push_on_queue(ScannerTask(PartialArrayScanTask(from_obj)));
|
assert(step._index < array_length, "invariant");
|
||||||
|
assert(((array_length - step._index) % _partial_array_stepper.chunk_size()) == 0,
|
||||||
|
"invariant");
|
||||||
|
PartialArrayState* state =
|
||||||
|
_partial_array_state_allocator->allocate(_worker_id,
|
||||||
|
from_obj, to_obj,
|
||||||
|
step._index,
|
||||||
|
array_length,
|
||||||
|
step._ncreate);
|
||||||
|
for (uint i = 0; i < step._ncreate; ++i) {
|
||||||
|
push_on_queue(ScannerTask(state));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
assert(step._index == array_length, "invariant");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skip the card enqueue iff the object (to_array) is in survivor region.
|
// Skip the card enqueue iff the object (to_array) is in survivor region.
|
||||||
@ -284,9 +302,8 @@ void G1ParScanThreadState::start_partial_objarray(G1HeapRegionAttr dest_attr,
|
|||||||
G1SkipCardEnqueueSetter x(&_scanner, dest_attr.is_young());
|
G1SkipCardEnqueueSetter x(&_scanner, dest_attr.is_young());
|
||||||
// Process the initial chunk. No need to process the type in the
|
// Process the initial chunk. No need to process the type in the
|
||||||
// klass, as it will already be handled by processing the built-in
|
// klass, as it will already be handled by processing the built-in
|
||||||
// module. The length of to_array is not correct, but fortunately
|
// module.
|
||||||
// the iteration ignores that length field and relies on start/end.
|
to_array->oop_iterate_range(&_scanner, 0, checked_cast<int>(step._index));
|
||||||
to_array->oop_iterate_range(&_scanner, 0, step._index);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
MAYBE_INLINE_EVACUATION
|
MAYBE_INLINE_EVACUATION
|
||||||
@ -297,7 +314,7 @@ void G1ParScanThreadState::dispatch_task(ScannerTask task) {
|
|||||||
} else if (task.is_oop_ptr()) {
|
} else if (task.is_oop_ptr()) {
|
||||||
do_oop_evac(task.to_oop_ptr());
|
do_oop_evac(task.to_oop_ptr());
|
||||||
} else {
|
} else {
|
||||||
do_partial_array(task.to_partial_array_task());
|
do_partial_array(task.to_partial_array_state());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -582,7 +599,8 @@ G1ParScanThreadState* G1ParScanThreadStateSet::state_for_worker(uint worker_id)
|
|||||||
worker_id,
|
worker_id,
|
||||||
_num_workers,
|
_num_workers,
|
||||||
_collection_set,
|
_collection_set,
|
||||||
_evac_failure_regions);
|
_evac_failure_regions,
|
||||||
|
&_partial_array_state_allocator);
|
||||||
}
|
}
|
||||||
return _states[worker_id];
|
return _states[worker_id];
|
||||||
}
|
}
|
||||||
@ -715,7 +733,9 @@ G1ParScanThreadStateSet::G1ParScanThreadStateSet(G1CollectedHeap* g1h,
|
|||||||
_surviving_young_words_total(NEW_C_HEAP_ARRAY(size_t, collection_set->young_region_length() + 1, mtGC)),
|
_surviving_young_words_total(NEW_C_HEAP_ARRAY(size_t, collection_set->young_region_length() + 1, mtGC)),
|
||||||
_num_workers(num_workers),
|
_num_workers(num_workers),
|
||||||
_flushed(false),
|
_flushed(false),
|
||||||
_evac_failure_regions(evac_failure_regions) {
|
_evac_failure_regions(evac_failure_regions),
|
||||||
|
_partial_array_state_allocator(num_workers)
|
||||||
|
{
|
||||||
_preserved_marks_set.init(num_workers);
|
_preserved_marks_set.init(num_workers);
|
||||||
for (uint i = 0; i < num_workers; ++i) {
|
for (uint i = 0; i < num_workers; ++i) {
|
||||||
_states[i] = nullptr;
|
_states[i] = nullptr;
|
||||||
|
@ -32,6 +32,7 @@
|
|||||||
#include "gc/shared/ageTable.hpp"
|
#include "gc/shared/ageTable.hpp"
|
||||||
#include "gc/shared/copyFailedInfo.hpp"
|
#include "gc/shared/copyFailedInfo.hpp"
|
||||||
#include "gc/shared/gc_globals.hpp"
|
#include "gc/shared/gc_globals.hpp"
|
||||||
|
#include "gc/shared/partialArrayState.hpp"
|
||||||
#include "gc/shared/partialArrayTaskStepper.hpp"
|
#include "gc/shared/partialArrayTaskStepper.hpp"
|
||||||
#include "gc/shared/preservedMarks.hpp"
|
#include "gc/shared/preservedMarks.hpp"
|
||||||
#include "gc/shared/stringdedup/stringDedup.hpp"
|
#include "gc/shared/stringdedup/stringDedup.hpp"
|
||||||
@ -87,7 +88,8 @@ class G1ParScanThreadState : public CHeapObj<mtGC> {
|
|||||||
// available for allocation.
|
// available for allocation.
|
||||||
bool _old_gen_is_full;
|
bool _old_gen_is_full;
|
||||||
// Size (in elements) of a partial objArray task chunk.
|
// Size (in elements) of a partial objArray task chunk.
|
||||||
int _partial_objarray_chunk_size;
|
size_t _partial_objarray_chunk_size;
|
||||||
|
PartialArrayStateAllocator* _partial_array_state_allocator;
|
||||||
PartialArrayTaskStepper _partial_array_stepper;
|
PartialArrayTaskStepper _partial_array_stepper;
|
||||||
StringDedup::Requests _string_dedup_requests;
|
StringDedup::Requests _string_dedup_requests;
|
||||||
|
|
||||||
@ -129,7 +131,8 @@ public:
|
|||||||
uint worker_id,
|
uint worker_id,
|
||||||
uint num_workers,
|
uint num_workers,
|
||||||
G1CollectionSet* collection_set,
|
G1CollectionSet* collection_set,
|
||||||
G1EvacFailureRegions* evac_failure_regions);
|
G1EvacFailureRegions* evac_failure_regions,
|
||||||
|
PartialArrayStateAllocator* partial_array_state_allocator);
|
||||||
virtual ~G1ParScanThreadState();
|
virtual ~G1ParScanThreadState();
|
||||||
|
|
||||||
void set_ref_discoverer(ReferenceDiscoverer* rd) { _scanner.set_ref_discoverer(rd); }
|
void set_ref_discoverer(ReferenceDiscoverer* rd) { _scanner.set_ref_discoverer(rd); }
|
||||||
@ -140,7 +143,7 @@ public:
|
|||||||
|
|
||||||
void verify_task(narrowOop* task) const NOT_DEBUG_RETURN;
|
void verify_task(narrowOop* task) const NOT_DEBUG_RETURN;
|
||||||
void verify_task(oop* task) const NOT_DEBUG_RETURN;
|
void verify_task(oop* task) const NOT_DEBUG_RETURN;
|
||||||
void verify_task(PartialArrayScanTask task) const NOT_DEBUG_RETURN;
|
void verify_task(PartialArrayState* task) const NOT_DEBUG_RETURN;
|
||||||
void verify_task(ScannerTask task) const NOT_DEBUG_RETURN;
|
void verify_task(ScannerTask task) const NOT_DEBUG_RETURN;
|
||||||
|
|
||||||
void push_on_queue(ScannerTask task);
|
void push_on_queue(ScannerTask task);
|
||||||
@ -169,7 +172,7 @@ public:
|
|||||||
size_t flush_stats(size_t* surviving_young_words, uint num_workers, BufferNodeList* buffer_log);
|
size_t flush_stats(size_t* surviving_young_words, uint num_workers, BufferNodeList* buffer_log);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void do_partial_array(PartialArrayScanTask task);
|
void do_partial_array(PartialArrayState* state);
|
||||||
void start_partial_objarray(G1HeapRegionAttr dest_dir, oop from, oop to);
|
void start_partial_objarray(G1HeapRegionAttr dest_dir, oop from, oop to);
|
||||||
|
|
||||||
HeapWord* allocate_copy_slow(G1HeapRegionAttr* dest_attr,
|
HeapWord* allocate_copy_slow(G1HeapRegionAttr* dest_attr,
|
||||||
@ -252,6 +255,7 @@ class G1ParScanThreadStateSet : public StackObj {
|
|||||||
uint _num_workers;
|
uint _num_workers;
|
||||||
bool _flushed;
|
bool _flushed;
|
||||||
G1EvacFailureRegions* _evac_failure_regions;
|
G1EvacFailureRegions* _evac_failure_regions;
|
||||||
|
PartialArrayStateAllocator _partial_array_state_allocator;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
G1ParScanThreadStateSet(G1CollectedHeap* g1h,
|
G1ParScanThreadStateSet(G1CollectedHeap* g1h,
|
||||||
|
152
src/hotspot/share/gc/shared/partialArrayState.cpp
Normal file
152
src/hotspot/share/gc/shared/partialArrayState.cpp
Normal file
@ -0,0 +1,152 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "precompiled.hpp"
|
||||||
|
#include "gc/shared/partialArrayState.hpp"
|
||||||
|
#include "memory/allocation.inline.hpp"
|
||||||
|
#include "memory/arena.hpp"
|
||||||
|
#include "nmt/memflags.hpp"
|
||||||
|
#include "oops/oopsHierarchy.hpp"
|
||||||
|
#include "runtime/atomic.hpp"
|
||||||
|
#include "runtime/orderAccess.hpp"
|
||||||
|
#include "utilities/debug.hpp"
|
||||||
|
#include "utilities/globalDefinitions.hpp"
|
||||||
|
#include "utilities/macros.hpp"
|
||||||
|
#include <new>
|
||||||
|
|
||||||
|
// Builds a state describing one array being processed in chunks: the source
// and destination array oops, the array length, the index at which the next
// segment starts, and the initial reference count.
PartialArrayState::PartialArrayState(oop src,
                                     oop dst,
                                     size_t index,
                                     size_t length,
                                     size_t initial_refcount) :
  _source(src),
  _destination(dst),
  _length(length),
  _index(index),
  _refcount(initial_refcount)
{
  // The starting index may equal the length but must never exceed it.
  assert(index <= length, "precondition");
}
|
||||||
|
|
||||||
|
void PartialArrayState::add_references(size_t count) {
|
||||||
|
size_t new_count = Atomic::add(&_refcount, count, memory_order_relaxed);
|
||||||
|
assert(new_count >= count, "reference count overflow");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Implementation object behind PartialArrayStateAllocator.  It holds one
// arena and one free-list head per worker, both indexed by worker id.
class PartialArrayStateAllocator::Impl : public CHeapObj<mtGC> {
private:
  // Node type overlaid on the storage of a released state.
  struct FreeListEntry;

  Arena* _arenas;               // Array of _num_workers arenas.
  FreeListEntry** _free_lists;  // Array of _num_workers free-list heads.
  uint _num_workers;

public:
  Impl(uint num_workers);
  ~Impl();

  NONCOPYABLE(Impl);

  // Construct a state in storage taken from worker_id's free-list, or from
  // worker_id's arena when that free-list is empty.
  PartialArrayState* allocate(uint worker_id,
                              oop src, oop dst,
                              size_t index, size_t length,
                              size_t initial_refcount);

  // Drop one reference to state; when the count reaches zero the storage is
  // pushed onto worker_id's free-list.
  void release(uint worker_id, PartialArrayState* state);
};
|
||||||
|
|
||||||
|
// Singly-linked free-list node.  Entries are placement-constructed in the
// storage of released states, so they carry nothing but the link.
struct PartialArrayStateAllocator::Impl::FreeListEntry {
  // Following entry in the list, or nullptr at the end.
  FreeListEntry* _next;

  FreeListEntry(FreeListEntry* next) : _next(next) {}
  ~FreeListEntry() = default;

  NONCOPYABLE(FreeListEntry);
};
|
||||||
|
|
||||||
|
// Allocates the per-worker arrays from the C heap, then constructs an arena
// in each slot and starts every free-list out empty.
PartialArrayStateAllocator::Impl::Impl(uint num_workers) :
  _arenas(NEW_C_HEAP_ARRAY(Arena, num_workers, mtGC)),
  _free_lists(NEW_C_HEAP_ARRAY(FreeListEntry*, num_workers, mtGC)),
  _num_workers(num_workers)
{
  for (uint worker = 0; worker < _num_workers; ++worker) {
    // NEW_C_HEAP_ARRAY hands back raw storage, so build the Arena in place.
    ::new (_arenas + worker) Arena(mtGC);
    _free_lists[worker] = nullptr;
  }
}
|
||||||
|
|
||||||
|
// Tears down the per-worker allocation state: releases the free-list head
// array, destroys every arena, and frees the array that held the arenas.
PartialArrayStateAllocator::Impl::~Impl() {
  // We don't need to clean up the free lists.  Deallocating the entries
  // does nothing, since we're using arena allocation.  Instead, leave it
  // to the arena destructor to release the memory.
  FREE_C_HEAP_ARRAY(FreeListEntry*, _free_lists);
  // The arenas were constructed in place inside a C-heap array, so each one
  // is destroyed explicitly before the array storage is given back.
  for (uint i = 0; i < _num_workers; ++i) {
    _arenas[i].~Arena();
  }
  // Free the backing array obtained from NEW_C_HEAP_ARRAY in the constructor.
  // Running the Arena destructors alone does not release it, so omitting
  // this leaks the array every time an allocator is destroyed.
  FREE_C_HEAP_ARRAY(Arena, _arenas);
}
|
||||||
|
|
||||||
|
// Returns a newly constructed state.  Storage is recycled from worker_id's
// free-list when one is available; otherwise it is carved out of worker_id's
// arena.
PartialArrayState* PartialArrayStateAllocator::Impl::allocate(uint worker_id,
                                                              oop src, oop dst,
                                                              size_t index,
                                                              size_t length,
                                                              size_t initial_refcount) {
  FreeListEntry* recycled = _free_lists[worker_id];
  void* storage;
  if (recycled != nullptr) {
    // Pop the head entry and end its lifetime before reusing its memory.
    _free_lists[worker_id] = recycled->_next;
    recycled->~FreeListEntry();
    storage = recycled;
  } else {
    // Nothing to recycle for this worker; take fresh memory from its arena.
    storage = NEW_ARENA_OBJ(&_arenas[worker_id], PartialArrayState);
  }
  return ::new (storage) PartialArrayState(src, dst, index, length, initial_refcount);
}
|
||||||
|
|
||||||
|
// Drops one reference to state.  When that was the last reference, the
// state is destroyed and its storage becomes the new head of worker_id's
// free-list.
void PartialArrayStateAllocator::Impl::release(uint worker_id, PartialArrayState* state) {
  const size_t remaining = Atomic::sub(&state->_refcount, size_t(1), memory_order_release);
  if (remaining != 0) {
    // Still referenced elsewhere.  A result of SIZE_MAX would mean the
    // count was already zero before this decrement.
    assert(remaining + 1 != 0, "refcount underflow");
    return;
  }
  // Last reference: pair with the release ordering of the decrements above
  // before tearing the state down.
  OrderAccess::acquire();
  state->~PartialArrayState();
  FreeListEntry* previous_head = _free_lists[worker_id];
  _free_lists[worker_id] = ::new (state) FreeListEntry(previous_head);
}
|
||||||
|
|
||||||
|
// Creates the implementation object, sized for num_workers workers.
PartialArrayStateAllocator::PartialArrayStateAllocator(uint num_workers) :
  _impl(new Impl(num_workers))
{
}
|
||||||
|
|
||||||
|
// Destroys the implementation object created by the constructor.
PartialArrayStateAllocator::~PartialArrayStateAllocator()
{
  delete _impl;
}
|
||||||
|
|
||||||
|
// Forwards the request unchanged to the implementation object.
PartialArrayState* PartialArrayStateAllocator::allocate(uint worker_id,
                                                        oop src, oop dst,
                                                        size_t index,
                                                        size_t length,
                                                        size_t initial_refcount) {
  PartialArrayState* const state =
    _impl->allocate(worker_id, src, dst, index, length, initial_refcount);
  return state;
}
|
||||||
|
|
||||||
|
// Forwards the request unchanged to the implementation object.
void PartialArrayStateAllocator::release(uint worker_id,
                                         PartialArrayState* state)
{
  _impl->release(worker_id, state);
}
|
||||||
|
|
136
src/hotspot/share/gc/shared/partialArrayState.hpp
Normal file
136
src/hotspot/share/gc/shared/partialArrayState.hpp
Normal file
@ -0,0 +1,136 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef SHARE_GC_SHARED_PARTIALARRAYSTATE_HPP
|
||||||
|
#define SHARE_GC_SHARED_PARTIALARRAYSTATE_HPP
|
||||||
|
|
||||||
|
#include "oops/oopsHierarchy.hpp"
|
||||||
|
#include "utilities/globalDefinitions.hpp"
|
||||||
|
#include "utilities/macros.hpp"
|
||||||
|
|
||||||
|
class PartialArrayStateAllocator;
|
||||||
|
|
||||||
|
// Instances of this class are used to represent processing progress for an
|
||||||
|
// array task in a taskqueue. When a sufficiently large array needs to be
|
||||||
|
// processed, such that it is desirable to split up the processing into
|
||||||
|
// parallelizable subtasks, a state object is allocated for the array.
|
||||||
|
// Multiple tasks referring to the state can then be added to the taskqueue
|
||||||
|
// for later processing, either by the current thread or by some other thread
|
||||||
|
// that steals one of those tasks.
|
||||||
|
//
|
||||||
|
// Processing a state involves using the state to claim a segment of the
|
||||||
|
// array, and processing that segment. Claiming is done by atomically
|
||||||
|
// incrementing the index, thereby claiming the segment from the old to new
|
||||||
|
// index values. New tasks should also be added as needed to ensure the
|
||||||
|
// entire array will be processed. A PartialArrayTaskStepper can be used to
|
||||||
|
// help with this.
|
||||||
|
//
|
||||||
|
// States are allocated and released using a PartialArrayStateAllocator.
|
||||||
|
// States are reference counted to aid in that management. Each task
|
||||||
|
// referring to a given state that is added to a taskqueue must increase the
|
||||||
|
// reference count by one. When the processing of a task referring to a state
|
||||||
|
// is complete, the reference count must be decreased by one. When the
|
||||||
|
// reference count reaches zero the state should be released to the allocator
|
||||||
|
// for later reuse.
|
||||||
|
class PartialArrayState {
|
||||||
|
oop _source;
|
||||||
|
oop _destination;
|
||||||
|
size_t _length;
|
||||||
|
volatile size_t _index;
|
||||||
|
volatile size_t _refcount;
|
||||||
|
|
||||||
|
friend class PartialArrayStateAllocator;
|
||||||
|
|
||||||
|
PartialArrayState(oop src, oop dst,
|
||||||
|
size_t index, size_t length,
|
||||||
|
size_t initial_refcount);
|
||||||
|
~PartialArrayState() = default;
|
||||||
|
|
||||||
|
NONCOPYABLE(PartialArrayState);
|
||||||
|
|
||||||
|
public:
|
||||||
|
// Add count references, one per referring task being added to a taskqueue.
|
||||||
|
void add_references(size_t count);
|
||||||
|
|
||||||
|
// The source array oop.
|
||||||
|
oop source() const { return _source; }
|
||||||
|
|
||||||
|
// The destination array oop. In some circumstances the source and
|
||||||
|
// destination may be the same.
|
||||||
|
oop destination() const { return _destination; }
|
||||||
|
|
||||||
|
// The length of the array oop.
|
||||||
|
size_t length() const { return _length; }
|
||||||
|
|
||||||
|
// A pointer to the start index for the next segment to process, for atomic
|
||||||
|
// update.
|
||||||
|
volatile size_t* index_addr() { return &_index; }
|
||||||
|
};
|
||||||
|
|
||||||
|
// This class provides memory management for PartialArrayStates.
|
||||||
|
//
|
||||||
|
// States are initially allocated from a set of arenas owned by the allocator.
|
||||||
|
// This allows the entire set of allocated states to be discarded without the
|
||||||
|
// need to keep track of or find them under some circumstances. For example,
|
||||||
|
// if G1 concurrent marking is aborted and needs to restart because of a full
|
||||||
|
// marking queue, the queue doesn't need to be searched for tasks referring to
|
||||||
|
// states to allow releasing them. Instead the queue contents can just be
|
||||||
|
// discarded, and the memory for the no longer referenced states will
|
||||||
|
// eventually be reclaimed when the arenas are reset.
|
||||||
|
//
|
||||||
|
// A set of free-lists is placed in front of the arena allocators. This
|
||||||
|
// causes the maximum number of allocated states to be based on the number of
|
||||||
|
// in-progress arrays, rather than the total number of arrays that need to be
|
||||||
|
// processed. The use of free-list allocators is the reason for reference
|
||||||
|
// counting states.
|
||||||
|
//
|
||||||
|
// The arena and free-list to use for an allocation operation is designated by
|
||||||
|
// the worker_id used in the operation. This avoids locking and such on those
|
||||||
|
// data structures, at the cost of possibly doing more total arena allocation
|
||||||
|
// than would be needed with a single shared arena and free-list.
|
||||||
|
class PartialArrayStateAllocator {
|
||||||
|
class Impl;
|
||||||
|
Impl* _impl;
|
||||||
|
|
||||||
|
public:
|
||||||
|
PartialArrayStateAllocator(uint num_workers);
|
||||||
|
~PartialArrayStateAllocator();
|
||||||
|
|
||||||
|
NONCOPYABLE(PartialArrayStateAllocator);
|
||||||
|
|
||||||
|
// Create a new state, obtaining the memory for it from the free-list or
|
||||||
|
// arena associated with worker_id.
|
||||||
|
PartialArrayState* allocate(uint worker_id,
|
||||||
|
oop src, oop dst,
|
||||||
|
size_t index, size_t length,
|
||||||
|
size_t initial_refcount);
|
||||||
|
|
||||||
|
// Decrement the state's refcount. If the new refcount is zero, add the
|
||||||
|
// state to the free-list associated with worker_id. The state must have
|
||||||
|
// been allocated by this allocator, but that allocation doesn't need to
|
||||||
|
// have been associated with worker_id.
|
||||||
|
void release(uint worker_id, PartialArrayState* state);
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif // SHARE_GC_SHARED_PARTIALARRAYSTATE_HPP
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -25,6 +25,7 @@
|
|||||||
#include "precompiled.hpp"
|
#include "precompiled.hpp"
|
||||||
#include "gc/shared/partialArrayTaskStepper.hpp"
|
#include "gc/shared/partialArrayTaskStepper.hpp"
|
||||||
#include "oops/arrayOop.hpp"
|
#include "oops/arrayOop.hpp"
|
||||||
|
#include "utilities/debug.hpp"
|
||||||
#include "utilities/globalDefinitions.hpp"
|
#include "utilities/globalDefinitions.hpp"
|
||||||
#include "utilities/powerOfTwo.hpp"
|
#include "utilities/powerOfTwo.hpp"
|
||||||
|
|
||||||
@ -48,7 +49,8 @@ static uint compute_task_fanout(uint task_limit) {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
PartialArrayTaskStepper::PartialArrayTaskStepper(uint n_workers) :
|
PartialArrayTaskStepper::PartialArrayTaskStepper(uint n_workers, size_t chunk_size) :
|
||||||
|
_chunk_size(chunk_size),
|
||||||
_task_limit(compute_task_limit(n_workers)),
|
_task_limit(compute_task_limit(n_workers)),
|
||||||
_task_fanout(compute_task_fanout(_task_limit))
|
_task_fanout(compute_task_fanout(_task_limit))
|
||||||
{}
|
{}
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -28,55 +28,52 @@
|
|||||||
#include "oops/arrayOop.hpp"
|
#include "oops/arrayOop.hpp"
|
||||||
#include "utilities/globalDefinitions.hpp"
|
#include "utilities/globalDefinitions.hpp"
|
||||||
|
|
||||||
// Helper for handling PartialArrayTasks.
|
class PartialArrayState;
|
||||||
|
|
||||||
|
// Helper for partial array chunking tasks.
|
||||||
//
|
//
|
||||||
// When an array is large, we want to split it up into chunks that can be
|
// When an array is large, we want to split it up into chunks that can be
|
||||||
// processed in parallel. Each task (implicitly) represents such a chunk.
|
// processed in parallel. Each task (implicitly) represents such a chunk. We
|
||||||
// We can enqueue multiple tasks at the same time. We want to enqueue
|
// can enqueue multiple tasks at the same time. We want to enqueue enough
|
||||||
// enough tasks to benefit from the available parallelism, while not so many
|
// tasks to benefit from the available parallelism, while not so many as to
|
||||||
// as to substantially expand the task queues.
|
// substantially expand the task queues.
|
||||||
//
|
|
||||||
// A task directly refers to the from-space array. The from-space array's
|
|
||||||
// forwarding pointer refers to the associated to-space array, and its
|
|
||||||
// length is the actual length. The to-space array's length field is used to
|
|
||||||
// indicate processing progress. It is the starting index of the next chunk
|
|
||||||
// to process, or equals the actual length when there are no more chunks to
|
|
||||||
// be processed.
|
|
||||||
class PartialArrayTaskStepper {
|
class PartialArrayTaskStepper {
|
||||||
public:
|
public:
|
||||||
PartialArrayTaskStepper(uint n_workers);
|
PartialArrayTaskStepper(uint n_workers, size_t chunk_size);
|
||||||
|
|
||||||
struct Step {
|
struct Step {
|
||||||
int _index; // Array index for the step.
|
size_t _index; // Array index for the step.
|
||||||
uint _ncreate; // Number of new tasks to create.
|
uint _ncreate; // Number of new tasks to create.
|
||||||
};
|
};
|
||||||
|
|
||||||
// Set to's length to the end of the initial chunk, which is the start of
|
// Called with the length of the array to be processed. Returns a Step with
|
||||||
// the first partial task if the array is large enough to need splitting.
|
// _index being the end of the initial chunk, which the caller should
|
||||||
// Returns a Step with _index being that index and _ncreate being the
|
// process. This is also the starting index for the next chunk to process.
|
||||||
// initial number of partial tasks to enqueue.
|
// The _ncreate is the number of tasks to enqueue to continue processing the
|
||||||
inline Step start(arrayOop from, arrayOop to, int chunk_size) const;
|
// array. If _ncreate is zero then _index will be length.
|
||||||
|
inline Step start(size_t length) const;
|
||||||
|
|
||||||
// Increment to's length by chunk_size to claim the next chunk. Returns a
|
// Atomically increment state's index by chunk_size() to claim the next
|
||||||
// Step with _index being the starting index of the claimed chunk and
|
// chunk. Returns a Step with _index being the starting index of the
|
||||||
// _ncreate being the number of additional partial tasks to enqueue.
|
// claimed chunk and _ncreate being the number of additional partial tasks
|
||||||
// precondition: chunk_size must be the same as used to start the task sequence.
|
// to enqueue.
|
||||||
inline Step next(arrayOop from, arrayOop to, int chunk_size) const;
|
inline Step next(PartialArrayState* state) const;
|
||||||
|
|
||||||
|
// The size of chunks to claim for each task.
|
||||||
|
inline size_t chunk_size() const;
|
||||||
|
|
||||||
class TestSupport; // For unit tests
|
class TestSupport; // For unit tests
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
// Size (number of elements) of a chunk to process.
|
||||||
|
size_t _chunk_size;
|
||||||
// Limit on the number of partial array tasks to create for a given array.
|
// Limit on the number of partial array tasks to create for a given array.
|
||||||
uint _task_limit;
|
uint _task_limit;
|
||||||
// Maximum number of new tasks to create when processing an existing task.
|
// Maximum number of new tasks to create when processing an existing task.
|
||||||
uint _task_fanout;
|
uint _task_fanout;
|
||||||
|
|
||||||
// Split start/next into public part dealing with oops and private
|
// For unit tests.
|
||||||
// impl dealing with lengths and pointers to lengths, for unit testing.
|
inline Step next_impl(size_t length, volatile size_t* index_addr) const;
|
||||||
// length is the actual length obtained from the from-space object.
|
|
||||||
// to_length_addr is the address of the to-space object's length value.
|
|
||||||
inline Step start_impl(int length, int* to_length_addr, int chunk_size) const;
|
|
||||||
inline Step next_impl(int length, int* to_length_addr, int chunk_size) const;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // SHARE_GC_SHARED_PARTIALARRAYTASKSTEPPER_HPP
|
#endif // SHARE_GC_SHARED_PARTIALARRAYTASKSTEPPER_HPP
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -25,66 +25,46 @@
|
|||||||
#ifndef SHARE_GC_SHARED_PARTIALARRAYTASKSTEPPER_INLINE_HPP
|
#ifndef SHARE_GC_SHARED_PARTIALARRAYTASKSTEPPER_INLINE_HPP
|
||||||
#define SHARE_GC_SHARED_PARTIALARRAYTASKSTEPPER_INLINE_HPP
|
#define SHARE_GC_SHARED_PARTIALARRAYTASKSTEPPER_INLINE_HPP
|
||||||
|
|
||||||
|
#include "gc/shared/partialArrayState.hpp"
|
||||||
#include "gc/shared/partialArrayTaskStepper.hpp"
|
#include "gc/shared/partialArrayTaskStepper.hpp"
|
||||||
|
|
||||||
#include "oops/arrayOop.hpp"
|
|
||||||
#include "runtime/atomic.hpp"
|
#include "runtime/atomic.hpp"
|
||||||
|
#include "utilities/checkedCast.hpp"
|
||||||
|
#include "utilities/debug.hpp"
|
||||||
|
|
||||||
|
size_t PartialArrayTaskStepper::chunk_size() const {
|
||||||
|
return _chunk_size;
|
||||||
|
}
|
||||||
|
|
||||||
PartialArrayTaskStepper::Step
|
PartialArrayTaskStepper::Step
|
||||||
PartialArrayTaskStepper::start_impl(int length,
|
PartialArrayTaskStepper::start(size_t length) const {
|
||||||
int* to_length_addr,
|
size_t end = length % _chunk_size; // End of initial chunk.
|
||||||
int chunk_size) const {
|
|
||||||
assert(chunk_size > 0, "precondition");
|
|
||||||
|
|
||||||
int end = length % chunk_size; // End of initial chunk.
|
|
||||||
// Set to's length to end of initial chunk. Partial tasks use that length
|
|
||||||
// field as the start of the next chunk to process. Must be done before
|
|
||||||
// enqueuing partial scan tasks, in case other threads steal any of those
|
|
||||||
// tasks.
|
|
||||||
//
|
|
||||||
// The value of end can be 0, either because of a 0-length array or
|
|
||||||
// because length is a multiple of the chunk size. Both of those are
|
|
||||||
// relatively rare and handled in the normal course of the iteration, so
|
|
||||||
// not worth doing anything special about here.
|
|
||||||
*to_length_addr = end;
|
|
||||||
|
|
||||||
// If the initial chunk is the complete array, then don't need any partial
|
// If the initial chunk is the complete array, then don't need any partial
|
||||||
// tasks. Otherwise, start with just one partial task; see new task
|
// tasks. Otherwise, start with just one partial task; see new task
|
||||||
// calculation in next().
|
// calculation in next().
|
||||||
Step result = { end, (length > end) ? 1u : 0u };
|
return Step{ end, (length > end) ? 1u : 0u };
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
PartialArrayTaskStepper::Step
|
PartialArrayTaskStepper::Step
|
||||||
PartialArrayTaskStepper::start(arrayOop from, arrayOop to, int chunk_size) const {
|
PartialArrayTaskStepper::next_impl(size_t length, volatile size_t* index_addr) const {
|
||||||
return start_impl(from->length(), to->length_addr(), chunk_size);
|
// The start of the next task is in the state's index.
|
||||||
}
|
|
||||||
|
|
||||||
PartialArrayTaskStepper::Step
|
|
||||||
PartialArrayTaskStepper::next_impl(int length,
|
|
||||||
int* to_length_addr,
|
|
||||||
int chunk_size) const {
|
|
||||||
assert(chunk_size > 0, "precondition");
|
|
||||||
|
|
||||||
// The start of the next task is in the length field of the to-space object.
|
|
||||||
// Atomically increment by the chunk size to claim the associated chunk.
|
// Atomically increment by the chunk size to claim the associated chunk.
|
||||||
// Because we limit the number of enqueued tasks to being no more than the
|
// Because we limit the number of enqueued tasks to being no more than the
|
||||||
// number of remaining chunks to process, we can use an atomic add for the
|
// number of remaining chunks to process, we can use an atomic add for the
|
||||||
// claim, rather than a CAS loop.
|
// claim, rather than a CAS loop.
|
||||||
int start = Atomic::fetch_then_add(to_length_addr,
|
size_t start = Atomic::fetch_then_add(index_addr,
|
||||||
chunk_size,
|
_chunk_size,
|
||||||
memory_order_relaxed);
|
memory_order_relaxed);
|
||||||
|
|
||||||
assert(start < length, "invariant: start %d, length %d", start, length);
|
assert(start < length, "invariant: start %zu, length %zu", start, length);
|
||||||
assert(((length - start) % chunk_size) == 0,
|
assert(((length - start) % _chunk_size) == 0,
|
||||||
"invariant: start %d, length %d, chunk size %d",
|
"invariant: start %zu, length %zu, chunk size %zu",
|
||||||
start, length, chunk_size);
|
start, length, _chunk_size);
|
||||||
|
|
||||||
// Determine the number of new tasks to create.
|
// Determine the number of new tasks to create.
|
||||||
// Zero-based index for this partial task. The initial task isn't counted.
|
// Zero-based index for this partial task. The initial task isn't counted.
|
||||||
uint task_num = (start / chunk_size);
|
uint task_num = checked_cast<uint>(start / _chunk_size);
|
||||||
// Number of tasks left to process, including this one.
|
// Number of tasks left to process, including this one.
|
||||||
uint remaining_tasks = (length - start) / chunk_size;
|
uint remaining_tasks = checked_cast<uint>((length - start) / _chunk_size);
|
||||||
assert(remaining_tasks > 0, "invariant");
|
assert(remaining_tasks > 0, "invariant");
|
||||||
// Compute number of pending tasks, including this one. The maximum number
|
// Compute number of pending tasks, including this one. The maximum number
|
||||||
// of tasks is a function of task_num (N) and _task_fanout (F).
|
// of tasks is a function of task_num (N) and _task_fanout (F).
|
||||||
@ -106,13 +86,12 @@ PartialArrayTaskStepper::next_impl(int length,
|
|||||||
// of tasks to add for this task.
|
// of tasks to add for this task.
|
||||||
uint pending = MIN3(max_pending, remaining_tasks, _task_limit);
|
uint pending = MIN3(max_pending, remaining_tasks, _task_limit);
|
||||||
uint ncreate = MIN2(_task_fanout, MIN2(remaining_tasks, _task_limit + 1) - pending);
|
uint ncreate = MIN2(_task_fanout, MIN2(remaining_tasks, _task_limit + 1) - pending);
|
||||||
Step result = { start, ncreate };
|
return Step{ start, ncreate };
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
PartialArrayTaskStepper::Step
|
PartialArrayTaskStepper::Step
|
||||||
PartialArrayTaskStepper::next(arrayOop from, arrayOop to, int chunk_size) const {
|
PartialArrayTaskStepper::next(PartialArrayState* state) const {
|
||||||
return next_impl(from->length(), to->length_addr(), chunk_size);
|
return next_impl(state->length(), state->index_addr());
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // SHARE_GC_SHARED_PARTIALARRAYTASKSTEPPER_INLINE_HPP
|
#endif // SHARE_GC_SHARED_PARTIALARRAYTASKSTEPPER_INLINE_HPP
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2001, 2023, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2001, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -576,6 +576,7 @@ private:
|
|||||||
// Wrapper over an oop that is a partially scanned array.
|
// Wrapper over an oop that is a partially scanned array.
|
||||||
// Can be converted to a ScannerTask for placement in associated task queues.
|
// Can be converted to a ScannerTask for placement in associated task queues.
|
||||||
// Refers to the partially copied source array oop.
|
// Refers to the partially copied source array oop.
|
||||||
|
// Temporarily retained to support ParallelGC until it adopts PartialArrayState.
|
||||||
class PartialArrayScanTask {
|
class PartialArrayScanTask {
|
||||||
oop _src;
|
oop _src;
|
||||||
|
|
||||||
@ -586,7 +587,9 @@ public:
|
|||||||
oop to_source_array() const { return _src; }
|
oop to_source_array() const { return _src; }
|
||||||
};
|
};
|
||||||
|
|
||||||
// Discriminated union over oop*, narrowOop*, and PartialArrayScanTask.
|
class PartialArrayState;
|
||||||
|
|
||||||
|
// Discriminated union over oop*, narrowOop*, and PartialArrayState.
|
||||||
// Uses a low tag in the associated pointer to identify the category.
|
// Uses a low tag in the associated pointer to identify the category.
|
||||||
// Used as a task queue element type.
|
// Used as a task queue element type.
|
||||||
class ScannerTask {
|
class ScannerTask {
|
||||||
@ -624,9 +627,13 @@ public:
|
|||||||
|
|
||||||
explicit ScannerTask(narrowOop* p) : _p(encode(p, NarrowOopTag)) {}
|
explicit ScannerTask(narrowOop* p) : _p(encode(p, NarrowOopTag)) {}
|
||||||
|
|
||||||
|
// Temporarily retained to support ParallelGC until it adopts PartialArrayState.
|
||||||
explicit ScannerTask(PartialArrayScanTask t) :
|
explicit ScannerTask(PartialArrayScanTask t) :
|
||||||
_p(encode(t.to_source_array(), PartialArrayTag)) {}
|
_p(encode(t.to_source_array(), PartialArrayTag)) {}
|
||||||
|
|
||||||
|
explicit ScannerTask(PartialArrayState* state) :
|
||||||
|
_p(encode(state, PartialArrayTag)) {}
|
||||||
|
|
||||||
// Trivially copyable.
|
// Trivially copyable.
|
||||||
|
|
||||||
// Predicate implementations assume OopTag == 0, others are powers of 2.
|
// Predicate implementations assume OopTag == 0, others are powers of 2.
|
||||||
@ -639,10 +646,15 @@ public:
|
|||||||
return (raw_value() & NarrowOopTag) != 0;
|
return (raw_value() & NarrowOopTag) != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Temporarily retained to support ParallelGC until it adopts PartialArrayState.
|
||||||
bool is_partial_array_task() const {
|
bool is_partial_array_task() const {
|
||||||
return (raw_value() & PartialArrayTag) != 0;
|
return (raw_value() & PartialArrayTag) != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool is_partial_array_state() const {
|
||||||
|
return (raw_value() & PartialArrayTag) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
oop* to_oop_ptr() const {
|
oop* to_oop_ptr() const {
|
||||||
return static_cast<oop*>(decode(OopTag));
|
return static_cast<oop*>(decode(OopTag));
|
||||||
}
|
}
|
||||||
@ -651,9 +663,14 @@ public:
|
|||||||
return static_cast<narrowOop*>(decode(NarrowOopTag));
|
return static_cast<narrowOop*>(decode(NarrowOopTag));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Temporarily retained to support ParallelGC until it adopts PartialArrayState.
|
||||||
PartialArrayScanTask to_partial_array_task() const {
|
PartialArrayScanTask to_partial_array_task() const {
|
||||||
return PartialArrayScanTask(cast_to_oop(decode(PartialArrayTag)));
|
return PartialArrayScanTask(cast_to_oop(decode(PartialArrayTag)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PartialArrayState* to_partial_array_state() const {
|
||||||
|
return static_cast<PartialArrayState*>(decode(PartialArrayTag));
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // SHARE_GC_SHARED_TASKQUEUE_HPP
|
#endif // SHARE_GC_SHARED_TASKQUEUE_HPP
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -32,50 +32,42 @@ using Stepper = PartialArrayTaskStepper;
|
|||||||
|
|
||||||
class PartialArrayTaskStepper::TestSupport : AllStatic {
|
class PartialArrayTaskStepper::TestSupport : AllStatic {
|
||||||
public:
|
public:
|
||||||
static Step start(const Stepper* stepper,
|
|
||||||
int length,
|
|
||||||
int* to_length_addr,
|
|
||||||
uint chunk_size) {
|
|
||||||
return stepper->start_impl(length, to_length_addr, chunk_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
static Step next(const Stepper* stepper,
|
static Step next(const Stepper* stepper,
|
||||||
int length,
|
size_t length,
|
||||||
int* to_length_addr,
|
size_t* to_length_addr) {
|
||||||
uint chunk_size) {
|
return stepper->next_impl(length, to_length_addr);
|
||||||
return stepper->next_impl(length, to_length_addr, chunk_size);
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
using StepperSupport = PartialArrayTaskStepper::TestSupport;
|
using StepperSupport = PartialArrayTaskStepper::TestSupport;
|
||||||
|
|
||||||
static int simulate(const Stepper* stepper,
|
static uint simulate(const Stepper* stepper,
|
||||||
int length,
|
size_t length,
|
||||||
int* to_length_addr,
|
size_t* to_length_addr) {
|
||||||
uint chunk_size) {
|
Step init = stepper->start(length);
|
||||||
Step init = StepperSupport::start(stepper, length, to_length_addr, chunk_size);
|
*to_length_addr = init._index;
|
||||||
uint queue_count = init._ncreate;
|
uint queue_count = init._ncreate;
|
||||||
int task = 0;
|
uint task = 0;
|
||||||
for ( ; queue_count > 0; ++task) {
|
for ( ; queue_count > 0; ++task) {
|
||||||
--queue_count;
|
--queue_count;
|
||||||
Step step = StepperSupport::next(stepper, length, to_length_addr, chunk_size);
|
Step step = StepperSupport::next(stepper, length, to_length_addr);
|
||||||
queue_count += step._ncreate;
|
queue_count += step._ncreate;
|
||||||
}
|
}
|
||||||
return task;
|
return task;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void run_test(int length, int chunk_size, uint n_workers) {
|
static void run_test(size_t length, size_t chunk_size, uint n_workers) {
|
||||||
const PartialArrayTaskStepper stepper(n_workers);
|
const PartialArrayTaskStepper stepper(n_workers, chunk_size);
|
||||||
int to_length;
|
size_t to_length;
|
||||||
int tasks = simulate(&stepper, length, &to_length, chunk_size);
|
uint tasks = simulate(&stepper, length, &to_length);
|
||||||
ASSERT_EQ(length, to_length);
|
ASSERT_EQ(length, to_length);
|
||||||
ASSERT_EQ(tasks, length / chunk_size);
|
ASSERT_EQ(tasks, length / chunk_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(PartialArrayTaskStepperTest, doit) {
|
TEST(PartialArrayTaskStepperTest, doit) {
|
||||||
for (int chunk_size = 50; chunk_size <= 500; chunk_size += 50) {
|
for (size_t chunk_size = 50; chunk_size <= 500; chunk_size += 50) {
|
||||||
for (uint n_workers = 1; n_workers <= 256; n_workers = (n_workers * 3 / 2 + 1)) {
|
for (uint n_workers = 1; n_workers <= 256; n_workers = (n_workers * 3 / 2 + 1)) {
|
||||||
for (int length = 0; length <= 1000000; length = (length * 2 + 1)) {
|
for (size_t length = 0; length <= 1000000; length = (length * 2 + 1)) {
|
||||||
run_test(length, chunk_size, n_workers);
|
run_test(length, chunk_size, n_workers);
|
||||||
}
|
}
|
||||||
// Ensure we hit boundary cases for length % chunk_size == 0.
|
// Ensure we hit boundary cases for length % chunk_size == 0.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user