8191471: Elastic TLABs for G1

Reviewed-by: tschatzl, pliden
Stefan Johansson 2018-05-02 13:44:46 +02:00
parent 8e69e1b11f
commit 7f7d103c85
16 changed files with 268 additions and 77 deletions

@@ -95,33 +95,40 @@ size_t G1AllocRegion::fill_up_remaining_space(HeapRegion* alloc_region) {
return result;
}
size_t G1AllocRegion::retire_internal(HeapRegion* alloc_region, bool fill_up) {
// We never have to check whether the active region is empty or not,
// and potentially free it if it is, given that it's guaranteed that
// it will never be empty.
size_t waste = 0;
assert_alloc_region(!alloc_region->is_empty(),
"the alloc region should never be empty");
if (fill_up) {
waste = fill_up_remaining_space(alloc_region);
}
assert_alloc_region(alloc_region->used() >= _used_bytes_before, "invariant");
size_t allocated_bytes = alloc_region->used() - _used_bytes_before;
retire_region(alloc_region, allocated_bytes);
_used_bytes_before = 0;
return waste;
}
size_t G1AllocRegion::retire(bool fill_up) {
assert_alloc_region(_alloc_region != NULL, "not initialized properly");
size_t result = 0;
size_t waste = 0;
trace("retiring");
HeapRegion* alloc_region = _alloc_region;
if (alloc_region != _dummy_region) {
// We never have to check whether the active region is empty or not,
// and potentially free it if it is, given that it's guaranteed that
// it will never be empty.
assert_alloc_region(!alloc_region->is_empty(),
"the alloc region should never be empty");
if (fill_up) {
result = fill_up_remaining_space(alloc_region);
}
assert_alloc_region(alloc_region->used() >= _used_bytes_before, "invariant");
size_t allocated_bytes = alloc_region->used() - _used_bytes_before;
retire_region(alloc_region, allocated_bytes);
_used_bytes_before = 0;
_alloc_region = _dummy_region;
waste = retire_internal(alloc_region, fill_up);
reset_alloc_region();
}
trace("retired");
return result;
return waste;
}
HeapWord* G1AllocRegion::new_alloc_region_and_allocate(size_t word_size,
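The refactoring above splits retirement in two: retire_internal() fills the region's unused tail (when fill_up is set, so nobody else can allocate there and the region stays parsable) and returns the filled bytes as waste, while retire() only decides what to do with the active region. A minimal standalone sketch of that waste accounting, where Region and the fill routine are simplified stand-ins for HeapRegion and fill_up_remaining_space():

#include <cassert>
#include <cstddef>
#include <cstdio>

// Simplified stand-in for a HeapRegion: a fixed-capacity bump region.
struct Region {
  size_t capacity;
  size_t used;
  size_t free() const { return capacity - used; }
};

// Stand-in for fill_up_remaining_space(): plug the unused tail with a
// dummy object so the region stays parsable, and report it as waste.
static size_t fill_up_remaining_space(Region* r) {
  size_t waste = r->free();
  r->used = r->capacity;
  return waste;
}

// Mirrors the shape of G1AllocRegion::retire_internal(): account the bytes
// handed out since the region became active, optionally fill the tail, and
// return the wasted (filled) bytes.
static size_t retire_internal(Region* r, size_t used_bytes_before, bool fill_up) {
  size_t waste = 0;
  if (fill_up) {
    waste = fill_up_remaining_space(r);
  }
  assert(r->used >= used_bytes_before);
  size_t allocated_bytes = r->used - used_bytes_before;
  printf("retired: %zu bytes allocated, %zu bytes wasted\n", allocated_bytes, waste);
  return waste;
}

int main() {
  Region r{1024, 900};          // 900 bytes of mutator allocations
  retire_internal(&r, 0, true); // prints: retired: 900 bytes allocated, 124 bytes wasted
}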
@@ -245,7 +252,8 @@ void G1AllocRegion::trace(const char* str, size_t min_word_size, size_t desired_
G1AllocRegion::G1AllocRegion(const char* name,
bool bot_updates)
: _name(name), _bot_updates(bot_updates),
_alloc_region(NULL), _count(0), _used_bytes_before(0) { }
_alloc_region(NULL), _count(0),
_used_bytes_before(0) { }
HeapRegion* MutatorAllocRegion::allocate_new_region(size_t word_size,
@@ -258,6 +266,82 @@ void MutatorAllocRegion::retire_region(HeapRegion* alloc_region,
_g1h->retire_mutator_alloc_region(alloc_region, allocated_bytes);
}
void MutatorAllocRegion::init() {
assert(_retained_alloc_region == NULL, "Pre-condition");
G1AllocRegion::init();
_wasted_bytes = 0;
}
bool MutatorAllocRegion::should_retain(HeapRegion* region) {
size_t free_bytes = region->free();
if (free_bytes < MinTLABSize) {
return false;
}
if (_retained_alloc_region != NULL &&
free_bytes < _retained_alloc_region->free()) {
return false;
}
return true;
}
size_t MutatorAllocRegion::retire(bool fill_up) {
size_t waste = 0;
trace("retiring");
HeapRegion* current_region = get();
if (current_region != NULL) {
// Retain the current region if it fits a TLAB and has more
// free than the currently retained region.
if (should_retain(current_region)) {
trace("mutator retained");
if (_retained_alloc_region != NULL) {
waste = retire_internal(_retained_alloc_region, true);
}
_retained_alloc_region = current_region;
} else {
waste = retire_internal(current_region, fill_up);
}
reset_alloc_region();
}
_wasted_bytes += waste;
trace("retired");
return waste;
}
size_t MutatorAllocRegion::used_in_alloc_regions() {
size_t used = 0;
HeapRegion* hr = get();
if (hr != NULL) {
used += hr->used();
}
hr = _retained_alloc_region;
if (hr != NULL) {
used += hr->used();
}
return used;
}
HeapRegion* MutatorAllocRegion::release() {
HeapRegion* ret = G1AllocRegion::release();
// The retained alloc region must be retired and this must be
// done after the above call to release the mutator alloc region,
// since it might update the _retained_alloc_region member.
if (_retained_alloc_region != NULL) {
_wasted_bytes += retire_internal(_retained_alloc_region, false);
_retained_alloc_region = NULL;
}
log_debug(gc, alloc, region)("Mutator Allocation stats, regions: %u, wasted size: " SIZE_FORMAT "%s (%4.1f%%)",
count(),
byte_size_in_proper_unit(_wasted_bytes),
proper_unit_for_byte_size(_wasted_bytes),
percent_of(_wasted_bytes, count() * HeapRegion::GrainBytes));
return ret;
}
HeapRegion* G1GCAllocRegion::allocate_new_region(size_t word_size,
bool force) {
assert(!force, "not supported for GC alloc regions");
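The heart of the change is the code above: instead of always filling up and retiring the current mutator region, retire() keeps it as the retained region when its free space still fits a TLAB and beats the current candidate; only the displaced region is filled up, and that fill is what accumulates in _wasted_bytes. A standalone sketch of just the retention decision (Region and the MinTLABSize value are simplified stand-ins):

#include <cstddef>

static const size_t MinTLABSize = 2 * 1024; // stand-in value, not the real flag

struct Region {
  size_t capacity;
  size_t used;
  size_t free() const { return capacity - used; }
};

// Mirrors MutatorAllocRegion::should_retain(): keep a region past its
// retirement only if another TLAB still fits in it and it has more free
// space than whatever is already retained.
static bool should_retain(const Region* candidate, const Region* retained) {
  if (candidate->free() < MinTLABSize) {
    return false; // too full to satisfy even a minimal TLAB
  }
  if (retained != nullptr && candidate->free() < retained->free()) {
    return false; // the currently retained region is the better keeper
  }
  return true;
}

At most two mutator regions are live at any time, which is why used_in_alloc_regions() only has to sum the current and the retained region.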

@@ -80,6 +80,20 @@ private:
// whether the _alloc_region is NULL or not.
static HeapRegion* _dummy_region;
// After a region is allocated by alloc_new_region, this
// method is used to set it as the active alloc_region
void update_alloc_region(HeapRegion* alloc_region);
// Allocate a new active region and use it to perform a word_size
// allocation. The force parameter will be passed on to
// G1CollectedHeap::allocate_new_alloc_region() and tells it to try
// to allocate a new region even if the max has been reached.
HeapWord* new_alloc_region_and_allocate(size_t word_size, bool force);
protected:
// Reset the alloc region to point to the dummy region.
void reset_alloc_region();
// Perform a non-MT-safe allocation out of the given region.
inline HeapWord* allocate(HeapRegion* alloc_region,
size_t word_size);
@@ -102,23 +116,14 @@ private:
// the space.
size_t fill_up_remaining_space(HeapRegion* alloc_region);
// After a region is allocated by alloc_new_region, this
// method is used to set it as the active alloc_region
void update_alloc_region(HeapRegion* alloc_region);
// Allocate a new active region and use it to perform a word_size
// allocation. The force parameter will be passed on to
// G1CollectedHeap::allocate_new_alloc_region() and tells it to try
// to allocate a new region even if the max has been reached.
HeapWord* new_alloc_region_and_allocate(size_t word_size, bool force);
protected:
// Retire the active allocating region. If fill_up is true then make
// sure that the region is full before we retire it so that no one
// else can allocate out of it.
// Returns the number of bytes that have been filled up during retire.
virtual size_t retire(bool fill_up);
size_t retire_internal(HeapRegion* alloc_region, bool fill_up);
// For convenience as subclasses use it.
static G1CollectedHeap* _g1h;
@@ -177,7 +182,7 @@ public:
inline HeapWord* attempt_allocation_force(size_t word_size);
// Should be called before we start using this object.
void init();
virtual void init();
// This can be used to set the active region to a specific
// region. (Use Example: we try to retain the last old GC alloc
@@ -197,14 +202,49 @@ public:
};
class MutatorAllocRegion : public G1AllocRegion {
private:
// Keeps track of the total waste generated during the current
// mutator phase.
size_t _wasted_bytes;
// Retained allocation region. Used to lower the waste generated
// during mutation by having two active regions if the free space
// in a region about to be retired still could fit a TLAB.
HeapRegion* volatile _retained_alloc_region;
// Decide if the region should be retained, based on the free size
// in it and the free size in the currently retained region, if any.
bool should_retain(HeapRegion* region);
protected:
virtual HeapRegion* allocate_new_region(size_t word_size, bool force);
virtual void retire_region(HeapRegion* alloc_region, size_t allocated_bytes);
virtual size_t retire(bool fill_up);
public:
MutatorAllocRegion()
: G1AllocRegion("Mutator Alloc Region", false /* bot_updates */) { }
};
: G1AllocRegion("Mutator Alloc Region", false /* bot_updates */),
_wasted_bytes(0),
_retained_alloc_region(NULL) { }
// Returns the combined used memory in the current alloc region and
// the retained alloc region.
size_t used_in_alloc_regions();
// Perform an allocation out of the retained allocation region, with the given
// minimum and desired size. Returns the actual size allocated (between
// minimum and desired size) in actual_word_size if the allocation has been
// successful.
// Should be called without holding a lock. It will try to allocate lock-free
// out of the retained region, or return NULL if it was unable to.
inline HeapWord* attempt_retained_allocation(size_t min_word_size,
size_t desired_word_size,
size_t* actual_word_size);
// This specialization of release() makes sure that the retained alloc
// region is retired and set to NULL.
virtual HeapRegion* release();
virtual void init();
};
// Common base class for allocation regions used during GC.
class G1GCAllocRegion : public G1AllocRegion {
protected:
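Note the volatile qualifier on _retained_alloc_region above: allocating threads read it lock-free in attempt_retained_allocation() while retire() and release() replace or clear it. A sketch of that single-writer publication pattern, using std::atomic as a stand-in for HotSpot's volatile idiom (an assumption of this sketch, not code from this patch):

#include <atomic>

struct Region; // contents irrelevant to the publication pattern

struct RetainedRegionHolder {
  std::atomic<Region*> retained{nullptr};

  // Lock-free reader side, as in attempt_retained_allocation().
  Region* read() { return retained.load(std::memory_order_acquire); }

  // Writer side, called while holding the heap lock (retire()/release()).
  void publish(Region* r) { retained.store(r, std::memory_order_release); }
};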

@@ -36,6 +36,10 @@
} while (0)
inline void G1AllocRegion::reset_alloc_region() {
_alloc_region = _dummy_region;
}
inline HeapWord* G1AllocRegion::allocate(HeapRegion* alloc_region,
size_t word_size) {
assert(alloc_region != NULL, "pre-condition");
@@ -126,4 +130,17 @@ inline HeapWord* G1AllocRegion::attempt_allocation_force(size_t word_size) {
return NULL;
}
inline HeapWord* MutatorAllocRegion::attempt_retained_allocation(size_t min_word_size,
size_t desired_word_size,
size_t* actual_word_size) {
if (_retained_alloc_region != NULL) {
HeapWord* result = par_allocate(_retained_alloc_region, min_word_size, desired_word_size, actual_word_size);
if (result != NULL) {
trace("alloc retained", min_word_size, desired_word_size, *actual_word_size, result);
return result;
}
}
return NULL;
}
#endif // SHARE_VM_GC_G1_G1ALLOCREGION_INLINE_HPP
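attempt_retained_allocation() depends on par_allocate() taking both a minimum and a desired word size and reporting the granted size, which is what lets a nearly-full retained region still hand out a shrunken TLAB. A self-contained sketch of that kind of elastic, CAS-based bump-pointer allocation (the Region layout and word type are simplified stand-ins for HeapRegion):

#include <atomic>
#include <cstddef>

typedef size_t HeapWord; // stand-in word type

struct Region {
  HeapWord* end;
  std::atomic<HeapWord*> top;

  // Lock-free allocation of between min_word_size and desired_word_size
  // words; the granted size is returned through actual_word_size.
  HeapWord* par_allocate(size_t min_word_size,
                         size_t desired_word_size,
                         size_t* actual_word_size) {
    HeapWord* cur = top.load(std::memory_order_relaxed);
    for (;;) {
      size_t available = (size_t)(end - cur);
      size_t want = desired_word_size <= available ? desired_word_size : available;
      if (want < min_word_size) {
        return nullptr; // not even the minimum fits
      }
      HeapWord* next = cur + want;
      // On failure cur is reloaded with the current top and we retry.
      if (top.compare_exchange_weak(cur, next, std::memory_order_relaxed)) {
        *actual_word_size = want;
        return cur; // the old top is the start of the granted block
      }
    }
  }
};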

@@ -157,14 +157,7 @@ size_t G1Allocator::unsafe_max_tlab_alloc() {
size_t G1Allocator::used_in_alloc_regions() {
assert(Heap_lock->owner() != NULL, "Should be owned on this thread's behalf.");
size_t result = 0;
// Read only once in case it is set to NULL concurrently
HeapRegion* hr = mutator_alloc_region()->get();
if (hr != NULL) {
result += hr->used();
}
return result;
return mutator_alloc_region()->used_in_alloc_regions();
}

@@ -99,7 +99,9 @@ public:
// Allocate blocks of memory during mutator time.
inline HeapWord* attempt_allocation(size_t word_size);
inline HeapWord* attempt_allocation(size_t min_word_size,
size_t desired_word_size,
size_t* actual_word_size);
inline HeapWord* attempt_allocation_locked(size_t word_size);
inline HeapWord* attempt_allocation_force(size_t word_size);

@@ -41,8 +41,14 @@ inline OldGCAllocRegion* G1Allocator::old_gc_alloc_region() {
return &_old_gc_alloc_region;
}
inline HeapWord* G1Allocator::attempt_allocation(size_t word_size) {
return mutator_alloc_region()->attempt_allocation(word_size);
inline HeapWord* G1Allocator::attempt_allocation(size_t min_word_size,
size_t desired_word_size,
size_t* actual_word_size) {
HeapWord* result = mutator_alloc_region()->attempt_retained_allocation(min_word_size, desired_word_size, actual_word_size);
if (result != NULL) {
return result;
}
return mutator_alloc_region()->attempt_allocation(min_word_size, desired_word_size, actual_word_size);
}
inline HeapWord* G1Allocator::attempt_allocation_locked(size_t word_size) {

@@ -384,11 +384,13 @@ HeapWord* G1CollectedHeap::humongous_obj_allocate(size_t word_size) {
return result;
}
HeapWord* G1CollectedHeap::allocate_new_tlab(size_t word_size) {
HeapWord* G1CollectedHeap::allocate_new_tlab(size_t min_size,
size_t requested_size,
size_t* actual_size) {
assert_heap_not_locked_and_not_at_safepoint();
assert(!is_humongous(word_size), "we do not allow humongous TLABs");
assert(!is_humongous(requested_size), "we do not allow humongous TLABs");
return attempt_allocation(word_size);
return attempt_allocation(min_size, requested_size, actual_size);
}
HeapWord*
@@ -399,7 +401,8 @@ G1CollectedHeap::mem_allocate(size_t word_size,
if (is_humongous(word_size)) {
return attempt_allocation_humongous(word_size);
}
return attempt_allocation(word_size);
size_t dummy = 0;
return attempt_allocation(word_size, word_size, &dummy);
}
HeapWord* G1CollectedHeap::attempt_allocation_slow(size_t word_size) {
@@ -492,8 +495,8 @@ HeapWord* G1CollectedHeap::attempt_allocation_slow(size_t word_size) {
// first attempt (without holding the Heap_lock) here and the
// follow-on attempt will be at the start of the next loop
// iteration (after taking the Heap_lock).
result = _allocator->attempt_allocation(word_size);
size_t dummy = 0;
result = _allocator->attempt_allocation(word_size, word_size, &dummy);
if (result != NULL) {
return result;
}
@@ -722,20 +725,28 @@ void G1CollectedHeap::fill_archive_regions(MemRegion* ranges, size_t count) {
}
}
inline HeapWord* G1CollectedHeap::attempt_allocation(size_t word_size) {
inline HeapWord* G1CollectedHeap::attempt_allocation(size_t min_word_size,
size_t desired_word_size,
size_t* actual_word_size) {
assert_heap_not_locked_and_not_at_safepoint();
assert(!is_humongous(word_size), "attempt_allocation() should not "
assert(!is_humongous(desired_word_size), "attempt_allocation() should not "
"be called for humongous allocation requests");
HeapWord* result = _allocator->attempt_allocation(word_size);
HeapWord* result = _allocator->attempt_allocation(min_word_size, desired_word_size, actual_word_size);
if (result == NULL) {
result = attempt_allocation_slow(word_size);
*actual_word_size = desired_word_size;
result = attempt_allocation_slow(desired_word_size);
}
assert_heap_not_locked();
if (result != NULL) {
dirty_young_block(result, word_size);
assert(*actual_word_size != 0, "Actual size must have been set here");
dirty_young_block(result, *actual_word_size);
} else {
*actual_word_size = 0;
}
return result;
}
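The contract established here is worth spelling out: the lock-free fast path may return anything in [min_word_size, desired_word_size], the locked slow path always allocates exactly the desired size (hence *actual_word_size is pinned before the call), and a NULL result guarantees *actual_word_size == 0. A compilable sketch of that shape, with trivial placeholders standing in for the real fast and slow paths:

#include <cassert>
#include <cstddef>

typedef size_t HeapWord;

static HeapWord backing[4096]; // dummy storage for the placeholder paths

// Placeholder fast path: can only grant up to 'cap' words lock-free.
static HeapWord* fast_path(size_t min, size_t desired, size_t* actual, size_t cap) {
  size_t give = desired < cap ? desired : cap;
  if (give < min) return nullptr;
  *actual = give;
  return backing;
}

// Placeholder slow path: exact size or failure.
static HeapWord* slow_path(size_t desired) {
  return desired <= 4096 ? backing : nullptr;
}

// Mirrors the shape of G1CollectedHeap::attempt_allocation() above.
static HeapWord* attempt_allocation(size_t min_word_size, size_t desired_word_size,
                                    size_t* actual_word_size, size_t fast_cap) {
  HeapWord* result = fast_path(min_word_size, desired_word_size,
                               actual_word_size, fast_cap);
  if (result == nullptr) {
    *actual_word_size = desired_word_size; // the slow path is exact-size
    result = slow_path(desired_word_size);
  }
  if (result != nullptr) {
    assert(*actual_word_size >= min_word_size &&
           *actual_word_size <= desired_word_size);
  } else {
    *actual_word_size = 0; // callers can rely on 0 signalling failure
  }
  return result;
}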

@@ -420,7 +420,9 @@ private:
// humongous allocation requests should go to mem_allocate() which
// will satisfy them with a special path.
virtual HeapWord* allocate_new_tlab(size_t word_size);
virtual HeapWord* allocate_new_tlab(size_t min_size,
size_t requested_size,
size_t* actual_size);
virtual HeapWord* mem_allocate(size_t word_size,
bool* gc_overhead_limit_was_exceeded);
@@ -428,7 +430,9 @@ private:
// First-level mutator allocation attempt: try to allocate out of
// the mutator alloc region without taking the Heap_lock. This
// should only be used for non-humongous allocations.
inline HeapWord* attempt_allocation(size_t word_size);
inline HeapWord* attempt_allocation(size_t min_word_size,
size_t desired_word_size,
size_t* actual_word_size);
// Second-level mutator allocation attempt: take the Heap_lock and
// retry the allocation attempt, potentially scheduling a GC

@@ -478,8 +478,13 @@ size_t ParallelScavengeHeap::unsafe_max_tlab_alloc(Thread* thr) const {
return young_gen()->eden_space()->unsafe_max_tlab_alloc(thr);
}
HeapWord* ParallelScavengeHeap::allocate_new_tlab(size_t size) {
return young_gen()->allocate(size);
HeapWord* ParallelScavengeHeap::allocate_new_tlab(size_t min_size, size_t requested_size, size_t* actual_size) {
HeapWord* result = young_gen()->allocate(requested_size);
if (result != NULL) {
*actual_size = requested_size;
}
return result;
}
void ParallelScavengeHeap::accumulate_statistics_all_tlabs() {

@@ -85,7 +85,7 @@ class ParallelScavengeHeap : public CollectedHeap {
protected:
static inline size_t total_invocations();
HeapWord* allocate_new_tlab(size_t size);
HeapWord* allocate_new_tlab(size_t min_size, size_t requested_size, size_t* actual_size);
inline bool should_alloc_in_eden(size_t size) const;
inline void death_march_check(HeapWord* const result, size_t size);

@@ -384,17 +384,24 @@ HeapWord* CollectedHeap::allocate_from_tlab_slow(Klass* klass, Thread* thread, s
return NULL;
}
// Allocate a new TLAB...
HeapWord* obj = Universe::heap()->allocate_new_tlab(new_tlab_size);
// Allocate a new TLAB requesting new_tlab_size. Any size
// between minimal and new_tlab_size is accepted.
size_t actual_tlab_size = 0;
size_t min_tlab_size = ThreadLocalAllocBuffer::compute_min_size(size);
HeapWord* obj = Universe::heap()->allocate_new_tlab(min_tlab_size, new_tlab_size, &actual_tlab_size);
if (obj == NULL) {
assert(actual_tlab_size == 0, "Allocation failed, but actual size was updated. min: " SIZE_FORMAT ", desired: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
min_tlab_size, new_tlab_size, actual_tlab_size);
return NULL;
}
assert(actual_tlab_size != 0, "Allocation succeeded but actual size not updated. obj at: " PTR_FORMAT " min: " SIZE_FORMAT ", desired: " SIZE_FORMAT,
p2i(obj), min_tlab_size, new_tlab_size);
AllocTracer::send_allocation_in_new_tlab(klass, obj, new_tlab_size * HeapWordSize, size * HeapWordSize, thread);
AllocTracer::send_allocation_in_new_tlab(klass, obj, actual_tlab_size * HeapWordSize, size * HeapWordSize, thread);
if (ZeroTLAB) {
// ..and clear it.
Copy::zero_to_words(obj, new_tlab_size);
Copy::zero_to_words(obj, actual_tlab_size);
} else {
// ...and zap just allocated object.
#ifdef ASSERT
@@ -402,10 +409,10 @@ HeapWord* CollectedHeap::allocate_from_tlab_slow(Klass* klass, Thread* thread, s
// ensure that the returned space is not considered parsable by
// any concurrent GC thread.
size_t hdr_size = oopDesc::header_size();
Copy::fill_to_words(obj + hdr_size, new_tlab_size - hdr_size, badHeapWordVal);
Copy::fill_to_words(obj + hdr_size, actual_tlab_size - hdr_size, badHeapWordVal);
#endif // ASSERT
}
thread->tlab().fill(obj, obj + size, new_tlab_size);
thread->tlab().fill(obj, obj + size, actual_tlab_size);
return obj;
}
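Put together, the slow-path refill now asks the heap for any size in [compute_min_size(size), new_tlab_size] and then clears or zaps exactly the granted size before installing the new TLAB. A condensed, standalone sketch of that flow (heap_allocate stands in for the virtual allocate_new_tlab() call, and the install step for tlab().fill()):

#include <cassert>
#include <cstddef>
#include <cstring>

typedef size_t HeapWord;

static HeapWord heap_space[8192];

// Stand-in for Universe::heap()->allocate_new_tlab(min, requested, &actual).
static HeapWord* heap_allocate(size_t min_words, size_t requested_words,
                               size_t* actual_words) {
  size_t grant = requested_words <= 8192 ? requested_words : 8192;
  if (grant < min_words) return nullptr;
  *actual_words = grant;
  return heap_space;
}

static HeapWord* refill_and_allocate(size_t min_tlab_size, size_t desired_tlab_size,
                                     bool zero_tlab) {
  size_t actual = 0;
  HeapWord* tlab = heap_allocate(min_tlab_size, desired_tlab_size, &actual);
  if (tlab == nullptr) {
    assert(actual == 0); // failure must not report a size
    return nullptr;
  }
  assert(actual >= min_tlab_size && actual <= desired_tlab_size);
  if (zero_tlab) {
    memset(tlab, 0, actual * sizeof(HeapWord)); // clear only what was granted
  }
  // Install [tlab, tlab + actual) as the thread's new TLAB here.
  return tlab;
}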
@@ -506,7 +513,9 @@ void CollectedHeap::fill_with_objects(HeapWord* start, size_t words, bool zap)
fill_with_object_impl(start, words, zap);
}
HeapWord* CollectedHeap::allocate_new_tlab(size_t size) {
HeapWord* CollectedHeap::allocate_new_tlab(size_t min_size,
size_t requested_size,
size_t* actual_size) {
guarantee(false, "thread-local allocation buffers not supported");
return NULL;
}

@@ -126,7 +126,13 @@ class CollectedHeap : public CHeapObj<mtInternal> {
CollectedHeap();
// Create a new tlab. All TLAB allocations must go through this.
virtual HeapWord* allocate_new_tlab(size_t size);
// To allow more flexible TLAB allocations, min_size specifies
// the minimum size needed, while requested_size is the requested
// size based on ergonomics. The actually allocated size will be
// returned in actual_size.
virtual HeapWord* allocate_new_tlab(size_t min_size,
size_t requested_size,
size_t* actual_size);
// Accumulate statistics on all tlabs.
virtual void accumulate_statistics_all_tlabs();

@@ -1146,11 +1146,18 @@ size_t GenCollectedHeap::unsafe_max_tlab_alloc(Thread* thr) const {
return 0;
}
HeapWord* GenCollectedHeap::allocate_new_tlab(size_t size) {
HeapWord* GenCollectedHeap::allocate_new_tlab(size_t min_size,
size_t requested_size,
size_t* actual_size) {
bool gc_overhead_limit_was_exceeded;
return mem_allocate_work(size /* size */,
true /* is_tlab */,
&gc_overhead_limit_was_exceeded);
HeapWord* result = mem_allocate_work(requested_size /* size */,
true /* is_tlab */,
&gc_overhead_limit_was_exceeded);
if (result != NULL) {
*actual_size = requested_size;
}
return result;
}
// Requires "*prev_ptr" to be non-NULL. Deletes a block of minimal size
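Only G1 exploits the elasticity; as the serial and parallel hunks show, the other heaps satisfy the new interface by allocating the full requested size and, on success, reporting actual_size = requested_size. A sketch of that adapter for any fixed-size allocator (fixed_allocate is a hypothetical stand-in):

#include <cstddef>

typedef size_t HeapWord;

static HeapWord pool[4096];

// Hypothetical fixed-size allocation path of a non-elastic heap.
static HeapWord* fixed_allocate(size_t words) {
  return words <= 4096 ? pool : nullptr;
}

// Adapter: ignore min_size and report the requested size on success.
static HeapWord* allocate_new_tlab(size_t min_size, size_t requested_size,
                                   size_t* actual_size) {
  (void)min_size; // a fixed-size heap never shrinks the request
  HeapWord* result = fixed_allocate(requested_size);
  if (result != nullptr) {
    *actual_size = requested_size;
  }
  return result;
}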

@@ -297,7 +297,9 @@ public:
virtual size_t tlab_capacity(Thread* thr) const;
virtual size_t tlab_used(Thread* thr) const;
virtual size_t unsafe_max_tlab_alloc(Thread* thr) const;
virtual HeapWord* allocate_new_tlab(size_t size);
virtual HeapWord* allocate_new_tlab(size_t min_size,
size_t requested_size,
size_t* actual_size);
// The "requestor" generation is performing some garbage collection
// action for which it would be useful to have scratch space. The

@@ -141,6 +141,9 @@ public:
// Otherwise return 0;
inline size_t compute_size(size_t obj_size);
// Compute the minimal needed tlab size for the given object size.
static inline size_t compute_min_size(size_t obj_size);
// Record slow allocation
inline void record_slow_allocation(size_t obj_size);

@@ -54,18 +54,15 @@ inline HeapWord* ThreadLocalAllocBuffer::allocate(size_t size) {
}
inline size_t ThreadLocalAllocBuffer::compute_size(size_t obj_size) {
const size_t aligned_obj_size = align_object_size(obj_size);
// Compute the size for the new TLAB.
// The "last" tlab may be smaller to reduce fragmentation.
// unsafe_max_tlab_alloc is just a hint.
const size_t available_size = Universe::heap()->unsafe_max_tlab_alloc(myThread()) /
HeapWordSize;
size_t new_tlab_size = MIN3(available_size, desired_size() + aligned_obj_size, max_size());
size_t new_tlab_size = MIN3(available_size, desired_size() + align_object_size(obj_size), max_size());
// Make sure there's enough room for object and filler int[].
const size_t obj_plus_filler_size = aligned_obj_size + alignment_reserve();
if (new_tlab_size < obj_plus_filler_size) {
if (new_tlab_size < compute_min_size(obj_size)) {
// If there isn't enough room for the allocation, return failure.
log_trace(gc, tlab)("ThreadLocalAllocBuffer::compute_size(" SIZE_FORMAT ") returns failure",
obj_size);
@@ -76,6 +73,11 @@ inline size_t ThreadLocalAllocBuffer::compute_size(size_t obj_size) {
return new_tlab_size;
}
inline size_t ThreadLocalAllocBuffer::compute_min_size(size_t obj_size) {
const size_t aligned_obj_size = align_object_size(obj_size);
const size_t size_with_reserve = aligned_obj_size + alignment_reserve();
return MAX2(size_with_reserve, MinTLABSize);
}
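So the minimum is the aligned object plus the alignment reserve (room for the filler object placed when the TLAB is retired), floored at MinTLABSize, while compute_size() caps the desired size by availability and the TLAB maximum. A standalone sketch of both computations, with the alignment helpers and constants as simplified stand-ins for HotSpot's:

#include <algorithm>
#include <cstddef>

static const size_t MinTLABSize = 256;     // stand-in values, in words
static const size_t alignment_reserve = 2; // room for the retirement filler

static size_t align_object_size(size_t words) {
  return (words + 7) & ~size_t(7); // stand-in 8-word object alignment
}

// Mirrors ThreadLocalAllocBuffer::compute_min_size().
static size_t compute_min_size(size_t obj_size) {
  return std::max(align_object_size(obj_size) + alignment_reserve, MinTLABSize);
}

// Mirrors compute_size(): the desired size, bounded by what the heap says
// is available and by the TLAB maximum; 0 signals failure.
static size_t compute_size(size_t obj_size, size_t available_size,
                           size_t desired_size, size_t max_size) {
  size_t new_tlab_size = std::min({available_size,
                                   desired_size + align_object_size(obj_size),
                                   max_size});
  if (new_tlab_size < compute_min_size(obj_size)) {
    return 0; // not even the minimal TLAB fits; allocate outside a TLAB
  }
  return new_tlab_size;
}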
void ThreadLocalAllocBuffer::record_slow_allocation(size_t obj_size) {
// Raise size required to bypass TLAB next time. Why? Else there's