From dc184f1099e09cef095cd8438d88dd8a1e6f5522 Mon Sep 17 00:00:00 2001 From: Kelvin Nilsen Date: Thu, 16 May 2024 16:47:09 +0000 Subject: [PATCH] 8324649: Shenandoah: replace implementation of free set Reviewed-by: wkemper, ysr, rkennke --- .../share/gc/shenandoah/shenandoahFreeSet.cpp | 1398 ++++++++++++----- .../share/gc/shenandoah/shenandoahFreeSet.hpp | 361 ++++- .../share/gc/shenandoah/shenandoahFullGC.cpp | 1 - .../share/gc/shenandoah/shenandoahHeap.cpp | 16 +- .../gc/shenandoah/shenandoahSimpleBitMap.cpp | 291 ++++ .../gc/shenandoah/shenandoahSimpleBitMap.hpp | 170 ++ .../shenandoahSimpleBitMap.inline.hpp | 100 ++ .../test_shenandoahSimpleBitMap.cpp | 451 ++++++ 8 files changed, 2392 insertions(+), 396 deletions(-) create mode 100644 src/hotspot/share/gc/shenandoah/shenandoahSimpleBitMap.cpp create mode 100644 src/hotspot/share/gc/shenandoah/shenandoahSimpleBitMap.hpp create mode 100644 src/hotspot/share/gc/shenandoah/shenandoahSimpleBitMap.inline.hpp create mode 100644 test/hotspot/gtest/gc/shenandoah/test_shenandoahSimpleBitMap.cpp diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp index 72a3f411ea0..c11d7e814e4 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2016, 2021, Red Hat, Inc. All rights reserved. + * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -28,82 +29,658 @@ #include "gc/shenandoah/shenandoahHeap.inline.hpp" #include "gc/shenandoah/shenandoahHeapRegionSet.hpp" #include "gc/shenandoah/shenandoahMarkingContext.inline.hpp" +#include "gc/shenandoah/shenandoahSimpleBitMap.hpp" +#include "gc/shenandoah/shenandoahSimpleBitMap.inline.hpp" #include "logging/logStream.hpp" #include "memory/resourceArea.hpp" #include "runtime/orderAccess.hpp" +static const char* partition_name(ShenandoahFreeSetPartitionId t) { + switch (t) { + case ShenandoahFreeSetPartitionId::NotFree: return "NotFree"; + case ShenandoahFreeSetPartitionId::Mutator: return "Mutator"; + case ShenandoahFreeSetPartitionId::Collector: return "Collector"; + default: + ShouldNotReachHere(); + return "Unrecognized"; + } +} + +#ifndef PRODUCT +void ShenandoahRegionPartitions::dump_bitmap() const { + log_info(gc)("Mutator range [" SSIZE_FORMAT ", " SSIZE_FORMAT "], Collector range [" SSIZE_FORMAT ", " SSIZE_FORMAT "]", + _leftmosts[int(ShenandoahFreeSetPartitionId::Mutator)], + _rightmosts[int(ShenandoahFreeSetPartitionId::Mutator)], + _leftmosts[int(ShenandoahFreeSetPartitionId::Collector)], + _rightmosts[int(ShenandoahFreeSetPartitionId::Collector)]); + log_info(gc)("Empty Mutator range [" SSIZE_FORMAT ", " SSIZE_FORMAT + "], Empty Collector range [" SSIZE_FORMAT ", " SSIZE_FORMAT "]", + _leftmosts_empty[int(ShenandoahFreeSetPartitionId::Mutator)], + _rightmosts_empty[int(ShenandoahFreeSetPartitionId::Mutator)], + _leftmosts_empty[int(ShenandoahFreeSetPartitionId::Collector)], + _rightmosts_empty[int(ShenandoahFreeSetPartitionId::Collector)]); + + log_info(gc)("%6s: %18s %18s %18s", "index", "Mutator Bits", "Collector Bits", "NotFree Bits"); + dump_bitmap_range(0, _max-1); +} + +void ShenandoahRegionPartitions::dump_bitmap_range(idx_t start_region_idx, idx_t end_region_idx) const { + assert((start_region_idx >= 0) && (start_region_idx < (idx_t) _max), "precondition"); + 
assert((end_region_idx >= 0) && (end_region_idx < (idx_t) _max), "precondition"); + idx_t aligned_start = _membership[int(ShenandoahFreeSetPartitionId::Mutator)].aligned_index(start_region_idx); + idx_t aligned_end = _membership[int(ShenandoahFreeSetPartitionId::Mutator)].aligned_index(end_region_idx); + idx_t alignment = _membership[int(ShenandoahFreeSetPartitionId::Mutator)].alignment(); + while (aligned_start <= aligned_end) { + dump_bitmap_row(aligned_start); + aligned_start += alignment; + } +} + +void ShenandoahRegionPartitions::dump_bitmap_row(idx_t region_idx) const { + assert((region_idx >= 0) && (region_idx < (idx_t) _max), "precondition"); + idx_t aligned_idx = _membership[int(ShenandoahFreeSetPartitionId::Mutator)].aligned_index(region_idx); + uintx mutator_bits = _membership[int(ShenandoahFreeSetPartitionId::Mutator)].bits_at(aligned_idx); + uintx collector_bits = _membership[int(ShenandoahFreeSetPartitionId::Collector)].bits_at(aligned_idx); + uintx free_bits = mutator_bits | collector_bits; + uintx notfree_bits = ~free_bits; + log_info(gc)(SSIZE_FORMAT_W(6) ": " SIZE_FORMAT_X_0 " 0x" SIZE_FORMAT_X_0 " 0x" SIZE_FORMAT_X_0, + aligned_idx, mutator_bits, collector_bits, notfree_bits); +} +#endif + +ShenandoahRegionPartitions::ShenandoahRegionPartitions(size_t max_regions, ShenandoahFreeSet* free_set) : + _max(max_regions), + _region_size_bytes(ShenandoahHeapRegion::region_size_bytes()), + _free_set(free_set), + _membership{ ShenandoahSimpleBitMap(max_regions), ShenandoahSimpleBitMap(max_regions) } +{ + make_all_regions_unavailable(); +} + +inline bool ShenandoahFreeSet::can_allocate_from(ShenandoahHeapRegion *r) const { + return r->is_empty() || (r->is_trash() && !_heap->is_concurrent_weak_root_in_progress()); +} + +inline bool ShenandoahFreeSet::can_allocate_from(size_t idx) const { + ShenandoahHeapRegion* r = _heap->get_region(idx); + return can_allocate_from(r); +} + +inline size_t ShenandoahFreeSet::alloc_capacity(ShenandoahHeapRegion *r) const { + if (r->is_trash()) { + // This would be recycled on allocation path + return ShenandoahHeapRegion::region_size_bytes(); + } else { + return r->free(); + } +} + +inline size_t ShenandoahFreeSet::alloc_capacity(size_t idx) const { + ShenandoahHeapRegion* r = _heap->get_region(idx); + return alloc_capacity(r); +} + +inline bool ShenandoahFreeSet::has_alloc_capacity(ShenandoahHeapRegion *r) const { + return alloc_capacity(r) > 0; +} + +inline idx_t ShenandoahRegionPartitions::leftmost(ShenandoahFreeSetPartitionId which_partition) const { + assert (which_partition < NumPartitions, "selected free partition must be valid"); + idx_t idx = _leftmosts[int(which_partition)]; + if (idx >= _max) { + return _max; + } else { + // Cannot assert that membership[which_partition.is_set(idx) because this helper method may be used + // to query the original value of leftmost when leftmost must be adjusted because the interval representing + // which_partition is shrinking after the region that used to be leftmost is retired. 
+ return idx; + } +} + +inline idx_t ShenandoahRegionPartitions::rightmost(ShenandoahFreeSetPartitionId which_partition) const { + assert (which_partition < NumPartitions, "selected free partition must be valid"); + idx_t idx = _rightmosts[int(which_partition)]; + // Cannot assert that membership[which_partition.is_set(idx) because this helper method may be used + // to query the original value of leftmost when leftmost must be adjusted because the interval representing + // which_partition is shrinking after the region that used to be leftmost is retired. + return idx; +} + +void ShenandoahRegionPartitions::make_all_regions_unavailable() { + for (size_t partition_id = 0; partition_id < IntNumPartitions; partition_id++) { + _membership[partition_id].clear_all(); + _leftmosts[partition_id] = _max; + _rightmosts[partition_id] = -1; + _leftmosts_empty[partition_id] = _max; + _rightmosts_empty[partition_id] = -1;; + _capacity[partition_id] = 0; + _used[partition_id] = 0; + } + _region_counts[int(ShenandoahFreeSetPartitionId::Mutator)] = _region_counts[int(ShenandoahFreeSetPartitionId::Collector)] = 0; +} + +void ShenandoahRegionPartitions::establish_mutator_intervals(idx_t mutator_leftmost, idx_t mutator_rightmost, + idx_t mutator_leftmost_empty, idx_t mutator_rightmost_empty, + size_t mutator_region_count, size_t mutator_used) { + _region_counts[int(ShenandoahFreeSetPartitionId::Mutator)] = mutator_region_count; + _leftmosts[int(ShenandoahFreeSetPartitionId::Mutator)] = mutator_leftmost; + _rightmosts[int(ShenandoahFreeSetPartitionId::Mutator)] = mutator_rightmost; + _leftmosts_empty[int(ShenandoahFreeSetPartitionId::Mutator)] = mutator_leftmost_empty; + _rightmosts_empty[int(ShenandoahFreeSetPartitionId::Mutator)] = mutator_rightmost_empty; + + _region_counts[int(ShenandoahFreeSetPartitionId::Mutator)] = mutator_region_count; + _used[int(ShenandoahFreeSetPartitionId::Mutator)] = mutator_used; + _capacity[int(ShenandoahFreeSetPartitionId::Mutator)] = mutator_region_count * _region_size_bytes; + + _leftmosts[int(ShenandoahFreeSetPartitionId::Collector)] = _max; + _rightmosts[int(ShenandoahFreeSetPartitionId::Collector)] = -1; + _leftmosts_empty[int(ShenandoahFreeSetPartitionId::Collector)] = _max; + _rightmosts_empty[int(ShenandoahFreeSetPartitionId::Collector)] = -1; + + _region_counts[int(ShenandoahFreeSetPartitionId::Collector)] = 0; + _used[int(ShenandoahFreeSetPartitionId::Collector)] = 0; + _capacity[int(ShenandoahFreeSetPartitionId::Collector)] = 0; +} + +void ShenandoahRegionPartitions::increase_used(ShenandoahFreeSetPartitionId which_partition, size_t bytes) { + assert (which_partition < NumPartitions, "Partition must be valid"); + _used[int(which_partition)] += bytes; + assert (_used[int(which_partition)] <= _capacity[int(which_partition)], + "Must not use (" SIZE_FORMAT ") more than capacity (" SIZE_FORMAT ") after increase by " SIZE_FORMAT, + _used[int(which_partition)], _capacity[int(which_partition)], bytes); +} + +inline void ShenandoahRegionPartitions::shrink_interval_if_range_modifies_either_boundary( + ShenandoahFreeSetPartitionId partition, idx_t low_idx, idx_t high_idx) { + assert((low_idx <= high_idx) && (low_idx >= 0) && (high_idx < _max), "Range must span legal index values"); + if (low_idx == leftmost(partition)) { + assert (!_membership[int(partition)].is_set(low_idx), "Do not shrink interval if region not removed"); + if (high_idx + 1 == _max) { + _leftmosts[int(partition)] = _max; + } else { + _leftmosts[int(partition)] = 
find_index_of_next_available_region(partition, high_idx + 1); + } + if (_leftmosts_empty[int(partition)] < _leftmosts[int(partition)]) { + // This gets us closer to where we need to be; we'll scan further when leftmosts_empty is requested. + _leftmosts_empty[int(partition)] = leftmost(partition); + } + } + if (high_idx == _rightmosts[int(partition)]) { + assert (!_membership[int(partition)].is_set(high_idx), "Do not shrink interval if region not removed"); + if (low_idx == 0) { + _rightmosts[int(partition)] = -1; + } else { + _rightmosts[int(partition)] = find_index_of_previous_available_region(partition, low_idx - 1); + } + if (_rightmosts_empty[int(partition)] > _rightmosts[int(partition)]) { + // This gets us closer to where we need to be; we'll scan further when rightmosts_empty is requested. + _rightmosts_empty[int(partition)] = _rightmosts[int(partition)]; + } + } + if (_leftmosts[int(partition)] > _rightmosts[int(partition)]) { + _leftmosts[int(partition)] = _max; + _rightmosts[int(partition)] = -1; + _leftmosts_empty[int(partition)] = _max; + _rightmosts_empty[int(partition)] = -1; + } +} + +inline void ShenandoahRegionPartitions::shrink_interval_if_boundary_modified(ShenandoahFreeSetPartitionId partition, idx_t idx) { + shrink_interval_if_range_modifies_either_boundary(partition, idx, idx); +} + +inline void ShenandoahRegionPartitions::expand_interval_if_boundary_modified(ShenandoahFreeSetPartitionId partition, + idx_t idx, size_t region_available) { + if (_leftmosts[int(partition)] > idx) { + _leftmosts[int(partition)] = idx; + } + if (_rightmosts[int(partition)] < idx) { + _rightmosts[int(partition)] = idx; + } + if (region_available == _region_size_bytes) { + if (_leftmosts_empty[int(partition)] > idx) { + _leftmosts_empty[int(partition)] = idx; + } + if (_rightmosts_empty[int(partition)] < idx) { + _rightmosts_empty[int(partition)] = idx; + } + } +} + +void ShenandoahRegionPartitions::retire_range_from_partition( + ShenandoahFreeSetPartitionId partition, idx_t low_idx, idx_t high_idx) { + + // Note: we may remove from free partition even if region is not entirely full, such as when available < PLAB::min_size() + assert ((low_idx < _max) && (high_idx < _max), "Both indices are sane: " SIZE_FORMAT " and " SIZE_FORMAT " < " SIZE_FORMAT, + low_idx, high_idx, _max); + assert (partition < NumPartitions, "Cannot remove from free partitions if not already free"); + + for (idx_t idx = low_idx; idx <= high_idx; idx++) { + assert (in_free_set(partition, idx), "Must be in partition to remove from partition"); + _membership[int(partition)].clear_bit(idx); + } + _region_counts[int(partition)] -= high_idx + 1 - low_idx; + shrink_interval_if_range_modifies_either_boundary(partition, low_idx, high_idx); +} + +void ShenandoahRegionPartitions::retire_from_partition(ShenandoahFreeSetPartitionId partition, idx_t idx, size_t used_bytes) { + + // Note: we may remove from free partition even if region is not entirely full, such as when available < PLAB::min_size() + assert (idx < _max, "index is sane: " SIZE_FORMAT " < " SIZE_FORMAT, idx, _max); + assert (partition < NumPartitions, "Cannot remove from free partitions if not already free"); + assert (in_free_set(partition, idx), "Must be in partition to remove from partition"); + + if (used_bytes < _region_size_bytes) { + // Count the alignment pad remnant of memory as used when we retire this region + increase_used(partition, _region_size_bytes - used_bytes); + } + _membership[int(partition)].clear_bit(idx); + 
shrink_interval_if_boundary_modified(partition, idx); + _region_counts[int(partition)]--; +} + +void ShenandoahRegionPartitions::make_free(idx_t idx, ShenandoahFreeSetPartitionId which_partition, size_t available) { + assert (idx < _max, "index is sane: " SIZE_FORMAT " < " SIZE_FORMAT, idx, _max); + assert (membership(idx) == ShenandoahFreeSetPartitionId::NotFree, "Cannot make free if already free"); + assert (which_partition < NumPartitions, "selected free partition must be valid"); + assert (available <= _region_size_bytes, "Available cannot exceed region size"); + + _membership[int(which_partition)].set_bit(idx); + _capacity[int(which_partition)] += _region_size_bytes; + _used[int(which_partition)] += _region_size_bytes - available; + expand_interval_if_boundary_modified(which_partition, idx, available); + + _region_counts[int(which_partition)]++; +} + +void ShenandoahRegionPartitions::move_from_partition_to_partition(idx_t idx, ShenandoahFreeSetPartitionId orig_partition, + ShenandoahFreeSetPartitionId new_partition, size_t available) { + assert (idx < _max, "index is sane: " SIZE_FORMAT " < " SIZE_FORMAT, idx, _max); + assert (orig_partition < NumPartitions, "Original partition must be valid"); + assert (new_partition < NumPartitions, "New partition must be valid"); + assert (available <= _region_size_bytes, "Available cannot exceed region size"); + + // Expected transitions: + // During rebuild: Mutator => Collector + // During flip_to_gc: Mutator empty => Collector + // At start of update refs: Collector => Mutator + assert (((available <= _region_size_bytes) && + (((orig_partition == ShenandoahFreeSetPartitionId::Mutator) + && (new_partition == ShenandoahFreeSetPartitionId::Collector)) || + ((orig_partition == ShenandoahFreeSetPartitionId::Collector) + && (new_partition == ShenandoahFreeSetPartitionId::Mutator)))) || + ((available == _region_size_bytes) && + ((orig_partition == ShenandoahFreeSetPartitionId::Mutator) + && (new_partition == ShenandoahFreeSetPartitionId::Collector))), "Unexpected movement between partitions"); + + size_t used = _region_size_bytes - available; + + _membership[int(orig_partition)].clear_bit(idx); + _membership[int(new_partition)].set_bit(idx); + + _capacity[int(orig_partition)] -= _region_size_bytes; + _used[int(orig_partition)] -= used; + shrink_interval_if_boundary_modified(orig_partition, idx); + + _capacity[int(new_partition)] += _region_size_bytes;; + _used[int(new_partition)] += used; + expand_interval_if_boundary_modified(new_partition, idx, available); + + _region_counts[int(orig_partition)]--; + _region_counts[int(new_partition)]++; +} + +const char* ShenandoahRegionPartitions::partition_membership_name(idx_t idx) const { + return partition_name(membership(idx)); +} + +inline ShenandoahFreeSetPartitionId ShenandoahRegionPartitions::membership(idx_t idx) const { + assert (idx < _max, "index is sane: " SIZE_FORMAT " < " SIZE_FORMAT, idx, _max); + ShenandoahFreeSetPartitionId result = ShenandoahFreeSetPartitionId::NotFree; + for (uint partition_id = 0; partition_id < UIntNumPartitions; partition_id++) { + if (_membership[partition_id].is_set(idx)) { + assert(result == ShenandoahFreeSetPartitionId::NotFree, "Region should reside in only one partition"); + result = (ShenandoahFreeSetPartitionId) partition_id; + } + } + return result; +} + +#ifdef ASSERT +inline bool ShenandoahRegionPartitions::partition_id_matches(idx_t idx, ShenandoahFreeSetPartitionId test_partition) const { + assert (idx < _max, "index is sane: " SIZE_FORMAT " < " SIZE_FORMAT, 
idx, _max); + assert (test_partition < ShenandoahFreeSetPartitionId::NotFree, "must be a valid partition"); + + return membership(idx) == test_partition; +} +#endif + +inline bool ShenandoahRegionPartitions::is_empty(ShenandoahFreeSetPartitionId which_partition) const { + assert (which_partition < NumPartitions, "selected free partition must be valid"); + return (leftmost(which_partition) > rightmost(which_partition)); +} + +inline idx_t ShenandoahRegionPartitions::find_index_of_next_available_region( + ShenandoahFreeSetPartitionId which_partition, idx_t start_index) const { + idx_t rightmost_idx = rightmost(which_partition); + idx_t leftmost_idx = leftmost(which_partition); + if ((rightmost_idx < leftmost_idx) || (start_index > rightmost_idx)) return _max; + if (start_index < leftmost_idx) { + start_index = leftmost_idx; + } + idx_t result = _membership[int(which_partition)].find_first_set_bit(start_index, rightmost_idx + 1); + if (result > rightmost_idx) { + result = _max; + } + assert (result >= start_index, "Requires progress"); + return result; +} + +inline idx_t ShenandoahRegionPartitions::find_index_of_previous_available_region( + ShenandoahFreeSetPartitionId which_partition, idx_t last_index) const { + idx_t rightmost_idx = rightmost(which_partition); + idx_t leftmost_idx = leftmost(which_partition); + // if (leftmost_idx == max) then (last_index < leftmost_idx) + if (last_index < leftmost_idx) return -1; + if (last_index > rightmost_idx) { + last_index = rightmost_idx; + } + idx_t result = _membership[int(which_partition)].find_last_set_bit(-1, last_index); + if (result < leftmost_idx) { + result = -1; + } + assert (result <= last_index, "Requires progress"); + return result; +} + +inline idx_t ShenandoahRegionPartitions::find_index_of_next_available_cluster_of_regions( + ShenandoahFreeSetPartitionId which_partition, idx_t start_index, size_t cluster_size) const { + idx_t rightmost_idx = rightmost(which_partition); + idx_t leftmost_idx = leftmost(which_partition); + if ((rightmost_idx < leftmost_idx) || (start_index > rightmost_idx)) return _max; + idx_t result = _membership[int(which_partition)].find_first_consecutive_set_bits(start_index, rightmost_idx + 1, cluster_size); + if (result > rightmost_idx) { + result = _max; + } + assert (result >= start_index, "Requires progress"); + return result; +} + +inline idx_t ShenandoahRegionPartitions::find_index_of_previous_available_cluster_of_regions( + ShenandoahFreeSetPartitionId which_partition, idx_t last_index, size_t cluster_size) const { + idx_t leftmost_idx = leftmost(which_partition); + // if (leftmost_idx == max) then (last_index < leftmost_idx) + if (last_index < leftmost_idx) return -1; + idx_t result = _membership[int(which_partition)].find_last_consecutive_set_bits(leftmost_idx - 1, last_index, cluster_size); + if (result <= leftmost_idx) { + result = -1; + } + assert (result <= last_index, "Requires progress"); + return result; +} + +idx_t ShenandoahRegionPartitions::leftmost_empty(ShenandoahFreeSetPartitionId which_partition) { + assert (which_partition < NumPartitions, "selected free partition must be valid"); + idx_t max_regions = _max; + if (_leftmosts_empty[int(which_partition)] == _max) { + return _max; + } + for (idx_t idx = find_index_of_next_available_region(which_partition, _leftmosts_empty[int(which_partition)]); + idx < max_regions; ) { + assert(in_free_set(which_partition, idx), "Boundaries or find_last_set_bit failed: " SSIZE_FORMAT, idx); + if (_free_set->alloc_capacity(idx) == _region_size_bytes) { + 
_leftmosts_empty[int(which_partition)] = idx; + return idx; + } + idx = find_index_of_next_available_region(which_partition, idx + 1); + } + _leftmosts_empty[int(which_partition)] = _max; + _rightmosts_empty[int(which_partition)] = -1; + return _max; +} + +idx_t ShenandoahRegionPartitions::rightmost_empty(ShenandoahFreeSetPartitionId which_partition) { + assert (which_partition < NumPartitions, "selected free partition must be valid"); + if (_rightmosts_empty[int(which_partition)] < 0) { + return -1; + } + for (idx_t idx = find_index_of_previous_available_region(which_partition, _rightmosts_empty[int(which_partition)]); + idx >= 0; ) { + assert(in_free_set(which_partition, idx), "Boundaries or find_last_set_bit failed: " SSIZE_FORMAT, idx); + if (_free_set->alloc_capacity(idx) == _region_size_bytes) { + _rightmosts_empty[int(which_partition)] = idx; + return idx; + } + idx = find_index_of_previous_available_region(which_partition, idx - 1); + } + _leftmosts_empty[int(which_partition)] = _max; + _rightmosts_empty[int(which_partition)] = -1; + return -1; +} + + +#ifdef ASSERT +void ShenandoahRegionPartitions::assert_bounds() { + + idx_t leftmosts[UIntNumPartitions]; + idx_t rightmosts[UIntNumPartitions]; + idx_t empty_leftmosts[UIntNumPartitions]; + idx_t empty_rightmosts[UIntNumPartitions]; + + for (uint i = 0; i < UIntNumPartitions; i++) { + leftmosts[i] = _max; + empty_leftmosts[i] = _max; + rightmosts[i] = -1; + empty_rightmosts[i] = -1; + } + + for (idx_t i = 0; i < _max; i++) { + ShenandoahFreeSetPartitionId partition = membership(i); + switch (partition) { + case ShenandoahFreeSetPartitionId::NotFree: + break; + + case ShenandoahFreeSetPartitionId::Mutator: + case ShenandoahFreeSetPartitionId::Collector: + { + size_t capacity = _free_set->alloc_capacity(i); + bool is_empty = (capacity == _region_size_bytes); + assert(capacity > 0, "free regions must have allocation capacity"); + if (i < leftmosts[int(partition)]) { + leftmosts[int(partition)] = i; + } + if (is_empty && (i < empty_leftmosts[int(partition)])) { + empty_leftmosts[int(partition)] = i; + } + if (i > rightmosts[int(partition)]) { + rightmosts[int(partition)] = i; + } + if (is_empty && (i > empty_rightmosts[int(partition)])) { + empty_rightmosts[int(partition)] = i; + } + break; + } + + default: + ShouldNotReachHere(); + } + } + + // Performance invariants. Failing these would not break the free partition, but performance would suffer. + assert (leftmost(ShenandoahFreeSetPartitionId::Mutator) <= _max, + "leftmost in bounds: " SSIZE_FORMAT " < " SSIZE_FORMAT, leftmost(ShenandoahFreeSetPartitionId::Mutator), _max); + assert (rightmost(ShenandoahFreeSetPartitionId::Mutator) < _max, + "rightmost in bounds: " SSIZE_FORMAT " < " SSIZE_FORMAT, rightmost(ShenandoahFreeSetPartitionId::Mutator), _max); + + assert (leftmost(ShenandoahFreeSetPartitionId::Mutator) == _max + || partition_id_matches(leftmost(ShenandoahFreeSetPartitionId::Mutator), ShenandoahFreeSetPartitionId::Mutator), + "leftmost region should be free: " SSIZE_FORMAT, leftmost(ShenandoahFreeSetPartitionId::Mutator)); + assert (leftmost(ShenandoahFreeSetPartitionId::Mutator) == _max + || partition_id_matches(rightmost(ShenandoahFreeSetPartitionId::Mutator), ShenandoahFreeSetPartitionId::Mutator), + "rightmost region should be free: " SSIZE_FORMAT, rightmost(ShenandoahFreeSetPartitionId::Mutator)); + + // If Mutator partition is empty, leftmosts will both equal max, rightmosts will both equal zero. + // Likewise for empty region partitions. 
+ idx_t beg_off = leftmosts[int(ShenandoahFreeSetPartitionId::Mutator)]; + idx_t end_off = rightmosts[int(ShenandoahFreeSetPartitionId::Mutator)]; + assert (beg_off >= leftmost(ShenandoahFreeSetPartitionId::Mutator), + "free regions before the leftmost: " SSIZE_FORMAT ", bound " SSIZE_FORMAT, + beg_off, leftmost(ShenandoahFreeSetPartitionId::Mutator)); + assert (end_off <= rightmost(ShenandoahFreeSetPartitionId::Mutator), + "free regions past the rightmost: " SSIZE_FORMAT ", bound " SSIZE_FORMAT, + end_off, rightmost(ShenandoahFreeSetPartitionId::Mutator)); + + beg_off = empty_leftmosts[int(ShenandoahFreeSetPartitionId::Mutator)]; + end_off = empty_rightmosts[int(ShenandoahFreeSetPartitionId::Mutator)]; + assert (beg_off >= leftmost_empty(ShenandoahFreeSetPartitionId::Mutator), + "free empty regions before the leftmost: " SSIZE_FORMAT ", bound " SSIZE_FORMAT, + beg_off, leftmost_empty(ShenandoahFreeSetPartitionId::Mutator)); + assert (end_off <= rightmost_empty(ShenandoahFreeSetPartitionId::Mutator), + "free empty regions past the rightmost: " SSIZE_FORMAT ", bound " SSIZE_FORMAT, + end_off, rightmost_empty(ShenandoahFreeSetPartitionId::Mutator)); + + // Performance invariants. Failing these would not break the free partition, but performance would suffer. + assert (leftmost(ShenandoahFreeSetPartitionId::Collector) <= _max, "leftmost in bounds: " SSIZE_FORMAT " < " SSIZE_FORMAT, + leftmost(ShenandoahFreeSetPartitionId::Collector), _max); + assert (rightmost(ShenandoahFreeSetPartitionId::Collector) < _max, "rightmost in bounds: " SSIZE_FORMAT " < " SSIZE_FORMAT, + rightmost(ShenandoahFreeSetPartitionId::Collector), _max); + + assert (leftmost(ShenandoahFreeSetPartitionId::Collector) == _max + || partition_id_matches(leftmost(ShenandoahFreeSetPartitionId::Collector), ShenandoahFreeSetPartitionId::Collector), + "leftmost region should be free: " SSIZE_FORMAT, leftmost(ShenandoahFreeSetPartitionId::Collector)); + assert (leftmost(ShenandoahFreeSetPartitionId::Collector) == _max + || partition_id_matches(rightmost(ShenandoahFreeSetPartitionId::Collector), ShenandoahFreeSetPartitionId::Collector), + "rightmost region should be free: " SSIZE_FORMAT, rightmost(ShenandoahFreeSetPartitionId::Collector)); + + // If Collector partition is empty, leftmosts will both equal max, rightmosts will both equal zero. + // Likewise for empty region partitions. 
+ beg_off = leftmosts[int(ShenandoahFreeSetPartitionId::Collector)]; + end_off = rightmosts[int(ShenandoahFreeSetPartitionId::Collector)]; + assert (beg_off >= leftmost(ShenandoahFreeSetPartitionId::Collector), + "free regions before the leftmost: " SSIZE_FORMAT ", bound " SSIZE_FORMAT, + beg_off, leftmost(ShenandoahFreeSetPartitionId::Collector)); + assert (end_off <= rightmost(ShenandoahFreeSetPartitionId::Collector), + "free regions past the rightmost: " SSIZE_FORMAT ", bound " SSIZE_FORMAT, + end_off, rightmost(ShenandoahFreeSetPartitionId::Collector)); + + beg_off = empty_leftmosts[int(ShenandoahFreeSetPartitionId::Collector)]; + end_off = empty_rightmosts[int(ShenandoahFreeSetPartitionId::Collector)]; + assert (beg_off >= _leftmosts_empty[int(ShenandoahFreeSetPartitionId::Collector)], + "free empty regions before the leftmost: " SSIZE_FORMAT ", bound " SSIZE_FORMAT, + beg_off, leftmost_empty(ShenandoahFreeSetPartitionId::Collector)); + assert (end_off <= _rightmosts_empty[int(ShenandoahFreeSetPartitionId::Collector)], + "free empty regions past the rightmost: " SSIZE_FORMAT ", bound " SSIZE_FORMAT, + end_off, rightmost_empty(ShenandoahFreeSetPartitionId::Collector)); +} +#endif + ShenandoahFreeSet::ShenandoahFreeSet(ShenandoahHeap* heap, size_t max_regions) : _heap(heap), - _mutator_free_bitmap(max_regions, mtGC), - _collector_free_bitmap(max_regions, mtGC), - _max(max_regions) + _partitions(max_regions, this), + _right_to_left_bias(false), + _alloc_bias_weight(0) { clear_internal(); } -void ShenandoahFreeSet::increase_used(size_t num_bytes) { - shenandoah_assert_heaplocked(); - _used += num_bytes; - - assert(_used <= _capacity, "must not use more than we have: used: " SIZE_FORMAT - ", capacity: " SIZE_FORMAT ", num_bytes: " SIZE_FORMAT, _used, _capacity, num_bytes); -} - -bool ShenandoahFreeSet::is_mutator_free(size_t idx) const { - assert (idx < _max, "index is sane: " SIZE_FORMAT " < " SIZE_FORMAT " (left: " SIZE_FORMAT ", right: " SIZE_FORMAT ")", - idx, _max, _mutator_leftmost, _mutator_rightmost); - return _mutator_free_bitmap.at(idx); -} - -bool ShenandoahFreeSet::is_collector_free(size_t idx) const { - assert (idx < _max, "index is sane: " SIZE_FORMAT " < " SIZE_FORMAT " (left: " SIZE_FORMAT ", right: " SIZE_FORMAT ")", - idx, _max, _collector_leftmost, _collector_rightmost); - return _collector_free_bitmap.at(idx); -} - HeapWord* ShenandoahFreeSet::allocate_single(ShenandoahAllocRequest& req, bool& in_new_region) { + shenandoah_assert_heaplocked(); + // Scan the bitmap looking for a first fit. // - // Leftmost and rightmost bounds provide enough caching to walk bitmap efficiently. Normally, - // we would find the region to allocate at right away. + // Leftmost and rightmost bounds provide enough caching to quickly find a region from which to allocate. // - // Allocations are biased: new application allocs go to beginning of the heap, and GC allocs - // go to the end. This makes application allocation faster, because we would clear lots - // of regions from the beginning most of the time. + // Allocations are biased: GC allocations are taken from the high end of the heap. Regular (and TLAB) + // mutator allocations are taken from the middle of heap, below the memory reserved for Collector. + // Humongous mutator allocations are taken from the bottom of the heap. // - // Free set maintains mutator and collector views, and normally they allocate in their views only, - // unless we special cases for stealing and mixed allocations. 
+ // Free set maintains mutator and collector partitions. Mutator can only allocate from the + // Mutator partition. Collector prefers to allocate from the Collector partition, but may steal + // regions from the Mutator partition if the Collector partition has been depleted. switch (req.type()) { case ShenandoahAllocRequest::_alloc_tlab: case ShenandoahAllocRequest::_alloc_shared: { - // Try to allocate in the mutator view - for (size_t idx = _mutator_leftmost; idx <= _mutator_rightmost; idx++) { - if (is_mutator_free(idx)) { - HeapWord* result = try_allocate_in(_heap->get_region(idx), req, in_new_region); - if (result != nullptr) { - return result; + if (_alloc_bias_weight-- <= 0) { + // We have observed that regions not collected in previous GC cycle tend to congregate at one end or the other + // of the heap. Typically, these are the more recently engaged regions and the objects in these regions have not + // yet had a chance to die (and/or are treated as floating garbage). If we use the same allocation bias on each + // GC pass, these "most recently" engaged regions for GC pass N will also be the "most recently" engaged regions + // for GC pass N+1, and the relatively large amount of live data and/or floating garbage introduced + // during the most recent GC pass may once again prevent the region from being collected. We have found that + // alternating the allocation behavior between GC passes improves evacuation performance by 3-7% on certain + // benchmarks. In the best case, this has the effect of consuming these partially consumed regions before + // the start of the next mark cycle so all of their garbage can be efficiently reclaimed. + // + // First, finish consuming regions that are already partially consumed so as to more tightly limit ranges of + // available regions. Other potential benefits: + // 1. Eventual collection set has fewer regions because we have packed newly allocated objects into fewer regions + // 2. We preserve the "empty" regions longer into the GC cycle, reducing likelihood of allocation failures + // late in the GC cycle. + idx_t non_empty_on_left = (_partitions.leftmost_empty(ShenandoahFreeSetPartitionId::Mutator) + - _partitions.leftmost(ShenandoahFreeSetPartitionId::Mutator)); + idx_t non_empty_on_right = (_partitions.rightmost(ShenandoahFreeSetPartitionId::Mutator) + - _partitions.rightmost_empty(ShenandoahFreeSetPartitionId::Mutator)); + _right_to_left_bias = (non_empty_on_right > non_empty_on_left); + _alloc_bias_weight = _InitialAllocBiasWeight; + } + if (_right_to_left_bias) { + // Allocate within mutator free from high memory to low so as to preserve low memory for humongous allocations + if (!_partitions.is_empty(ShenandoahFreeSetPartitionId::Mutator)) { + // Use signed idx. Otherwise, loop will never terminate. + idx_t leftmost = _partitions.leftmost(ShenandoahFreeSetPartitionId::Mutator); + for (idx_t idx = _partitions.rightmost(ShenandoahFreeSetPartitionId::Mutator); idx >= leftmost; ) { + assert(_partitions.in_free_set(ShenandoahFreeSetPartitionId::Mutator, idx), + "Boundaries or find_last_set_bit failed: " SSIZE_FORMAT, idx); + ShenandoahHeapRegion* r = _heap->get_region(idx); + // try_allocate_in() increases used if the allocation is successful. + HeapWord* result; + size_t min_size = (req.type() == ShenandoahAllocRequest::_alloc_tlab)? 
req.min_size(): req.size(); + if ((alloc_capacity(r) >= min_size) && ((result = try_allocate_in(r, req, in_new_region)) != nullptr)) { + return result; + } + idx = _partitions.find_index_of_previous_available_region(ShenandoahFreeSetPartitionId::Mutator, idx - 1); + } + } + } else { + // Allocate from low to high memory. This keeps the range of fully empty regions more tightly packed. + // Note that the most recently allocated regions tend not to be evacuated in a given GC cycle. So this + // tends to accumulate "fragmented" uncollected regions in high memory. + if (!_partitions.is_empty(ShenandoahFreeSetPartitionId::Mutator)) { + // Use signed idx. Otherwise, loop will never terminate. + idx_t rightmost = _partitions.rightmost(ShenandoahFreeSetPartitionId::Mutator); + for (idx_t idx = _partitions.leftmost(ShenandoahFreeSetPartitionId::Mutator); idx <= rightmost; ) { + assert(_partitions.in_free_set(ShenandoahFreeSetPartitionId::Mutator, idx), + "Boundaries or find_last_set_bit failed: " SSIZE_FORMAT, idx); + ShenandoahHeapRegion* r = _heap->get_region(idx); + // try_allocate_in() increases used if the allocation is successful. + HeapWord* result; + size_t min_size = (req.type() == ShenandoahAllocRequest::_alloc_tlab)? req.min_size(): req.size(); + if ((alloc_capacity(r) >= min_size) && ((result = try_allocate_in(r, req, in_new_region)) != nullptr)) { + return result; + } + idx = _partitions.find_index_of_next_available_region(ShenandoahFreeSetPartitionId::Mutator, idx + 1); } } } - // There is no recovery. Mutator does not touch collector view at all. break; } case ShenandoahAllocRequest::_alloc_gclab: - case ShenandoahAllocRequest::_alloc_shared_gc: { - // size_t is unsigned, need to dodge underflow when _leftmost = 0 + // GCLABs are for evacuation so we must be in evacuation phase. + case ShenandoahAllocRequest::_alloc_shared_gc: { // Fast-path: try to allocate in the collector view first - for (size_t c = _collector_rightmost + 1; c > _collector_leftmost; c--) { - size_t idx = c - 1; - if (is_collector_free(idx)) { - HeapWord* result = try_allocate_in(_heap->get_region(idx), req, in_new_region); - if (result != nullptr) { - return result; - } + idx_t leftmost_collector = _partitions.leftmost(ShenandoahFreeSetPartitionId::Collector); + for (idx_t idx = _partitions.rightmost(ShenandoahFreeSetPartitionId::Collector); idx >= leftmost_collector; ) { + assert(_partitions.in_free_set(ShenandoahFreeSetPartitionId::Collector, idx), + "Boundaries or find_prev_last_bit failed: " SSIZE_FORMAT, idx); + HeapWord* result = try_allocate_in(_heap->get_region(idx), req, in_new_region); + if (result != nullptr) { + return result; } + idx = _partitions.find_index_of_previous_available_region(ShenandoahFreeSetPartitionId::Collector, idx - 1); } // No dice. Can we borrow space from mutator view? @@ -111,166 +688,166 @@ HeapWord* ShenandoahFreeSet::allocate_single(ShenandoahAllocRequest& req, bool& return nullptr; } - // Try to steal the empty region from the mutator view - for (size_t c = _mutator_rightmost + 1; c > _mutator_leftmost; c--) { - size_t idx = c - 1; - if (is_mutator_free(idx)) { - ShenandoahHeapRegion* r = _heap->get_region(idx); - if (can_allocate_from(r)) { - flip_to_gc(r); - HeapWord *result = try_allocate_in(r, req, in_new_region); - if (result != nullptr) { - return result; - } + // Try to steal an empty region from the mutator view. 
+ idx_t leftmost_mutator_empty = _partitions.leftmost_empty(ShenandoahFreeSetPartitionId::Mutator); + for (idx_t idx = _partitions.rightmost_empty(ShenandoahFreeSetPartitionId::Mutator); idx >= leftmost_mutator_empty; ) { + assert(_partitions.in_free_set(ShenandoahFreeSetPartitionId::Mutator, idx), + "Boundaries or find_prev_last_bit failed: " SSIZE_FORMAT, idx); + ShenandoahHeapRegion* r = _heap->get_region(idx); + if (can_allocate_from(r)) { + flip_to_gc(r); + HeapWord *result = try_allocate_in(r, req, in_new_region); + if (result != nullptr) { + log_debug(gc)("Flipped region " SIZE_FORMAT " to gc for request: " PTR_FORMAT, idx, p2i(&req)); + return result; } } + idx = _partitions.find_index_of_previous_available_region(ShenandoahFreeSetPartitionId::Mutator, idx - 1); } - // No dice. Do not try to mix mutator and GC allocations, because - // URWM moves due to GC allocations would expose unparsable mutator - // allocations. - + // No dice. Do not try to mix mutator and GC allocations, because adjusting region UWM + // due to GC allocations would expose unparsable mutator allocations. break; } default: ShouldNotReachHere(); } - return nullptr; } HeapWord* ShenandoahFreeSet::try_allocate_in(ShenandoahHeapRegion* r, ShenandoahAllocRequest& req, bool& in_new_region) { - assert (!has_no_alloc_capacity(r), "Performance: should avoid full regions on this path: " SIZE_FORMAT, r->index()); - - if (_heap->is_concurrent_weak_root_in_progress() && - r->is_trash()) { + assert (has_alloc_capacity(r), "Performance: should avoid full regions on this path: " SIZE_FORMAT, r->index()); + if (_heap->is_concurrent_weak_root_in_progress() && r->is_trash()) { return nullptr; } + HeapWord* result = nullptr; try_recycle_trashed(r); - in_new_region = r->is_empty(); - HeapWord* result = nullptr; - size_t size = req.size(); + if (in_new_region) { + log_debug(gc)("Using new region (" SIZE_FORMAT ") for %s (" PTR_FORMAT ").", + r->index(), ShenandoahAllocRequest::alloc_type_to_string(req.type()), p2i(&req)); + } + // req.size() is in words, r->free() is in bytes. 
if (req.is_lab_alloc()) { + // This is a GCLAB or a TLAB allocation + size_t adjusted_size = req.size(); size_t free = align_down(r->free() >> LogHeapWordSize, MinObjAlignment); - if (size > free) { - size = free; + if (adjusted_size > free) { + adjusted_size = free; } - if (size >= req.min_size()) { - result = r->allocate(size, req.type()); - assert (result != nullptr, "Allocation must succeed: free " SIZE_FORMAT ", actual " SIZE_FORMAT, free, size); + if (adjusted_size >= req.min_size()) { + result = r->allocate(adjusted_size, req.type()); + log_debug(gc)("Allocated " SIZE_FORMAT " words (adjusted from " SIZE_FORMAT ") for %s @" PTR_FORMAT + " from %s region " SIZE_FORMAT ", free bytes remaining: " SIZE_FORMAT, + adjusted_size, req.size(), ShenandoahAllocRequest::alloc_type_to_string(req.type()), p2i(result), + _partitions.partition_membership_name(r->index()), r->index(), r->free()); + assert (result != nullptr, "Allocation must succeed: free " SIZE_FORMAT ", actual " SIZE_FORMAT, free, adjusted_size); + req.set_actual_size(adjusted_size); + } else { + log_trace(gc, free)("Failed to shrink TLAB or GCLAB request (" SIZE_FORMAT ") in region " SIZE_FORMAT " to " SIZE_FORMAT + " because min_size() is " SIZE_FORMAT, req.size(), r->index(), adjusted_size, req.min_size()); } } else { + size_t size = req.size(); result = r->allocate(size, req.type()); + if (result != nullptr) { + // Record actual allocation size + log_debug(gc)("Allocated " SIZE_FORMAT " words for %s @" PTR_FORMAT + " from %s region " SIZE_FORMAT ", free bytes remaining: " SIZE_FORMAT, + size, ShenandoahAllocRequest::alloc_type_to_string(req.type()), p2i(result), + _partitions.partition_membership_name(r->index()), r->index(), r->free()); + req.set_actual_size(size); + } } if (result != nullptr) { // Allocation successful, bump stats: if (req.is_mutator_alloc()) { - increase_used(size * HeapWordSize); - } + _partitions.increase_used(ShenandoahFreeSetPartitionId::Mutator, req.actual_size() * HeapWordSize); + } else { + assert(req.is_gc_alloc(), "Should be gc_alloc since req wasn't mutator alloc"); - // Record actual allocation size - req.set_actual_size(size); - - if (req.is_gc_alloc()) { + // For GC allocations, we advance update_watermark because the objects relocated into this memory during + // evacuation are not updated during evacuation. r->set_update_watermark(r->top()); } } - if (result == nullptr || has_no_alloc_capacity(r)) { - // Region cannot afford this or future allocations. Retire it. - // - // While this seems a bit harsh, especially in the case when this large allocation does not - // fit, but the next small one would, we are risking to inflate scan times when lots of - // almost-full regions precede the fully-empty region where we want allocate the entire TLAB. - // TODO: Record first fully-empty region, and use that for large allocations + static const size_t min_capacity = (size_t) (ShenandoahHeapRegion::region_size_bytes() * (1.0 - 1.0 / ShenandoahEvacWaste)); + size_t ac = alloc_capacity(r); - // Record the remainder as allocation waste - if (req.is_mutator_alloc()) { - size_t waste = r->free(); - if (waste > 0) { - increase_used(waste); - _heap->notify_mutator_alloc_words(waste >> LogHeapWordSize, true); - } - } + if (((result == nullptr) && (ac < min_capacity)) || (alloc_capacity(r) < PLAB::min_size() * HeapWordSize)) { + // Regardless of whether this allocation succeeded, if the remaining memory is less than PLAB:min_size(), retire this region. 
+ // Note that retire_from_partition() increases used to account for waste. - size_t num = r->index(); - _collector_free_bitmap.clear_bit(num); - _mutator_free_bitmap.clear_bit(num); - // Touched the bounds? Need to update: - if (touches_bounds(num)) { - adjust_bounds(); - } - assert_bounds(); + // Also, if this allocation request failed and the consumed within this region * ShenandoahEvacWaste > region size, + // then retire the region so that subsequent searches can find available memory more quickly. + + size_t idx = r->index(); + _partitions.retire_from_partition(req.is_mutator_alloc()? + ShenandoahFreeSetPartitionId::Mutator: ShenandoahFreeSetPartitionId::Collector, + idx, r->used()); + _partitions.assert_bounds(); } return result; } -bool ShenandoahFreeSet::touches_bounds(size_t num) const { - return num == _collector_leftmost || num == _collector_rightmost || num == _mutator_leftmost || num == _mutator_rightmost; -} - -void ShenandoahFreeSet::recompute_bounds() { - // Reset to the most pessimistic case: - _mutator_rightmost = _max - 1; - _mutator_leftmost = 0; - _collector_rightmost = _max - 1; - _collector_leftmost = 0; - - // ...and adjust from there - adjust_bounds(); -} - -void ShenandoahFreeSet::adjust_bounds() { - // Rewind both mutator bounds until the next bit. - while (_mutator_leftmost < _max && !is_mutator_free(_mutator_leftmost)) { - _mutator_leftmost++; - } - while (_mutator_rightmost > 0 && !is_mutator_free(_mutator_rightmost)) { - _mutator_rightmost--; - } - // Rewind both collector bounds until the next bit. - while (_collector_leftmost < _max && !is_collector_free(_collector_leftmost)) { - _collector_leftmost++; - } - while (_collector_rightmost > 0 && !is_collector_free(_collector_rightmost)) { - _collector_rightmost--; - } -} - HeapWord* ShenandoahFreeSet::allocate_contiguous(ShenandoahAllocRequest& req) { + assert(req.is_mutator_alloc(), "All humongous allocations are performed by mutator"); shenandoah_assert_heaplocked(); size_t words_size = req.size(); - size_t num = ShenandoahHeapRegion::required_regions(words_size * HeapWordSize); + idx_t num = ShenandoahHeapRegion::required_regions(words_size * HeapWordSize); - // No regions left to satisfy allocation, bye. - if (num > mutator_count()) { + // Check if there are enough regions left to satisfy allocation. + if (num > (idx_t) _partitions.count(ShenandoahFreeSetPartitionId::Mutator)) { return nullptr; } + idx_t start_range = _partitions.leftmost_empty(ShenandoahFreeSetPartitionId::Mutator); + idx_t end_range = _partitions.rightmost_empty(ShenandoahFreeSetPartitionId::Mutator) + 1; + idx_t last_possible_start = end_range - num; + // Find the continuous interval of $num regions, starting from $beg and ending in $end, // inclusive. Contiguous allocations are biased to the beginning. - - size_t beg = _mutator_leftmost; - size_t end = beg; + idx_t beg = _partitions.find_index_of_next_available_cluster_of_regions(ShenandoahFreeSetPartitionId::Mutator, + start_range, num); + if (beg > last_possible_start) { + // Hit the end, goodbye + return nullptr; + } + idx_t end = beg; while (true) { - if (end >= _max) { - // Hit the end, goodbye - return nullptr; - } - - // If regions are not adjacent, then current [beg; end] is useless, and we may fast-forward. - // If region is not completely free, the current [beg; end] is useless, and we may fast-forward. 
- if (!is_mutator_free(end) || !can_allocate_from(_heap->get_region(end))) { - end++; - beg = end; - continue; + // We've confirmed num contiguous regions belonging to Mutator partition, so no need to confirm membership. + // If region is not completely free, the current [beg; end] is useless, and we may fast-forward. If we can extend + // the existing range, we can exploit that certain regions are already known to be in the Mutator free set. + while (!can_allocate_from(_heap->get_region(end))) { + // region[end] is not empty, so we restart our search after region[end] + idx_t slide_delta = end + 1 - beg; + if (beg + slide_delta > last_possible_start) { + // no room to slide + return nullptr; + } + for (idx_t span_end = beg + num; slide_delta > 0; slide_delta--) { + if (!_partitions.in_free_set(ShenandoahFreeSetPartitionId::Mutator, span_end)) { + beg = _partitions.find_index_of_next_available_cluster_of_regions(ShenandoahFreeSetPartitionId::Mutator, + span_end + 1, num); + break; + } else { + beg++; + span_end++; + } + } + // Here, either beg identifies a range of num regions all of which are in the Mutator free set, or beg > last_possible_start + if (beg > last_possible_start) { + // Hit the end, goodbye + return nullptr; + } + end = beg; } if ((end - beg + 1) == num) { @@ -282,9 +859,8 @@ HeapWord* ShenandoahFreeSet::allocate_contiguous(ShenandoahAllocRequest& req) { } size_t remainder = words_size & ShenandoahHeapRegion::region_size_words_mask(); - // Initialize regions: - for (size_t i = beg; i <= end; i++) { + for (idx_t i = beg; i <= end; i++) { ShenandoahHeapRegion* r = _heap->get_region(i); try_recycle_trashed(r); @@ -306,46 +882,23 @@ HeapWord* ShenandoahFreeSet::allocate_contiguous(ShenandoahAllocRequest& req) { } r->set_top(r->bottom() + used_words); - - _mutator_free_bitmap.clear_bit(r->index()); } - // While individual regions report their true use, all humongous regions are - // marked used in the free set. - increase_used(ShenandoahHeapRegion::region_size_bytes() * num); - if (remainder != 0) { // Record this remainder as allocation waste _heap->notify_mutator_alloc_words(ShenandoahHeapRegion::region_size_words() - remainder, true); } - // Allocated at left/rightmost? Move the bounds appropriately. 
- if (beg == _mutator_leftmost || end == _mutator_rightmost) { - adjust_bounds(); - } - assert_bounds(); + // retire_range_from_partition() will adjust bounds on Mutator free set if appropriate + _partitions.retire_range_from_partition(ShenandoahFreeSetPartitionId::Mutator, beg, end); + size_t total_humongous_size = ShenandoahHeapRegion::region_size_bytes() * num; + _partitions.increase_used(ShenandoahFreeSetPartitionId::Mutator, total_humongous_size); + _partitions.assert_bounds(); req.set_actual_size(words_size); return _heap->get_region(beg)->bottom(); } -bool ShenandoahFreeSet::can_allocate_from(ShenandoahHeapRegion *r) { - return r->is_empty() || (r->is_trash() && !_heap->is_concurrent_weak_root_in_progress()); -} - -size_t ShenandoahFreeSet::alloc_capacity(ShenandoahHeapRegion *r) { - if (r->is_trash()) { - // This would be recycled on allocation path - return ShenandoahHeapRegion::region_size_bytes(); - } else { - return r->free(); - } -} - -bool ShenandoahFreeSet::has_no_alloc_capacity(ShenandoahHeapRegion *r) { - return alloc_capacity(r) == 0; -} - void ShenandoahFreeSet::try_recycle_trashed(ShenandoahHeapRegion *r) { if (r->is_trash()) { _heap->decrease_used(r->used()); @@ -370,20 +923,16 @@ void ShenandoahFreeSet::recycle_trash() { void ShenandoahFreeSet::flip_to_gc(ShenandoahHeapRegion* r) { size_t idx = r->index(); - assert(_mutator_free_bitmap.at(idx), "Should be in mutator view"); + assert(_partitions.partition_id_matches(idx, ShenandoahFreeSetPartitionId::Mutator), "Should be in mutator view"); assert(can_allocate_from(r), "Should not be allocated"); - _mutator_free_bitmap.clear_bit(idx); - _collector_free_bitmap.set_bit(idx); - _collector_leftmost = MIN2(idx, _collector_leftmost); - _collector_rightmost = MAX2(idx, _collector_rightmost); + size_t ac = alloc_capacity(r); + _partitions.move_from_partition_to_partition(idx, ShenandoahFreeSetPartitionId::Mutator, + ShenandoahFreeSetPartitionId::Collector, ac); + _partitions.assert_bounds(); - _capacity -= alloc_capacity(r); - - if (touches_bounds(idx)) { - adjust_bounds(); - } - assert_bounds(); + // We do not ensure that the region is no longer trash, relying on try_allocate_in(), which always comes next, + // to recycle trash before attempting to allocate anything in the region. } void ShenandoahFreeSet::clear() { @@ -392,67 +941,259 @@ void ShenandoahFreeSet::clear() { } void ShenandoahFreeSet::clear_internal() { - _mutator_free_bitmap.clear(); - _collector_free_bitmap.clear(); - _mutator_leftmost = _max; - _mutator_rightmost = 0; - _collector_leftmost = _max; - _collector_rightmost = 0; - _capacity = 0; - _used = 0; + _partitions.make_all_regions_unavailable(); } -void ShenandoahFreeSet::rebuild() { - shenandoah_assert_heaplocked(); - clear(); +void ShenandoahFreeSet::find_regions_with_alloc_capacity(size_t &cset_regions) { + + cset_regions = 0; + clear_internal(); + size_t region_size_bytes = _partitions.region_size_bytes(); + size_t max_regions = _partitions.max_regions(); + + size_t mutator_leftmost = max_regions; + size_t mutator_rightmost = 0; + size_t mutator_leftmost_empty = max_regions; + size_t mutator_rightmost_empty = 0; + + size_t mutator_regions = 0; + size_t mutator_used = 0; for (size_t idx = 0; idx < _heap->num_regions(); idx++) { ShenandoahHeapRegion* region = _heap->get_region(idx); + if (region->is_trash()) { + // Trashed regions represent regions that had been in the collection partition but have not yet been "cleaned up". + // The cset regions are not "trashed" until we have finished update refs. 
+ cset_regions++; + } if (region->is_alloc_allowed() || region->is_trash()) { - assert(!region->is_cset(), "Shouldn't be adding those to the free set"); - // Do not add regions that would surely fail allocation - if (has_no_alloc_capacity(region)) continue; - - _capacity += alloc_capacity(region); - assert(_used <= _capacity, "must not use more than we have"); - - assert(!is_mutator_free(idx), "We are about to add it, it shouldn't be there already"); - _mutator_free_bitmap.set_bit(idx); - } - } - - // Evac reserve: reserve trailing space for evacuations - size_t to_reserve = _heap->max_capacity() / 100 * ShenandoahEvacReserve; - size_t reserved = 0; - - for (size_t idx = _heap->num_regions() - 1; idx > 0; idx--) { - if (reserved >= to_reserve) break; - - ShenandoahHeapRegion* region = _heap->get_region(idx); - if (_mutator_free_bitmap.at(idx) && can_allocate_from(region)) { - _mutator_free_bitmap.clear_bit(idx); - _collector_free_bitmap.set_bit(idx); + // Do not add regions that would almost surely fail allocation size_t ac = alloc_capacity(region); - _capacity -= ac; - reserved += ac; + if (ac > PLAB::min_size() * HeapWordSize) { + _partitions.raw_assign_membership(idx, ShenandoahFreeSetPartitionId::Mutator); + + if (idx < mutator_leftmost) { + mutator_leftmost = idx; + } + if (idx > mutator_rightmost) { + mutator_rightmost = idx; + } + if (ac == region_size_bytes) { + if (idx < mutator_leftmost_empty) { + mutator_leftmost_empty = idx; + } + if (idx > mutator_rightmost_empty) { + mutator_rightmost_empty = idx; + } + } + mutator_regions++; + mutator_used += (region_size_bytes - ac); + + log_debug(gc)( + " Adding Region " SIZE_FORMAT " (Free: " SIZE_FORMAT "%s, Used: " SIZE_FORMAT "%s) to mutator partition", + idx, byte_size_in_proper_unit(region->free()), proper_unit_for_byte_size(region->free()), + byte_size_in_proper_unit(region->used()), proper_unit_for_byte_size(region->used())); + } + } + } + _partitions.establish_mutator_intervals(mutator_leftmost, mutator_rightmost, mutator_leftmost_empty, mutator_rightmost_empty, + mutator_regions, mutator_used); +} + +void ShenandoahFreeSet::move_regions_from_collector_to_mutator(size_t max_xfer_regions) { + size_t region_size_bytes = ShenandoahHeapRegion::region_size_bytes(); + size_t collector_empty_xfer = 0; + size_t collector_not_empty_xfer = 0; + + // Process empty regions within the Collector free partition + if ((max_xfer_regions > 0) && + (_partitions.leftmost_empty(ShenandoahFreeSetPartitionId::Collector) + <= _partitions.rightmost_empty(ShenandoahFreeSetPartitionId::Collector))) { + ShenandoahHeapLocker locker(_heap->lock()); + idx_t rightmost = _partitions.rightmost_empty(ShenandoahFreeSetPartitionId::Collector); + for (idx_t idx = _partitions.leftmost_empty(ShenandoahFreeSetPartitionId::Collector); + (max_xfer_regions > 0) && (idx <= rightmost); ) { + assert(_partitions.in_free_set(ShenandoahFreeSetPartitionId::Collector, idx), + "Boundaries or find_first_set_bit failed: " SSIZE_FORMAT, idx); + // Note: can_allocate_from() denotes that region is entirely empty + if (can_allocate_from(idx)) { + _partitions.move_from_partition_to_partition(idx, ShenandoahFreeSetPartitionId::Collector, + ShenandoahFreeSetPartitionId::Mutator, region_size_bytes); + max_xfer_regions--; + collector_empty_xfer += region_size_bytes; + } + idx = _partitions.find_index_of_next_available_region(ShenandoahFreeSetPartitionId::Collector, idx + 1); } } - recompute_bounds(); - assert_bounds(); + // If there are any non-empty regions within Collector partition, we can 
also move them to the Mutator free partition + if ((max_xfer_regions > 0) && (_partitions.leftmost(ShenandoahFreeSetPartitionId::Collector) + <= _partitions.rightmost(ShenandoahFreeSetPartitionId::Collector))) { + ShenandoahHeapLocker locker(_heap->lock()); + idx_t rightmost = _partitions.rightmost(ShenandoahFreeSetPartitionId::Collector); + for (idx_t idx = _partitions.leftmost(ShenandoahFreeSetPartitionId::Collector); + (max_xfer_regions > 0) && (idx <= rightmost); ) { + assert(_partitions.in_free_set(ShenandoahFreeSetPartitionId::Collector, idx), + "Boundaries or find_first_set_bit failed: " SSIZE_FORMAT, idx); + size_t ac = alloc_capacity(idx); + if (ac > 0) { + _partitions.move_from_partition_to_partition(idx, ShenandoahFreeSetPartitionId::Collector, + ShenandoahFreeSetPartitionId::Mutator, ac); + max_xfer_regions--; + collector_not_empty_xfer += ac; + } + idx = _partitions.find_index_of_next_available_region(ShenandoahFreeSetPartitionId::Collector, idx + 1); + } + } + + size_t collector_xfer = collector_empty_xfer + collector_not_empty_xfer; + log_info(gc)("At start of update refs, moving " SIZE_FORMAT "%s to Mutator free partition from Collector Reserve", + byte_size_in_proper_unit(collector_xfer), proper_unit_for_byte_size(collector_xfer)); +} + +void ShenandoahFreeSet::prepare_to_rebuild(size_t &cset_regions) { + shenandoah_assert_heaplocked(); + + log_debug(gc)("Rebuilding FreeSet"); + + // This places regions that have alloc_capacity into the mutator partition. + find_regions_with_alloc_capacity(cset_regions); +} + +void ShenandoahFreeSet::finish_rebuild(size_t cset_regions) { + shenandoah_assert_heaplocked(); + + // Our desire is to reserve this much memory for future evacuation. We may end up reserving less, if + // memory is in short supply. + + size_t reserve = _heap->max_capacity() * ShenandoahEvacReserve / 100; + size_t available_in_collector_partition = (_partitions.capacity_of(ShenandoahFreeSetPartitionId::Collector) + - _partitions.used_by(ShenandoahFreeSetPartitionId::Collector)); + size_t additional_reserve; + if (available_in_collector_partition < reserve) { + additional_reserve = reserve - available_in_collector_partition; + } else { + additional_reserve = 0; + } + + reserve_regions(reserve); + _partitions.assert_bounds(); + log_status(); +} + +void ShenandoahFreeSet::rebuild() { + size_t cset_regions; + prepare_to_rebuild(cset_regions); + finish_rebuild(cset_regions); +} + +void ShenandoahFreeSet::reserve_regions(size_t to_reserve) { + for (size_t i = _heap->num_regions(); i > 0; i--) { + size_t idx = i - 1; + ShenandoahHeapRegion* r = _heap->get_region(idx); + + if (!_partitions.in_free_set(ShenandoahFreeSetPartitionId::Mutator, idx)) { + continue; + } + + size_t ac = alloc_capacity(r); + assert (ac > 0, "Membership in free partition implies has capacity"); + + bool move_to_collector = _partitions.available_in(ShenandoahFreeSetPartitionId::Collector) < to_reserve; + if (!move_to_collector) { + // We've satisfied to_reserve + break; + } + + if (move_to_collector) { + // Note: In a previous implementation, regions were only placed into the survivor space (collector_is_free) if + // they were entirely empty. This has the effect of causing new Mutator allocation to reside next to objects + // that have already survived at least one GC, mixing ephemeral with longer-lived objects in the same region. 
+ // Any objects that have survived a GC are less likely to immediately become garbage, so a region that contains + // survivor objects is less likely to be selected for the collection set. This alternative implementation allows + // survivor regions to continue accumulating other survivor objects, and makes it more likely that ephemeral objects + // occupy regions comprised entirely of ephemeral objects. These regions are highly likely to be included in the next + // collection set, and they are easily evacuated because they have low density of live objects. + _partitions.move_from_partition_to_partition(idx, ShenandoahFreeSetPartitionId::Mutator, + ShenandoahFreeSetPartitionId::Collector, ac); + log_debug(gc)(" Shifting region " SIZE_FORMAT " from mutator_free to collector_free", idx); + } + } + + if (LogTarget(Info, gc, free)::is_enabled()) { + size_t reserve = _partitions.capacity_of(ShenandoahFreeSetPartitionId::Collector); + if (reserve < to_reserve) { + log_debug(gc)("Wanted " PROPERFMT " for young reserve, but only reserved: " PROPERFMT, + PROPERFMTARGS(to_reserve), PROPERFMTARGS(reserve)); + } + } } void ShenandoahFreeSet::log_status() { shenandoah_assert_heaplocked(); - LogTarget(Info, gc, ergo) lt; +#ifdef ASSERT + // Dump of the FreeSet details is only enabled if assertions are enabled + if (LogTarget(Debug, gc, free)::is_enabled()) { +#define BUFFER_SIZE 80 + size_t region_size_bytes = ShenandoahHeapRegion::region_size_bytes(); + size_t consumed_collector = 0; + size_t available_collector = 0; + size_t consumed_mutator = 0; + size_t available_mutator = 0; + + char buffer[BUFFER_SIZE]; + for (uint i = 0; i < BUFFER_SIZE; i++) { + buffer[i] = '\0'; + } + log_debug(gc)("FreeSet map legend: M:mutator_free C:collector_free H:humongous _:retired"); + log_debug(gc)(" mutator free range [" SIZE_FORMAT ".." SIZE_FORMAT "]," + " collector free range [" SIZE_FORMAT ".." SIZE_FORMAT "]", + _partitions.leftmost(ShenandoahFreeSetPartitionId::Mutator), + _partitions.rightmost(ShenandoahFreeSetPartitionId::Mutator), + _partitions.leftmost(ShenandoahFreeSetPartitionId::Collector), + _partitions.rightmost(ShenandoahFreeSetPartitionId::Collector)); + + for (uint i = 0; i < _heap->num_regions(); i++) { + ShenandoahHeapRegion *r = _heap->get_region(i); + uint idx = i % 64; + if ((i != 0) && (idx == 0)) { + log_debug(gc)(" %6u: %s", i-64, buffer); + } + if (_partitions.in_free_set(ShenandoahFreeSetPartitionId::Mutator, i)) { + size_t capacity = alloc_capacity(r); + available_mutator += capacity; + consumed_mutator += region_size_bytes - capacity; + buffer[idx] = (capacity == region_size_bytes)? 'M': 'm'; + } else if (_partitions.in_free_set(ShenandoahFreeSetPartitionId::Collector, i)) { + size_t capacity = alloc_capacity(r); + available_collector += capacity; + consumed_collector += region_size_bytes - capacity; + buffer[idx] = (capacity == region_size_bytes)? 
'C': 'c'; + } else if (r->is_humongous()) { + buffer[idx] = 'h'; + } else { + buffer[idx] = '_'; + } + } + uint remnant = _heap->num_regions() % 64; + if (remnant > 0) { + buffer[remnant] = '\0'; + } else { + remnant = 64; + } + log_debug(gc)(" %6u: %s", (uint) (_heap->num_regions() - remnant), buffer); + } +#endif + + LogTarget(Info, gc, free) lt; if (lt.is_enabled()) { ResourceMark rm; LogStream ls(lt); { - size_t last_idx = 0; + idx_t last_idx = 0; size_t max = 0; size_t max_contig = 0; size_t empty_contig = 0; @@ -461,13 +1202,12 @@ void ShenandoahFreeSet::log_status() { size_t total_free = 0; size_t total_free_ext = 0; - for (size_t idx = _mutator_leftmost; idx <= _mutator_rightmost; idx++) { - if (is_mutator_free(idx)) { + for (idx_t idx = _partitions.leftmost(ShenandoahFreeSetPartitionId::Mutator); + idx <= _partitions.rightmost(ShenandoahFreeSetPartitionId::Mutator); idx++) { + if (_partitions.in_free_set(ShenandoahFreeSetPartitionId::Mutator, idx)) { ShenandoahHeapRegion *r = _heap->get_region(idx); size_t free = alloc_capacity(r); - max = MAX2(max, free); - if (r->is_empty()) { total_free_ext += free; if (last_idx + 1 == idx) { @@ -478,10 +1218,8 @@ void ShenandoahFreeSet::log_status() { } else { empty_contig = 0; } - total_used += r->used(); total_free += free; - max_contig = MAX2(max_contig, empty_contig); last_idx = idx; } @@ -490,8 +1228,13 @@ void ShenandoahFreeSet::log_status() { size_t max_humongous = max_contig * ShenandoahHeapRegion::region_size_bytes(); size_t free = capacity() - used(); + // Since certain regions that belonged to the Mutator free partition at the time of most recent rebuild may have been + // retired, the sum of used and capacities within regions that are still in the Mutator free partition may not match + // my internally tracked values of used() and free(). 
+ assert(free == total_free, "Free memory should match"); + ls.print("Free: " SIZE_FORMAT "%s, Max: " SIZE_FORMAT "%s regular, " SIZE_FORMAT "%s humongous, ", - byte_size_in_proper_unit(total_free), proper_unit_for_byte_size(total_free), + byte_size_in_proper_unit(free), proper_unit_for_byte_size(free), byte_size_in_proper_unit(max), proper_unit_for_byte_size(max), byte_size_in_proper_unit(max_humongous), proper_unit_for_byte_size(max_humongous) ); @@ -506,38 +1249,43 @@ void ShenandoahFreeSet::log_status() { ls.print(SIZE_FORMAT "%% external, ", frag_ext); size_t frag_int; - if (mutator_count() > 0) { - frag_int = (100 * (total_used / mutator_count()) / ShenandoahHeapRegion::region_size_bytes()); + if (_partitions.count(ShenandoahFreeSetPartitionId::Mutator) > 0) { + frag_int = (100 * (total_used / _partitions.count(ShenandoahFreeSetPartitionId::Mutator)) + / ShenandoahHeapRegion::region_size_bytes()); } else { frag_int = 0; } ls.print(SIZE_FORMAT "%% internal; ", frag_int); + ls.print("Used: " SIZE_FORMAT "%s, Mutator Free: " SIZE_FORMAT, + byte_size_in_proper_unit(total_used), proper_unit_for_byte_size(total_used), + _partitions.count(ShenandoahFreeSetPartitionId::Mutator)); } { size_t max = 0; size_t total_free = 0; + size_t total_used = 0; - for (size_t idx = _collector_leftmost; idx <= _collector_rightmost; idx++) { - if (is_collector_free(idx)) { + for (idx_t idx = _partitions.leftmost(ShenandoahFreeSetPartitionId::Collector); + idx <= _partitions.rightmost(ShenandoahFreeSetPartitionId::Collector); idx++) { + if (_partitions.in_free_set(ShenandoahFreeSetPartitionId::Collector, idx)) { ShenandoahHeapRegion *r = _heap->get_region(idx); size_t free = alloc_capacity(r); max = MAX2(max, free); total_free += free; + total_used += r->used(); } } - - ls.print_cr("Reserve: " SIZE_FORMAT "%s, Max: " SIZE_FORMAT "%s", - byte_size_in_proper_unit(total_free), proper_unit_for_byte_size(total_free), - byte_size_in_proper_unit(max), proper_unit_for_byte_size(max)); + ls.print(" Collector Reserve: " SIZE_FORMAT "%s, Max: " SIZE_FORMAT "%s; Used: " SIZE_FORMAT "%s", + byte_size_in_proper_unit(total_free), proper_unit_for_byte_size(total_free), + byte_size_in_proper_unit(max), proper_unit_for_byte_size(max), + byte_size_in_proper_unit(total_used), proper_unit_for_byte_size(total_used)); } } } HeapWord* ShenandoahFreeSet::allocate(ShenandoahAllocRequest& req, bool& in_new_region) { shenandoah_assert_heaplocked(); - assert_bounds(); - if (req.size() > ShenandoahHeapRegion::humongous_threshold_words()) { switch (req.type()) { case ShenandoahAllocRequest::_alloc_shared: @@ -559,71 +1307,40 @@ HeapWord* ShenandoahFreeSet::allocate(ShenandoahAllocRequest& req, bool& in_new_ } } -size_t ShenandoahFreeSet::unsafe_peek_free() const { - // Deliberately not locked, this method is unsafe when free set is modified. 
- - for (size_t index = _mutator_leftmost; index <= _mutator_rightmost; index++) { - if (index < _max && is_mutator_free(index)) { - ShenandoahHeapRegion* r = _heap->get_region(index); - if (r->free() >= MinTLABSize) { - return r->free(); - } - } - } - - // It appears that no regions left - return 0; -} - void ShenandoahFreeSet::print_on(outputStream* out) const { - out->print_cr("Mutator Free Set: " SIZE_FORMAT "", mutator_count()); - for (size_t index = _mutator_leftmost; index <= _mutator_rightmost; index++) { - if (is_mutator_free(index)) { - _heap->get_region(index)->print_on(out); - } + out->print_cr("Mutator Free Set: " SIZE_FORMAT "", _partitions.count(ShenandoahFreeSetPartitionId::Mutator)); + idx_t rightmost = _partitions.rightmost(ShenandoahFreeSetPartitionId::Mutator); + for (idx_t index = _partitions.leftmost(ShenandoahFreeSetPartitionId::Mutator); index <= rightmost; ) { + assert(_partitions.in_free_set(ShenandoahFreeSetPartitionId::Mutator, index), + "Boundaries or find_first_set_bit failed: " SSIZE_FORMAT, index); + _heap->get_region(index)->print_on(out); + index = _partitions.find_index_of_next_available_region(ShenandoahFreeSetPartitionId::Mutator, index + 1); } - out->print_cr("Collector Free Set: " SIZE_FORMAT "", collector_count()); - for (size_t index = _collector_leftmost; index <= _collector_rightmost; index++) { - if (is_collector_free(index)) { - _heap->get_region(index)->print_on(out); - } + out->print_cr("Collector Free Set: " SIZE_FORMAT "", _partitions.count(ShenandoahFreeSetPartitionId::Collector)); + rightmost = _partitions.rightmost(ShenandoahFreeSetPartitionId::Collector); + for (idx_t index = _partitions.leftmost(ShenandoahFreeSetPartitionId::Collector); index <= rightmost; ) { + assert(_partitions.in_free_set(ShenandoahFreeSetPartitionId::Collector, index), + "Boundaries or find_first_set_bit failed: " SSIZE_FORMAT, index); + _heap->get_region(index)->print_on(out); + index = _partitions.find_index_of_next_available_region(ShenandoahFreeSetPartitionId::Collector, index + 1); } } -/* - * Internal fragmentation metric: describes how fragmented the heap regions are. - * - * It is derived as: - * - * sum(used[i]^2, i=0..k) - * IF = 1 - ------------------------------ - * C * sum(used[i], i=0..k) - * - * ...where k is the number of regions in computation, C is the region capacity, and - * used[i] is the used space in the region. - * - * The non-linearity causes IF to be lower for the cases where the same total heap - * used is densely packed. 
For example: - * a) Heap is completely full => IF = 0 - * b) Heap is half full, first 50% regions are completely full => IF = 0 - * c) Heap is half full, each region is 50% full => IF = 1/2 - * d) Heap is quarter full, first 50% regions are completely full => IF = 0 - * e) Heap is quarter full, each region is 25% full => IF = 3/4 - * f) Heap has one small object per each region => IF =~ 1 - */ double ShenandoahFreeSet::internal_fragmentation() { double squared = 0; double linear = 0; int count = 0; - for (size_t index = _mutator_leftmost; index <= _mutator_rightmost; index++) { - if (is_mutator_free(index)) { - ShenandoahHeapRegion* r = _heap->get_region(index); - size_t used = r->used(); - squared += used * used; - linear += used; - count++; - } + idx_t rightmost = _partitions.rightmost(ShenandoahFreeSetPartitionId::Mutator); + for (idx_t index = _partitions.leftmost(ShenandoahFreeSetPartitionId::Mutator); index <= rightmost; ) { + assert(_partitions.in_free_set(ShenandoahFreeSetPartitionId::Mutator, index), + "Boundaries or find_first_set_bit failed: " SSIZE_FORMAT, index); + ShenandoahHeapRegion* r = _heap->get_region(index); + size_t used = r->used(); + squared += used * used; + linear += used; + count++; + index = _partitions.find_index_of_next_available_region(ShenandoahFreeSetPartitionId::Mutator, index + 1); } if (count > 0) { @@ -634,43 +1351,31 @@ double ShenandoahFreeSet::internal_fragmentation() { } } -/* - * External fragmentation metric: describes how fragmented the heap is. - * - * It is derived as: - * - * EF = 1 - largest_contiguous_free / total_free - * - * For example: - * a) Heap is completely empty => EF = 0 - * b) Heap is completely full => EF = 0 - * c) Heap is first-half full => EF = 1/2 - * d) Heap is half full, full and empty regions interleave => EF =~ 1 - */ double ShenandoahFreeSet::external_fragmentation() { - size_t last_idx = 0; + idx_t last_idx = 0; size_t max_contig = 0; size_t empty_contig = 0; size_t free = 0; - for (size_t index = _mutator_leftmost; index <= _mutator_rightmost; index++) { - if (is_mutator_free(index)) { - ShenandoahHeapRegion* r = _heap->get_region(index); - if (r->is_empty()) { - free += ShenandoahHeapRegion::region_size_bytes(); - if (last_idx + 1 == index) { - empty_contig++; - } else { - empty_contig = 1; - } + idx_t rightmost = _partitions.rightmost(ShenandoahFreeSetPartitionId::Mutator); + for (idx_t index = _partitions.leftmost(ShenandoahFreeSetPartitionId::Mutator); index <= rightmost; ) { + assert(_partitions.in_free_set(ShenandoahFreeSetPartitionId::Mutator, index), + "Boundaries or find_first_set_bit failed: " SSIZE_FORMAT, index); + ShenandoahHeapRegion* r = _heap->get_region(index); + if (r->is_empty()) { + free += ShenandoahHeapRegion::region_size_bytes(); + if (last_idx + 1 == index) { + empty_contig++; } else { - empty_contig = 0; + empty_contig = 1; } - - max_contig = MAX2(max_contig, empty_contig); - last_idx = index; + } else { + empty_contig = 0; } + max_contig = MAX2(max_contig, empty_contig); + last_idx = index; + index = _partitions.find_index_of_next_available_region(ShenandoahFreeSetPartitionId::Mutator, index + 1); } if (free > 0) { @@ -680,30 +1385,3 @@ double ShenandoahFreeSet::external_fragmentation() { } } -#ifdef ASSERT -void ShenandoahFreeSet::assert_bounds() const { - // Performance invariants. Failing these would not break the free set, but performance - // would suffer. 
- assert (_mutator_leftmost <= _max, "leftmost in bounds: " SIZE_FORMAT " < " SIZE_FORMAT, _mutator_leftmost, _max); - assert (_mutator_rightmost < _max, "rightmost in bounds: " SIZE_FORMAT " < " SIZE_FORMAT, _mutator_rightmost, _max); - - assert (_mutator_leftmost == _max || is_mutator_free(_mutator_leftmost), "leftmost region should be free: " SIZE_FORMAT, _mutator_leftmost); - assert (_mutator_rightmost == 0 || is_mutator_free(_mutator_rightmost), "rightmost region should be free: " SIZE_FORMAT, _mutator_rightmost); - - size_t beg_off = _mutator_free_bitmap.find_first_set_bit(0); - size_t end_off = _mutator_free_bitmap.find_first_set_bit(_mutator_rightmost + 1); - assert (beg_off >= _mutator_leftmost, "free regions before the leftmost: " SIZE_FORMAT ", bound " SIZE_FORMAT, beg_off, _mutator_leftmost); - assert (end_off == _max, "free regions past the rightmost: " SIZE_FORMAT ", bound " SIZE_FORMAT, end_off, _mutator_rightmost); - - assert (_collector_leftmost <= _max, "leftmost in bounds: " SIZE_FORMAT " < " SIZE_FORMAT, _collector_leftmost, _max); - assert (_collector_rightmost < _max, "rightmost in bounds: " SIZE_FORMAT " < " SIZE_FORMAT, _collector_rightmost, _max); - - assert (_collector_leftmost == _max || is_collector_free(_collector_leftmost), "leftmost region should be free: " SIZE_FORMAT, _collector_leftmost); - assert (_collector_rightmost == 0 || is_collector_free(_collector_rightmost), "rightmost region should be free: " SIZE_FORMAT, _collector_rightmost); - - beg_off = _collector_free_bitmap.find_first_set_bit(0); - end_off = _collector_free_bitmap.find_first_set_bit(_collector_rightmost + 1); - assert (beg_off >= _collector_leftmost, "free regions before the leftmost: " SIZE_FORMAT ", bound " SIZE_FORMAT, beg_off, _collector_leftmost); - assert (end_off == _max, "free regions past the rightmost: " SIZE_FORMAT ", bound " SIZE_FORMAT, end_off, _collector_rightmost); -} -#endif diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp index 634adfb63e0..e2852e5548c 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2016, 2019, Red Hat, Inc. All rights reserved. + * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -27,70 +28,366 @@ #include "gc/shenandoah/shenandoahHeapRegionSet.hpp" #include "gc/shenandoah/shenandoahHeap.hpp" +#include "gc/shenandoah/shenandoahSimpleBitMap.hpp" + +// Each ShenandoahHeapRegion is associated with a ShenandoahFreeSetPartitionId. +enum class ShenandoahFreeSetPartitionId : uint8_t { + Mutator, // Region is in the Mutator free set: available memory is available to mutators. + Collector, // Region is in the Collector free set: available memory is reserved for evacuations. + NotFree // Region is in no free set: it has no available memory +}; + +// We do not maintain counts, capacity, or used for regions that are not free. Informally, if a region is NotFree, it is +// in no partition. NumPartitions represents the size of an array that may be indexed by Mutator or Collector. 
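+//
+// For example (an illustrative sketch only; `tally` and `bytes` are hypothetical names), per-partition statistics
+// can be kept in arrays sized by UIntNumPartitions (defined just below) and indexed by casting the partition id:
+//
+//   size_t tally[UIntNumPartitions];
+//   tally[int(ShenandoahFreeSetPartitionId::Mutator)] += bytes;
+//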
+#define NumPartitions (ShenandoahFreeSetPartitionId::NotFree) +#define IntNumPartitions int(ShenandoahFreeSetPartitionId::NotFree) +#define UIntNumPartitions uint(ShenandoahFreeSetPartitionId::NotFree) + +// ShenandoahRegionPartitions provides an abstraction to help organize the implementation of ShenandoahFreeSet. This +// class implements partitioning of regions into distinct sets. Each ShenandoahHeapRegion is either in the Mutator free set, +// the Collector free set, or in neither free set (NotFree). When we speak of a "free partition", we mean partitions that +// for which the ShenandoahFreeSetPartitionId is not equal to NotFree. +class ShenandoahRegionPartitions { + +private: + const ssize_t _max; // The maximum number of heap regions + const size_t _region_size_bytes; + const ShenandoahFreeSet* _free_set; + // For each partition, we maintain a bitmap of which regions are affiliated with his partition. + ShenandoahSimpleBitMap _membership[UIntNumPartitions]; + + // For each partition, we track an interval outside of which a region affiliated with that partition is guaranteed + // not to be found. This makes searches for free space more efficient. For each partition p, _leftmosts[p] + // represents its least index, and its _rightmosts[p] its greatest index. Empty intervals are indicated by the + // canonical [_max, -1]. + ssize_t _leftmosts[UIntNumPartitions]; + ssize_t _rightmosts[UIntNumPartitions]; + + // Allocation for humongous objects needs to find regions that are entirely empty. For each partion p, _leftmosts_empty[p] + // represents the first region belonging to this partition that is completely empty and _rightmosts_empty[p] represents the + // last region that is completely empty. If there is no completely empty region in this partition, this is represented + // by the canonical [_max, -1]. + ssize_t _leftmosts_empty[UIntNumPartitions]; + ssize_t _rightmosts_empty[UIntNumPartitions]; + + // For each partition p, _capacity[p] represents the total amount of memory within the partition at the time + // of the most recent rebuild, _used[p] represents the total amount of memory that has been allocated within this + // partition (either already allocated as of the rebuild, or allocated since the rebuild). _capacity[p] and _used[p] + // are denoted in bytes. Note that some regions that had been assigned to a particular partition at rebuild time + // may have been retired following the rebuild. The tallies for these regions are still reflected in _capacity[p] + // and _used[p], even though the region may have been removed from the free set. 
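+  //
+  // Example (illustrative): if a Mutator region is retired while 64 KB of it is still unallocated, that 64 KB is
+  // added to _used[Mutator] at retirement, so _capacity[Mutator] - _used[Mutator] continues to describe memory
+  // that is genuinely allocatable even though the retired region is no longer in the Mutator partition.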
+ size_t _capacity[UIntNumPartitions]; + size_t _used[UIntNumPartitions]; + size_t _region_counts[UIntNumPartitions]; + + // Shrink the intervals associated with partition when region idx is removed from this free set + inline void shrink_interval_if_boundary_modified(ShenandoahFreeSetPartitionId partition, ssize_t idx); + + // Shrink the intervals associated with partition when regions low_idx through high_idx inclusive are removed from this free set + inline void shrink_interval_if_range_modifies_either_boundary(ShenandoahFreeSetPartitionId partition, + ssize_t low_idx, ssize_t high_idx); + inline void expand_interval_if_boundary_modified(ShenandoahFreeSetPartitionId partition, ssize_t idx, size_t capacity); + +#ifndef PRODUCT + void dump_bitmap_row(ssize_t region_idx) const; + void dump_bitmap_range(ssize_t start_region_idx, ssize_t end_region_idx) const; + void dump_bitmap() const; +#endif +public: + ShenandoahRegionPartitions(size_t max_regions, ShenandoahFreeSet* free_set); + ~ShenandoahRegionPartitions() {} + + // Remove all regions from all partitions and reset all bounds + void make_all_regions_unavailable(); + + // Set the partition id for a particular region without adjusting interval bounds or usage/capacity tallies + inline void raw_assign_membership(size_t idx, ShenandoahFreeSetPartitionId p) { + _membership[int(p)].set_bit(idx); + } + + // Set the Mutator intervals, usage, and capacity according to arguments. Reset the Collector intervals, used, capacity + // to represent empty Collector free set. We use this at the end of rebuild_free_set() to avoid the overhead of making + // many redundant incremental adjustments to the mutator intervals as the free set is being rebuilt. + void establish_mutator_intervals(ssize_t mutator_leftmost, ssize_t mutator_rightmost, + ssize_t mutator_leftmost_empty, ssize_t mutator_rightmost_empty, + size_t mutator_region_count, size_t mutator_used); + + // Retire region idx from within partition, , leaving its capacity and used as part of the original free partition's totals. + // Requires that region idx is in in the Mutator or Collector partitions. Hereafter, identifies this region as NotFree. + // Any remnant of available memory at the time of retirement is added to the original partition's total of used bytes. + void retire_from_partition(ShenandoahFreeSetPartitionId p, ssize_t idx, size_t used_bytes); + + // Retire all regions between low_idx and high_idx inclusive from within partition. Requires that each region idx is + // in the same Mutator or Collector partition. Hereafter, identifies each region as NotFree. Assumes that each region + // is now considered fully used, since the region is presumably used to represent a humongous object. + void retire_range_from_partition(ShenandoahFreeSetPartitionId partition, ssize_t low_idx, ssize_t high_idx); + + // Place region idx into free set which_partition. Requires that idx is currently NotFree. + void make_free(ssize_t idx, ShenandoahFreeSetPartitionId which_partition, size_t region_capacity); + + // Place region idx into free partition new_partition, adjusting used and capacity totals for the original and new partition + // given that available bytes can still be allocated within this region. Requires that idx is currently not NotFree. 
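+  //
+  // Example (illustrative): when an entirely empty region is flipped from the Mutator partition to the Collector
+  // partition to hold evacuees, available equals the full region size, so the whole region's capacity is
+  // subtracted from the Mutator totals and added to the Collector totals.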
+ void move_from_partition_to_partition(ssize_t idx, ShenandoahFreeSetPartitionId orig_partition, + ShenandoahFreeSetPartitionId new_partition, size_t available); + + const char* partition_membership_name(ssize_t idx) const; + + // Return the index of the next available region >= start_index, or maximum_regions if not found. + inline ssize_t find_index_of_next_available_region(ShenandoahFreeSetPartitionId which_partition, ssize_t start_index) const; + + // Return the index of the previous available region <= last_index, or -1 if not found. + inline ssize_t find_index_of_previous_available_region(ShenandoahFreeSetPartitionId which_partition, ssize_t last_index) const; + + // Return the index of the next available cluster of cluster_size regions >= start_index, or maximum_regions if not found. + inline ssize_t find_index_of_next_available_cluster_of_regions(ShenandoahFreeSetPartitionId which_partition, + ssize_t start_index, size_t cluster_size) const; + + // Return the index of the previous available cluster of cluster_size regions <= last_index, or -1 if not found. + inline ssize_t find_index_of_previous_available_cluster_of_regions(ShenandoahFreeSetPartitionId which_partition, + ssize_t last_index, size_t cluster_size) const; + + inline bool in_free_set(ShenandoahFreeSetPartitionId which_partition, ssize_t idx) const { + return _membership[int(which_partition)].is_set(idx); + } + + // Returns the ShenandoahFreeSetPartitionId affiliation of region idx, NotFree if this region is not currently in any partition. + // This does not enforce that free_set membership implies allocation capacity. + inline ShenandoahFreeSetPartitionId membership(ssize_t idx) const; + +#ifdef ASSERT + // Returns true iff region idx's membership is which_partition. If which_partition represents a free set, asserts + // that the region has allocation capacity. + inline bool partition_id_matches(ssize_t idx, ShenandoahFreeSetPartitionId which_partition) const; +#endif + + inline size_t max_regions() const { return _max; } + + inline size_t region_size_bytes() const { return _region_size_bytes; }; + + // The following four methods return the left-most and right-most bounds on ranges of regions representing + // the requested set. The _empty variants represent bounds on the range that holds completely empty + // regions, which are required for humongous allocations and desired for "very large" allocations. 
+ // if the requested which_partition is empty: + // leftmost() and leftmost_empty() return _max, rightmost() and rightmost_empty() return 0 + // otherwise, expect the following: + // 0 <= leftmost <= leftmost_empty <= rightmost_empty <= rightmost < _max + inline ssize_t leftmost(ShenandoahFreeSetPartitionId which_partition) const; + inline ssize_t rightmost(ShenandoahFreeSetPartitionId which_partition) const; + ssize_t leftmost_empty(ShenandoahFreeSetPartitionId which_partition); + ssize_t rightmost_empty(ShenandoahFreeSetPartitionId which_partition); + + inline bool is_empty(ShenandoahFreeSetPartitionId which_partition) const; + + inline void increase_used(ShenandoahFreeSetPartitionId which_partition, size_t bytes); + + inline size_t capacity_of(ShenandoahFreeSetPartitionId which_partition) const { + assert (which_partition < NumPartitions, "selected free set must be valid"); + return _capacity[int(which_partition)]; + } + + inline size_t used_by(ShenandoahFreeSetPartitionId which_partition) const { + assert (which_partition < NumPartitions, "selected free set must be valid"); + return _used[int(which_partition)]; + } + + inline size_t available_in(ShenandoahFreeSetPartitionId which_partition) const { + assert (which_partition < NumPartitions, "selected free set must be valid"); + return _capacity[int(which_partition)] - _used[int(which_partition)]; + } + + inline void set_capacity_of(ShenandoahFreeSetPartitionId which_partition, size_t value) { + assert (which_partition < NumPartitions, "selected free set must be valid"); + _capacity[int(which_partition)] = value; + } + + inline void set_used_by(ShenandoahFreeSetPartitionId which_partition, size_t value) { + assert (which_partition < NumPartitions, "selected free set must be valid"); + _used[int(which_partition)] = value; + } + + inline size_t count(ShenandoahFreeSetPartitionId which_partition) const { return _region_counts[int(which_partition)]; } + + // Assure leftmost, rightmost, leftmost_empty, and rightmost_empty bounds are valid for all free sets. + // Valid bounds honor all of the following (where max is the number of heap regions): + // if the set is empty, leftmost equals max and rightmost equals 0 + // Otherwise (the set is not empty): + // 0 <= leftmost < max and 0 <= rightmost < max + // the region at leftmost is in the set + // the region at rightmost is in the set + // rightmost >= leftmost + // for every idx that is in the set { + // idx >= leftmost && + // idx <= rightmost + // } + // if the set has no empty regions, leftmost_empty equals max and rightmost_empty equals 0 + // Otherwise (the region has empty regions): + // 0 <= leftmost_empty < max and 0 <= rightmost_empty < max + // rightmost_empty >= leftmost_empty + // for every idx that is in the set and is empty { + // idx >= leftmost && + // idx <= rightmost + // } + void assert_bounds() NOT_DEBUG_RETURN; +}; + +// Publicly, ShenandoahFreeSet represents memory that is available to mutator threads. The public capacity(), used(), +// and available() methods represent this public notion of memory that is under control of the mutator. Separately, +// ShenandoahFreeSet also represents memory available to garbage collection activities for compaction purposes. +// +// The Shenandoah garbage collector evacuates live objects out of specific regions that are identified as members of the +// collection set (cset). +// +// The ShenandoahFreeSet endeavors to congregrate survivor objects (objects that have been evacuated at least once) at the +// high end of memory. 
New mutator allocations are taken from the low end of memory. Within the mutator's range of regions, +// humongous allocations are taken from the lowest addresses, and LAB (local allocation buffers) and regular shared allocations +// are taken from the higher address of the mutator's range of regions. This approach allows longer lasting survivor regions +// to congregate at the top of the heap and longer lasting humongous regions to congregate at the bottom of the heap, with +// short-lived frequently evacuated regions occupying the middle of the heap. +// +// Mutator and garbage collection activities tend to scramble the content of regions. Twice, during each GC pass, we rebuild +// the free set in an effort to restore the efficient segregation of Collector and Mutator regions: +// +// 1. At the start of evacuation, we know exactly how much memory is going to be evacuated, and this guides our +// sizing of the Collector free set. +// +// 2. At the end of GC, we have reclaimed all of the memory that was spanned by the cset. We rebuild here to make +// sure there is enough memory reserved at the high end of memory to hold the objects that might need to be evacuated +// during the next GC pass. class ShenandoahFreeSet : public CHeapObj { private: ShenandoahHeap* const _heap; - CHeapBitMap _mutator_free_bitmap; - CHeapBitMap _collector_free_bitmap; - size_t _max; + ShenandoahRegionPartitions _partitions; - // Left-most and right-most region indexes. There are no free regions outside - // of [left-most; right-most] index intervals - size_t _mutator_leftmost, _mutator_rightmost; - size_t _collector_leftmost, _collector_rightmost; + // Mutator allocations are biased from left-to-right or from right-to-left based on which end of mutator range + // is most likely to hold partially used regions. In general, we want to finish consuming partially used + // regions and retire them in order to reduce the regions that must be searched for each allocation request. + bool _right_to_left_bias; - size_t _capacity; - size_t _used; + // We re-evaluate the left-to-right allocation bias whenever _alloc_bias_weight is less than zero. Each time + // we allocate an object, we decrement the count of this value. Each time we re-evaluate whether to allocate + // from right-to-left or left-to-right, we reset the value of this counter to _InitialAllocBiasWeight. + ssize_t _alloc_bias_weight; - void assert_bounds() const NOT_DEBUG_RETURN; - - bool is_mutator_free(size_t idx) const; - bool is_collector_free(size_t idx) const; + const ssize_t _InitialAllocBiasWeight = 256; HeapWord* try_allocate_in(ShenandoahHeapRegion* region, ShenandoahAllocRequest& req, bool& in_new_region); + + // While holding the heap lock, allocate memory for a single object or LAB which is to be entirely contained + // within a single HeapRegion as characterized by req. + // + // Precondition: req.size() <= ShenandoahHeapRegion::humongous_threshold_words(). HeapWord* allocate_single(ShenandoahAllocRequest& req, bool& in_new_region); + + // While holding the heap lock, allocate memory for a humongous object which spans one or more regions that + // were previously empty. Regions that represent humongous objects are entirely dedicated to the humongous + // object. No other objects are packed into these regions. + // + // Precondition: req.size() > ShenandoahHeapRegion::humongous_threshold_words(). HeapWord* allocate_contiguous(ShenandoahAllocRequest& req); + // Change region r from the Mutator partition to the GC's Collector partition. 
This requires that the region is entirely empty. + // Typical usage: During evacuation, the GC may find it needs more memory than had been reserved at the start of evacuation to + // hold evacuated objects. If this occurs and memory is still available in the Mutator's free set, we will flip a region from + // the Mutator free set into the Collector free set. void flip_to_gc(ShenandoahHeapRegion* r); - - void recompute_bounds(); - void adjust_bounds(); - bool touches_bounds(size_t num) const; - - void increase_used(size_t amount); void clear_internal(); - - size_t collector_count() const { return _collector_free_bitmap.count_one_bits(); } - size_t mutator_count() const { return _mutator_free_bitmap.count_one_bits(); } - void try_recycle_trashed(ShenandoahHeapRegion *r); - bool can_allocate_from(ShenandoahHeapRegion *r); - size_t alloc_capacity(ShenandoahHeapRegion *r); - bool has_no_alloc_capacity(ShenandoahHeapRegion *r); + // Returns true iff this region is entirely available, either because it is empty() or because it has been found to represent + // immediate trash and we'll be able to immediately recycle it. Note that we cannot recycle immediate trash if + // concurrent weak root processing is in progress. + inline bool can_allocate_from(ShenandoahHeapRegion *r) const; + inline bool can_allocate_from(size_t idx) const; + + inline bool has_alloc_capacity(ShenandoahHeapRegion *r) const; + + // This function places all regions that have allocation capacity into the mutator_partition, identifying regions + // that have no allocation capacity as NotFree. Subsequently, we will move some of the mutator regions into the + // collector partition with the intent of packing collector memory into the highest (rightmost) addresses of the + // heap, with mutator memory consuming the lowest addresses of the heap. + void find_regions_with_alloc_capacity(size_t &cset_regions); + + // Having placed all regions that have allocation capacity into the mutator partition, move some of these regions from + // the mutator partition into the collector partition in order to assure that the memory available for allocations within + // the collector partition is at least to_reserve. + void reserve_regions(size_t to_reserve); + + // Overwrite arguments to represent the number of regions to be reclaimed from the cset + void prepare_to_rebuild(size_t &cset_regions); + + void finish_rebuild(size_t cset_regions); public: ShenandoahFreeSet(ShenandoahHeap* heap, size_t max_regions); + // Public because ShenandoahRegionPartitions assertions require access. + inline size_t alloc_capacity(ShenandoahHeapRegion *r) const; + inline size_t alloc_capacity(size_t idx) const; + void clear(); void rebuild(); - void recycle_trash(); + // Move up to cset_regions number of regions from being available to the collector to being available to the mutator. + // + // Typical usage: At the end of evacuation, when the collector no longer needs the regions that had been reserved + // for evacuation, invoke this to make regions available for mutator allocations. + // + // Note that we plan to replenish the Collector reserve at the end of update refs, at which time all + // of the regions recycled from the collection set will be available. If the very unlikely event that there + // are fewer regions in the collection set than remain in the collector set, we limit the transfer in order + // to assure that the replenished Collector reserve can be sufficiently large. 
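+  //
+  // Example (illustrative): if the collection set holds 10 regions while 12 regions remain reserved in the
+  // Collector partition, at most 10 regions are transferred, so that recycling the collection set can fully
+  // replenish the Collector reserve afterwards.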
+ void move_regions_from_collector_to_mutator(size_t cset_regions); + void recycle_trash(); void log_status(); - size_t capacity() const { return _capacity; } - size_t used() const { return _used; } - size_t available() const { - assert(_used <= _capacity, "must use less than capacity"); - return _capacity - _used; + inline size_t capacity() const { return _partitions.capacity_of(ShenandoahFreeSetPartitionId::Mutator); } + inline size_t used() const { return _partitions.used_by(ShenandoahFreeSetPartitionId::Mutator); } + inline size_t available() const { + assert(used() <= capacity(), "must use less than capacity"); + return capacity() - used(); } HeapWord* allocate(ShenandoahAllocRequest& req, bool& in_new_region); size_t unsafe_peek_free() const; + /* + * Internal fragmentation metric: describes how fragmented the heap regions are. + * + * It is derived as: + * + * sum(used[i]^2, i=0..k) + * IF = 1 - ------------------------------ + * C * sum(used[i], i=0..k) + * + * ...where k is the number of regions in computation, C is the region capacity, and + * used[i] is the used space in the region. + * + * The non-linearity causes IF to be lower for the cases where the same total heap + * used is densely packed. For example: + * a) Heap is completely full => IF = 0 + * b) Heap is half full, first 50% regions are completely full => IF = 0 + * c) Heap is half full, each region is 50% full => IF = 1/2 + * d) Heap is quarter full, first 50% regions are completely full => IF = 0 + * e) Heap is quarter full, each region is 25% full => IF = 3/4 + * f) Heap has one small object per each region => IF =~ 1 + */ double internal_fragmentation(); + + /* + * External fragmentation metric: describes how fragmented the heap is. + * + * It is derived as: + * + * EF = 1 - largest_contiguous_free / total_free + * + * For example: + * a) Heap is completely empty => EF = 0 + * b) Heap is completely full => EF = 0 + * c) Heap is first-half full => EF = 1/2 + * d) Heap is half full, full and empty regions interleave => EF =~ 1 + */ double external_fragmentation(); void print_on(outputStream* out) const; diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFullGC.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFullGC.cpp index 2aeec0911d0..de7d81d0f43 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFullGC.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFullGC.cpp @@ -912,7 +912,6 @@ private: public: ShenandoahPostCompactClosure() : _heap(ShenandoahHeap::heap()), _live(0) { - _heap->free_set()->clear(); } void heap_region_do(ShenandoahHeapRegion* r) { diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp b/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp index 53d4911797d..8150df10914 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp @@ -2103,17 +2103,27 @@ public: if (CONCURRENT) { ShenandoahConcurrentWorkerSession worker_session(worker_id); ShenandoahSuspendibleThreadSetJoiner stsj; - do_work(); + do_work(worker_id); } else { ShenandoahParallelWorkerSession worker_session(worker_id); - do_work(); + do_work(worker_id); } } private: template - void do_work() { + void do_work(uint worker_id) { T cl; + if (CONCURRENT && (worker_id == 0)) { + // We ask the first worker to replenish the Mutator free set by moving regions previously reserved to hold the + // results of evacuation. These reserves are no longer necessary because evacuation has completed. 
+ size_t cset_regions = _heap->collection_set()->count(); + // We cannot transfer any more regions than will be reclaimed when the existing collection set is recycled because + // we need the reclaimed collection set regions to replenish the collector reserves + _heap->free_set()->move_regions_from_collector_to_mutator(cset_regions); + } + // If !CONCURRENT, there's no value in expanding Mutator free set + ShenandoahHeapRegion* r = _regions->next(); ShenandoahMarkingContext* const ctx = _heap->complete_marking_context(); while (r != nullptr) { diff --git a/src/hotspot/share/gc/shenandoah/shenandoahSimpleBitMap.cpp b/src/hotspot/share/gc/shenandoah/shenandoahSimpleBitMap.cpp new file mode 100644 index 00000000000..c3e8108752f --- /dev/null +++ b/src/hotspot/share/gc/shenandoah/shenandoahSimpleBitMap.cpp @@ -0,0 +1,291 @@ +/* + * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "gc/shenandoah/shenandoahSimpleBitMap.hpp" + +ShenandoahSimpleBitMap::ShenandoahSimpleBitMap(size_t num_bits) : + _num_bits(num_bits), + _num_words(align_up(num_bits, BitsPerWord) / BitsPerWord), + _bitmap(NEW_C_HEAP_ARRAY(uintx, _num_words, mtGC)) +{ + clear_all(); +} + +ShenandoahSimpleBitMap::~ShenandoahSimpleBitMap() { + if (_bitmap != nullptr) { + FREE_C_HEAP_ARRAY(uintx, _bitmap); + } +} + +size_t ShenandoahSimpleBitMap::count_leading_ones(idx_t start_idx) const { + assert((start_idx >= 0) && (start_idx < _num_bits), "precondition"); + size_t array_idx = start_idx >> LogBitsPerWord; + uintx element_bits = _bitmap[array_idx]; + uintx bit_number = start_idx & right_n_bits(LogBitsPerWord); + uintx mask = ~right_n_bits(bit_number); + size_t counted_ones = 0; + while ((element_bits & mask) == mask) { + // All bits numbered >= bit_number are set + size_t found_ones = BitsPerWord - bit_number; + counted_ones += found_ones; + // Dead code: do not need to compute: start_idx += found_ones; + // Strength reduction: array_idx = (start_idx >> LogBitsPerWord) + array_idx++; + element_bits = _bitmap[array_idx]; + // Constant folding: bit_number = start_idx & right_n_bits(LogBitsPerWord); + bit_number = 0; + // Constant folding: mask = ~right_n_bits(bit_number); + mask = ~0; + } + + // Add in number of consecutive ones starting with the_bit and including more significant bits and return result + uintx aligned = element_bits >> bit_number; + uintx complement = ~aligned; + return counted_ones + count_trailing_zeros(complement); +} + +size_t ShenandoahSimpleBitMap::count_trailing_ones(idx_t last_idx) const { + assert((last_idx >= 0) && (last_idx < _num_bits), "precondition"); + size_t array_idx = last_idx >> LogBitsPerWord; + uintx element_bits = _bitmap[array_idx]; + uintx bit_number = last_idx & right_n_bits(LogBitsPerWord); + // All ones from bit 0 to the_bit + uintx mask = right_n_bits(bit_number + 1); + size_t counted_ones = 0; + while ((element_bits & mask) == mask) { + // All bits numbered <= bit_number are set + size_t found_ones = bit_number + 1; + counted_ones += found_ones; + // Dead code: do not need to compute: last_idx -= found_ones; + array_idx--; + element_bits = _bitmap[array_idx]; + // Constant folding: bit_number = last_idx & right_n_bits(LogBitsPerWord); + bit_number = BitsPerWord - 1; + // Constant folding: mask = right_n_bits(bit_number + 1); + mask = ~0; + } + + // Add in number of consecutive ones starting with the_bit and including less significant bits and return result + uintx aligned = element_bits << (BitsPerWord - (bit_number + 1)); + uintx complement = ~aligned; + return counted_ones + count_leading_zeros(complement); +} + +bool ShenandoahSimpleBitMap::is_forward_consecutive_ones(idx_t start_idx, idx_t count) const { + while (count > 0) { + assert((start_idx >= 0) && (start_idx < _num_bits), "precondition: start_idx: " SSIZE_FORMAT ", count: " SSIZE_FORMAT, + start_idx, count); + assert(start_idx + count <= (idx_t) _num_bits, "precondition"); + size_t array_idx = start_idx >> LogBitsPerWord; + uintx bit_number = start_idx & right_n_bits(LogBitsPerWord); + uintx element_bits = _bitmap[array_idx]; + uintx bits_to_examine = BitsPerWord - bit_number; + element_bits >>= bit_number; + uintx complement = ~element_bits; + uintx trailing_ones; + if (complement != 0) { + trailing_ones = count_trailing_zeros(complement); + } else { + trailing_ones = bits_to_examine; + } + if (trailing_ones >= (uintx) count) { + return true; + } else 
if (trailing_ones == bits_to_examine) { + start_idx += bits_to_examine; + count -= bits_to_examine; + // Repeat search with smaller goal + } else { + return false; + } + } + return true; +} + +bool ShenandoahSimpleBitMap::is_backward_consecutive_ones(idx_t last_idx, idx_t count) const { + while (count > 0) { + assert((last_idx >= 0) && (last_idx < _num_bits), "precondition"); + assert(last_idx - count >= -1, "precondition"); + size_t array_idx = last_idx >> LogBitsPerWord; + uintx bit_number = last_idx & right_n_bits(LogBitsPerWord); + uintx element_bits = _bitmap[array_idx]; + uintx bits_to_examine = bit_number + 1; + element_bits <<= (BitsPerWord - bits_to_examine); + uintx complement = ~element_bits; + uintx leading_ones; + if (complement != 0) { + leading_ones = count_leading_zeros(complement); + } else { + leading_ones = bits_to_examine; + } + if (leading_ones >= (uintx) count) { + return true; + } else if (leading_ones == bits_to_examine) { + last_idx -= leading_ones; + count -= leading_ones; + // Repeat search with smaller goal + } else { + return false; + } + } + return true; +} + +idx_t ShenandoahSimpleBitMap::find_first_consecutive_set_bits(idx_t beg, idx_t end, size_t num_bits) const { + assert((beg >= 0) && (beg < _num_bits), "precondition"); + + // Stop looking if there are not num_bits remaining in probe space. + idx_t start_boundary = end - num_bits; + if (beg > start_boundary) { + return end; + } + uintx array_idx = beg >> LogBitsPerWord; + uintx bit_number = beg & right_n_bits(LogBitsPerWord); + uintx element_bits = _bitmap[array_idx]; + if (bit_number > 0) { + uintx mask_out = right_n_bits(bit_number); + element_bits &= ~mask_out; + } + + // The following loop minimizes the number of spans probed in order to find num_bits consecutive bits. + // For example, if bit_number = beg = 0, num_bits = 8, and element bits equals 00111111_11000000_00000000_10011000B, + // we need only 3 probes to find the match at bit offset 22. + // + // Let beg = 0 + // element_bits = 00111111_11000000_00000000_10011000B; + // ________ (the searched span) + // ^ ^ ^- bit_number = beg = 0 + // | +-- next_start_candidate_1 (where next 1 is found) + // +------ next_start_candidate_2 (start of the trailing 1s within span) + // Let beg = 7 + // element_bits = 00111111_11000000_00000000_10011000B; + // ^ ^_________ (the searched span) + // | | ^- bit_number = beg = 7 + // | +---------- next_start_candidate_2 (there are no trailing 1s within span) + // +------------------ next_start_candidate_1 (where next 1 is found) + // Let beg = 22 + // Let beg = 22 + // element_bits = 00111111_11000001_11111100_10011000B; + // _________ (the searched span) + // ^- bit_number = beg = 18 + // Here, is_forward_consecutive_ones(22, 8) succeeds and we report the match + + while (true) { + if (element_bits == 0) { + // move to the next element + beg += BitsPerWord - bit_number; + if (beg > start_boundary) { + // No match found. + return end; + } + array_idx++; + bit_number = 0; + element_bits = _bitmap[array_idx]; + } else if (is_forward_consecutive_ones(beg, num_bits)) { + return beg; + } else { + // There is at least one non-zero bit within the masked element_bits. Arrange to skip over bits that + // cannot be part of a consecutive-ones match. + uintx next_set_bit = count_trailing_zeros(element_bits); + uintx next_start_candidate_1 = (array_idx << LogBitsPerWord) + next_set_bit; + + // There is at least one zero bit in this span. 
Align the next probe at the start of trailing ones for probed span, + // or align at end of span if this span has no trailing ones. + size_t trailing_ones = count_trailing_ones(beg + num_bits - 1); + uintx next_start_candidate_2 = beg + num_bits - trailing_ones; + + beg = MAX2(next_start_candidate_1, next_start_candidate_2); + if (beg > start_boundary) { + // No match found. + return end; + } + array_idx = beg >> LogBitsPerWord; + element_bits = _bitmap[array_idx]; + bit_number = beg & right_n_bits(LogBitsPerWord); + if (bit_number > 0) { + size_t mask_out = right_n_bits(bit_number); + element_bits &= ~mask_out; + } + } + } +} + +idx_t ShenandoahSimpleBitMap::find_last_consecutive_set_bits(const idx_t beg, idx_t end, const size_t num_bits) const { + + assert((end >= 0) && (end < _num_bits), "precondition"); + + // Stop looking if there are not num_bits remaining in probe space. + idx_t last_boundary = beg + num_bits; + if (end < last_boundary) { + return beg; + } + + size_t array_idx = end >> LogBitsPerWord; + uintx bit_number = end & right_n_bits(LogBitsPerWord); + uintx element_bits = _bitmap[array_idx]; + if (bit_number < BitsPerWord - 1) { + uintx mask_in = right_n_bits(bit_number + 1); + element_bits &= mask_in; + } + + // See comment in find_first_consecutive_set_bits to understand how this loop works. + while (true) { + if (element_bits == 0) { + // move to the previous element + end -= bit_number + 1; + if (end < last_boundary) { + // No match found. + return beg; + } + array_idx--; + bit_number = BitsPerWord - 1; + element_bits = _bitmap[array_idx]; + } else if (is_backward_consecutive_ones(end, num_bits)) { + return end + 1 - num_bits; + } else { + // There is at least one non-zero bit within the masked element_bits. Arrange to skip over bits that + // cannot be part of a consecutive-ones match. + uintx next_set_bit = BitsPerWord - (1 + count_leading_zeros(element_bits)); + uintx next_last_candidate_1 = (array_idx << LogBitsPerWord) + next_set_bit; + + // There is at least one zero bit in this span. Align the next probe at the end of leading ones for probed span, + // or align before start of span if this span has no leading ones. + size_t leading_ones = count_leading_ones(end - (num_bits - 1)); + uintx next_last_candidate_2 = end - (num_bits - leading_ones); + + end = MIN2(next_last_candidate_1, next_last_candidate_2); + if (end < last_boundary) { + // No match found. + return beg; + } + array_idx = end >> LogBitsPerWord; + bit_number = end & right_n_bits(LogBitsPerWord); + element_bits = _bitmap[array_idx]; + if (bit_number < BitsPerWord - 1){ + size_t mask_in = right_n_bits(bit_number + 1); + element_bits &= mask_in; + } + } + } +} diff --git a/src/hotspot/share/gc/shenandoah/shenandoahSimpleBitMap.hpp b/src/hotspot/share/gc/shenandoah/shenandoahSimpleBitMap.hpp new file mode 100644 index 00000000000..c22e9527002 --- /dev/null +++ b/src/hotspot/share/gc/shenandoah/shenandoahSimpleBitMap.hpp @@ -0,0 +1,170 @@ +/* + * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_GC_SHENANDOAH_SHENANDOAHSIMPLEBITMAP_HPP +#define SHARE_GC_SHENANDOAH_SHENANDOAHSIMPLEBITMAP_HPP + +#include + +#include "gc/shenandoah/shenandoahAsserts.hpp" + +// TODO: Merge the enhanced capabilities of ShenandoahSimpleBitMap into src/hotspot/share/utilities/bitMap.hpp +// and deprecate ShenandoahSimpleBitMap. The key enhanced capabilities to be integrated include: +// +// 1. Allow searches from high to low memory (when biasing allocations towards the top of the heap) +// 2. Allow searches for clusters of contiguous set bits (to expedite allocation for humongous objects) +// +// idx_t is defined here as ssize_t. In src/hotspot/share/utiliities/bitMap.hpp, idx is defined as size_t. +// This is a significant incompatibility. +// +// The API and internal implementation of ShenandoahSimpleBitMap and ShenandoahRegionPartitions use idx_t to +// represent index, even though index is "inherently" unsigned. There are two reasons for this choice: +// 1. We use -1 as a sentinel value to represent empty partitions. This same value may be used to represent +// failure to find a previous set bit or previous range of set bits. +// 2. Certain loops are written most naturally if the iterator, which may hold the sentinel -1 value, can be +// declared as signed and the terminating condition can be < 0. + +typedef ssize_t idx_t; + +// ShenandoahSimpleBitMap resembles CHeapBitMap but adds missing support for find_first_consecutive_set_bits() and +// find_last_consecutive_set_bits. An alternative refactoring of code would subclass CHeapBitMap, but this might +// break abstraction rules, because efficient implementation requires assumptions about superclass internals that +// might be violatee through future software maintenance. +class ShenandoahSimpleBitMap { + const idx_t _num_bits; + const size_t _num_words; + uintx* const _bitmap; + +public: + ShenandoahSimpleBitMap(size_t num_bits); + + ~ShenandoahSimpleBitMap(); + + void clear_all() { + for (size_t i = 0; i < _num_words; i++) { + _bitmap[i] = 0; + } + } + +private: + + // Count consecutive ones in forward order, starting from start_idx. Requires that there is at least one zero + // between start_idx and index value (_num_bits - 1), inclusive. + size_t count_leading_ones(idx_t start_idx) const; + + // Count consecutive ones in reverse order, starting from last_idx. Requires that there is at least one zero + // between last_idx and index value zero, inclusive. 
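+  //
+  // Example (illustrative): if bits 1 through 4 are set and bit 0 is clear, count_trailing_ones(4) returns 4,
+  // counting the run at indices 4, 3, 2, 1 and stopping at the zero bit at index 0.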
+ size_t count_trailing_ones(idx_t last_idx) const; + + bool is_forward_consecutive_ones(idx_t start_idx, idx_t count) const; + bool is_backward_consecutive_ones(idx_t last_idx, idx_t count) const; + +public: + + inline idx_t aligned_index(idx_t idx) const { + assert((idx >= 0) && (idx < _num_bits), "precondition"); + idx_t array_idx = idx & ~right_n_bits(LogBitsPerWord); + return array_idx; + } + + inline constexpr idx_t alignment() const { + return BitsPerWord; + } + + // For testing + inline idx_t size() const { + return _num_bits; + } + + // Return the word that holds idx bit and its neighboring bits. + inline uintx bits_at(idx_t idx) const { + assert((idx >= 0) && (idx < _num_bits), "precondition"); + idx_t array_idx = idx >> LogBitsPerWord; + return _bitmap[array_idx]; + } + + inline void set_bit(idx_t idx) { + assert((idx >= 0) && (idx < _num_bits), "precondition"); + size_t array_idx = idx >> LogBitsPerWord; + uintx bit_number = idx & right_n_bits(LogBitsPerWord); + uintx the_bit = nth_bit(bit_number); + _bitmap[array_idx] |= the_bit; + } + + inline void clear_bit(idx_t idx) { + assert((idx >= 0) && (idx < _num_bits), "precondition"); + assert(idx >= 0, "precondition"); + size_t array_idx = idx >> LogBitsPerWord; + uintx bit_number = idx & right_n_bits(LogBitsPerWord); + uintx the_bit = nth_bit(bit_number); + _bitmap[array_idx] &= ~the_bit; + } + + inline bool is_set(idx_t idx) const { + assert((idx >= 0) && (idx < _num_bits), "precondition"); + assert(idx >= 0, "precondition"); + size_t array_idx = idx >> LogBitsPerWord; + uintx bit_number = idx & right_n_bits(LogBitsPerWord); + uintx the_bit = nth_bit(bit_number); + return (_bitmap[array_idx] & the_bit)? true: false; + } + + // Return the index of the first set bit in the range [beg, size()), or size() if none found. + // precondition: beg and end form a valid range for the bitmap. + inline idx_t find_first_set_bit(idx_t beg) const; + + // Return the index of the first set bit in the range [beg, end), or end if none found. + // precondition: beg and end form a valid range for the bitmap. + inline idx_t find_first_set_bit(idx_t beg, idx_t end) const; + + // Return the index of the last set bit in the range (-1, end], or -1 if none found. + // precondition: beg and end form a valid range for the bitmap. + inline idx_t find_last_set_bit(idx_t end) const; + + // Return the index of the last set bit in the range (beg, end], or beg if none found. + // precondition: beg and end form a valid range for the bitmap. + inline idx_t find_last_set_bit(idx_t beg, idx_t end) const; + + // Return the start index of the first run of consecutive set bits for which the first set bit is within + // the range [beg, size()), or size() if the run of is not found within this range. + // precondition: beg is within the valid range for the bitmap. + inline idx_t find_first_consecutive_set_bits(idx_t beg, size_t num_bits) const; + + // Return the start index of the first run of consecutive set bits for which the first set bit is within + // the range [beg, end), or end if the run of is not found within this range. + // precondition: beg and end form a valid range for the bitmap. + idx_t find_first_consecutive_set_bits(idx_t beg, idx_t end, size_t num_bits) const; + + // Return the start index of the last run of consecutive set bits for which the entire run of set bits is within + // the range (-1, end], or -1 if the run of is not found within this range. + // precondition: end is within the valid range for the bitmap. 
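+  //
+  // Example (illustrative): if only bits 5 through 9 are set, find_last_consecutive_set_bits(9, 3) returns 7,
+  // the start of the last run of three consecutive set bits lying entirely within (-1, 9].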
+  inline idx_t find_last_consecutive_set_bits(idx_t end, size_t num_bits) const;
+
+  // Return the start index of the last run of consecutive set bits for which the entire run of set bits is within
+  // the range (beg, end], or beg if the run of bits is not found within this range.
+  // precondition: beg and end form a valid range for the bitmap.
+  idx_t find_last_consecutive_set_bits(idx_t beg, idx_t end, size_t num_bits) const;
+};
+
+#endif // SHARE_GC_SHENANDOAH_SHENANDOAHSIMPLEBITMAP_HPP
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahSimpleBitMap.inline.hpp b/src/hotspot/share/gc/shenandoah/shenandoahSimpleBitMap.inline.hpp
new file mode 100644
index 00000000000..3e602ed11e0
--- /dev/null
+++ b/src/hotspot/share/gc/shenandoah/shenandoahSimpleBitMap.inline.hpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_GC_SHENANDOAH_SHENANDOAHSIMPLEBITMAP_INLINE_HPP
+#define SHARE_GC_SHENANDOAH_SHENANDOAHSIMPLEBITMAP_INLINE_HPP
+
+#include "gc/shenandoah/shenandoahSimpleBitMap.hpp"
+
+inline idx_t ShenandoahSimpleBitMap::find_first_set_bit(idx_t beg, idx_t end) const {
+  assert((beg >= 0) && (beg < _num_bits), "precondition");
+  assert((end > beg) && (end <= _num_bits), "precondition");
+  do {
+    size_t array_idx = beg >> LogBitsPerWord;
+    uintx bit_number = beg & right_n_bits(LogBitsPerWord);
+    uintx element_bits = _bitmap[array_idx];
+    if (bit_number > 0) {
+      uintx mask_out = right_n_bits(bit_number);
+      element_bits &= ~mask_out;
+    }
+    if (element_bits) {
+      // The next set bit is here.  Find first set bit >= bit_number;
+      uintx aligned = element_bits >> bit_number;
+      uintx first_set_bit = count_trailing_zeros(aligned);
+      idx_t candidate_result = (array_idx * BitsPerWord) + bit_number + first_set_bit;
+      return (candidate_result < end)? candidate_result: end;
+    } else {
+      // Next bit is not here.
Try the next array element + beg += BitsPerWord - bit_number; + } + } while (beg < end); + return end; +} + +inline idx_t ShenandoahSimpleBitMap::find_first_set_bit(idx_t beg) const { + assert((beg >= 0) && (beg < size()), "precondition"); + return find_first_set_bit(beg, size()); +} + +inline idx_t ShenandoahSimpleBitMap::find_last_set_bit(idx_t beg, idx_t end) const { + assert((end >= 0) && (end < _num_bits), "precondition"); + assert((beg >= -1) && (beg < end), "precondition"); + do { + idx_t array_idx = end >> LogBitsPerWord; + uintx bit_number = end & right_n_bits(LogBitsPerWord); + uintx element_bits = _bitmap[array_idx]; + if (bit_number < BitsPerWord - 1){ + uintx mask_in = right_n_bits(bit_number + 1); + element_bits &= mask_in; + } + if (element_bits) { + // The prev set bit is here. Find the first set bit <= bit_number + uintx aligned = element_bits << (BitsPerWord - (bit_number + 1)); + uintx first_set_bit = count_leading_zeros(aligned); + idx_t candidate_result = array_idx * BitsPerWord + (bit_number - first_set_bit); + return (candidate_result > beg)? candidate_result: beg; + } else { + // Next bit is not here. Try the previous array element + end -= (bit_number + 1); + } + } while (end > beg); + return beg; +} + +inline idx_t ShenandoahSimpleBitMap::find_last_set_bit(idx_t end) const { + assert((end >= 0) && (end < _num_bits), "precondition"); + return find_last_set_bit(-1, end); +} + +inline idx_t ShenandoahSimpleBitMap::find_first_consecutive_set_bits(idx_t beg, size_t num_bits) const { + assert((beg >= 0) && (beg < _num_bits), "precondition"); + return find_first_consecutive_set_bits(beg, size(), num_bits); +} + +inline idx_t ShenandoahSimpleBitMap::find_last_consecutive_set_bits(idx_t end, size_t num_bits) const { + assert((end >= 0) && (end < _num_bits), "precondition"); + return find_last_consecutive_set_bits((idx_t) -1, end, num_bits); +} + +#endif // SHARE_GC_SHENANDOAH_SHENANDOAHSIMPLEBITMAP_INLINE_HPP diff --git a/test/hotspot/gtest/gc/shenandoah/test_shenandoahSimpleBitMap.cpp b/test/hotspot/gtest/gc/shenandoah/test_shenandoahSimpleBitMap.cpp new file mode 100644 index 00000000000..c9a6d638ba8 --- /dev/null +++ b/test/hotspot/gtest/gc/shenandoah/test_shenandoahSimpleBitMap.cpp @@ -0,0 +1,451 @@ +/* + * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +#include "precompiled.hpp" +#include "gc/shenandoah/shenandoahSimpleBitMap.hpp" +#include "gc/shenandoah/shenandoahSimpleBitMap.inline.hpp" + +#include +#include "utilities/ostream.hpp" +#include "utilities/vmassert_uninstall.hpp" +#include "utilities/vmassert_reinstall.hpp" +#include "unittest.hpp" + +static bool _success; +static size_t _assertion_failures; + +#define BitMapAssertEqual(a, b) ASSERT_EQ((a), (b)); if ((a) != (b)) { _assertion_failures++; } + +class ShenandoahSimpleBitMapTest: public ::testing::Test { +protected: + + static const ssize_t SMALL_BITMAP_SIZE = 512; + static const ssize_t LARGE_BITMAP_SIZE = 4096; + + // set_bits[] is an array of indexes holding bits that are supposed to be set, in increasing order. + static void verifyBitMapState(ShenandoahSimpleBitMap& bm, ssize_t size, ssize_t set_bits[], ssize_t num_set_bits) { + // Verify number of bits + BitMapAssertEqual(bm.size(), size); + + ssize_t set_bit_index = 0; + // Check that is_set(idx) for every possible idx + for (ssize_t i = 0; i < size; i++) { + bool is_set = bm.is_set(i); + bool intended_value = false;; + if (set_bit_index < num_set_bits) { + if (set_bits[set_bit_index] == i) { + intended_value = true; + set_bit_index++; + } + } else { + // If we've exhausted set_bits array, there should be no more set_bits + BitMapAssertEqual(is_set, false); + BitMapAssertEqual(set_bit_index, num_set_bits); + } + BitMapAssertEqual(is_set, intended_value); + } + BitMapAssertEqual(set_bit_index, num_set_bits); + + // Check that bits_at(array_idx) matches intended value for every valid array_idx value + set_bit_index = 0; + ssize_t alignment = bm.alignment(); + for (ssize_t i = 0; i < size; i += alignment) { + size_t bits = bm.bits_at(i); + for (ssize_t b = 0; b < alignment; b++) { + ssize_t bit_value = i + b; + bool intended_value = false;; + if (set_bit_index < num_set_bits) { + if (set_bits[set_bit_index] == bit_value) { + intended_value = true; + set_bit_index++; + } + } + size_t bit_mask = ((size_t) 0x01) << b; + bool is_set = (bits & bit_mask) != 0; + BitMapAssertEqual(is_set, intended_value); + } + } + + // Make sure find_first_set_bit() works correctly + ssize_t probe_point = 0; + for (ssize_t i = 0; i < num_set_bits; i++) { + ssize_t next_expected_bit = set_bits[i]; + probe_point = bm.find_first_set_bit(probe_point); + BitMapAssertEqual(probe_point, next_expected_bit); + probe_point++; // Prepare to look beyond the most recent bit. + } + if (probe_point < size) { + probe_point = bm.find_first_set_bit(probe_point); + BitMapAssertEqual(probe_point, size); // Verify that last failed search returns sentinel value: num bits in bit map + } + + // Confirm that find_first_set_bit() with a bounded search space works correctly + // Limit this search to the first 3/4 of the full bit map + ssize_t boundary_idx = 3 * size / 4; + probe_point = 0; + for (ssize_t i = 0; i < num_set_bits; i++) { + ssize_t next_expected_bit = set_bits[i]; + if (next_expected_bit >= boundary_idx) { + break; + } else { + probe_point = bm.find_first_set_bit(probe_point, boundary_idx); + BitMapAssertEqual(probe_point, next_expected_bit); + probe_point++; // Prepare to look beyond the most recent bit. 
+ } + } + if (probe_point < boundary_idx) { + // In case there are no set bits in the last 1/4 of bit map, confirm that last failed search returns sentinel: boundary_idx + probe_point = bm.find_first_set_bit(probe_point, boundary_idx); + BitMapAssertEqual(probe_point, boundary_idx); + } + + // Make sure find_last_set_bit() works correctly + probe_point = size - 1; + for (ssize_t i = num_set_bits - 1; i >= 0; i--) { + ssize_t next_expected_bit = set_bits[i]; + probe_point = bm.find_last_set_bit(probe_point); + BitMapAssertEqual(probe_point, next_expected_bit); + probe_point--; // Prepare to look before the most recent bit. + } + if (probe_point >= 0) { + probe_point = bm.find_last_set_bit(probe_point); + BitMapAssertEqual(probe_point, (ssize_t) -1); // Verify that last failed search returns sentinel value: -1 + } + + // Confirm that find_last_set_bit() with a bounded search space works correctly + // Limit this search to the last 3/4 of the full bit map + boundary_idx = size / 4; + probe_point = size - 1; + for (ssize_t i = num_set_bits - 1; i >= 0; i--) { + ssize_t next_expected_bit = set_bits[i]; + if (next_expected_bit > boundary_idx) { + probe_point = bm.find_last_set_bit(boundary_idx, probe_point); + BitMapAssertEqual(probe_point, next_expected_bit); + probe_point--; + } else { + break; + } + } + if (probe_point > boundary_idx) { + probe_point = bm.find_last_set_bit(boundary_idx, probe_point); + // Verify that last failed search returns sentinel value: boundary_idx + BitMapAssertEqual(probe_point, boundary_idx); + } + + // What's the longest cluster of consecutive bits + ssize_t previous_value = -2; + ssize_t longest_run = 0; + ssize_t current_run = 0; + for (ssize_t i = 0; i < num_set_bits; i++) { + ssize_t next_expected_bit = set_bits[i]; + if (next_expected_bit == previous_value + 1) { + current_run++; + } else { + previous_value = next_expected_bit; + current_run = 1; + } + if (current_run > longest_run) { + longest_run = current_run; + } + previous_value = next_expected_bit; + } + + // Confirm that find_first_consecutive_set_bits() works for each cluster size known to have at least one match + for (ssize_t cluster_size = 1; cluster_size <= longest_run; cluster_size++) { + // Verify that find_first_consecutive_set_bits() works + ssize_t bit_idx = 0; + ssize_t probe_point = 0; + while ((probe_point <= size - cluster_size) && (bit_idx <= num_set_bits - cluster_size)) { + bool cluster_found = false; + while (!cluster_found && (bit_idx + cluster_size <= num_set_bits)) { + cluster_found = true; + for (ssize_t i = 1; i < cluster_size; i++) { + if (set_bits[bit_idx] + i != set_bits[bit_idx + i]) { + cluster_found = false; + bit_idx++; + break; + } + } + } + if (cluster_found) { + ssize_t next_expected_cluster = set_bits[bit_idx]; + ssize_t orig_probe_point = probe_point; + probe_point = bm.find_first_consecutive_set_bits(orig_probe_point, cluster_size); + BitMapAssertEqual(next_expected_cluster, probe_point); + probe_point++; + bit_idx++; + } else { + bit_idx++; + break; + } + } + if (probe_point < size) { + // Confirm that the last request, which fails to find a cluster, returns sentinel value: num_bits + probe_point = bm.find_first_consecutive_set_bits(probe_point, cluster_size); + BitMapAssertEqual(probe_point, size); + } + + // Repeat the above experiment, using 3/4 size as the search boundary_idx + bit_idx = 0; + probe_point = 0; + boundary_idx = 4 * size / 4; + while ((probe_point <= boundary_idx - cluster_size) && (bit_idx <= num_set_bits - cluster_size)) { + bool cluster_found = 
false; + while (!cluster_found && (bit_idx + cluster_size <= num_set_bits)) { + cluster_found = true; + for (int i = 1; i < cluster_size; i++) { + if (set_bits[bit_idx] + i != set_bits[bit_idx + i]) { + cluster_found = false; + bit_idx++; + break; + } + } + } + if (cluster_found) { + ssize_t next_expected_cluster = set_bits[bit_idx]; + probe_point = bm.find_first_consecutive_set_bits(probe_point, boundary_idx, cluster_size); + BitMapAssertEqual(next_expected_cluster, probe_point); + probe_point++; + bit_idx++; + } else { + bit_idx++; + } + } + if (probe_point < boundary_idx) { + // Confirm that the last request, which fails to find a cluster, returns sentinel value: boundary_idx + probe_point = bm.find_first_consecutive_set_bits(probe_point, boundary_idx, cluster_size); + BitMapAssertEqual(probe_point, boundary_idx); + } + + // Verify that find_last_consecutive_set_bits() works + bit_idx = num_set_bits - 1; + probe_point = size - 1; + // Iterate over all set bits in reverse order + while (bit_idx + 1 >= cluster_size) { + bool cluster_found = true; + for (int i = 1; i < cluster_size; i++) { + if (set_bits[bit_idx] - i != set_bits[bit_idx - i]) { + cluster_found = false; + break; + } + } + if (cluster_found) { + ssize_t next_expected_cluster = set_bits[bit_idx] + 1 - cluster_size; + probe_point = bm.find_last_consecutive_set_bits(probe_point, cluster_size); + BitMapAssertEqual(next_expected_cluster, probe_point); + probe_point = probe_point + cluster_size - 2; + bit_idx--; + } else { + bit_idx--; + } + } + if (probe_point >= 0) { + // Confirm that the last request, which fails to find a cluster, returns sentinel value: boundary_idx + probe_point = bm.find_last_consecutive_set_bits(boundary_idx, probe_point, cluster_size); + BitMapAssertEqual(probe_point, (ssize_t) boundary_idx); + } + + // Verify that find_last_consecutive_set_bits() works with the search range bounded at 1/4 size + bit_idx = num_set_bits - 1; + probe_point = size - 1; + boundary_idx = size / 4; + while (bit_idx + 1 >= cluster_size) { + bool cluster_found = true; + for (int i = 1; i < cluster_size; i++) { + if (set_bits[bit_idx] - i != set_bits[bit_idx - i]) { + cluster_found = false; + break; + } + } + if (cluster_found && (set_bits[bit_idx] + 1 - cluster_size > boundary_idx)) { + ssize_t next_expected_cluster = set_bits[bit_idx] + 1 - cluster_size; + probe_point = bm.find_last_consecutive_set_bits(boundary_idx, probe_point, cluster_size); + BitMapAssertEqual(next_expected_cluster, probe_point); + probe_point = probe_point + cluster_size - 2; + bit_idx--; + } else if (set_bits[bit_idx] + 1 - cluster_size <= boundary_idx) { + break; + } else { + bit_idx--; + } + } + if (probe_point > boundary_idx) { + // Confirm that the last request, which fails to find a cluster, returns sentinel value: boundary_idx + probe_point = bm.find_last_consecutive_set_bits(boundary_idx, probe_point, cluster_size); + BitMapAssertEqual(probe_point, boundary_idx); + } + } + + // Confirm that find_first_consecutive_set_bits() works for a cluster size known not to have any matches + probe_point = bm.find_first_consecutive_set_bits(0, longest_run + 1); + BitMapAssertEqual(probe_point, size); // Confirm: failed search returns sentinel: size + + probe_point = bm.find_last_consecutive_set_bits(size - 1, longest_run + 1); + BitMapAssertEqual(probe_point, (ssize_t) -1); // Confirm: failed search returns sentinel: -1 + + boundary_idx = 3 * size / 4; + probe_point = bm.find_first_consecutive_set_bits(0, boundary_idx, longest_run + 1); + 
BitMapAssertEqual(probe_point, boundary_idx); // Confirm: failed search returns sentinel: boundary_idx + + boundary_idx = size / 4; + probe_point = bm.find_last_consecutive_set_bits(boundary_idx, size - 1, longest_run + 1); + BitMapAssertEqual(probe_point, boundary_idx); // Confirm: failed search returns sentinel: boundary_idx + } + +public: + + static bool run_test() { + + _success = false; + _assertion_failures = 0; + + ShenandoahSimpleBitMap bm_small(SMALL_BITMAP_SIZE); + ShenandoahSimpleBitMap bm_large(LARGE_BITMAP_SIZE); + + // Initial state of each bitmap is all bits are clear. Confirm this: + ssize_t set_bits_0[1] = { 0 }; + verifyBitMapState(bm_small, SMALL_BITMAP_SIZE, set_bits_0, 0); + verifyBitMapState(bm_large, LARGE_BITMAP_SIZE, set_bits_0, 0); + + bm_small.set_bit(5); + bm_small.set_bit(63); + bm_small.set_bit(128); + ssize_t set_bits_1[3] = { 5, 63, 128 }; + verifyBitMapState(bm_small, SMALL_BITMAP_SIZE, set_bits_1, 3); + + bm_large.set_bit(5); + bm_large.set_bit(63); + bm_large.set_bit(128); + verifyBitMapState(bm_large, LARGE_BITMAP_SIZE, set_bits_1, 3); + + // Test some consecutive bits + bm_small.set_bit(140); + bm_small.set_bit(141); + bm_small.set_bit(142); + + bm_small.set_bit(253); + bm_small.set_bit(254); + bm_small.set_bit(255); + + bm_small.set_bit(271); + bm_small.set_bit(272); + + bm_small.set_bit(320); + bm_small.set_bit(321); + bm_small.set_bit(322); + + bm_small.set_bit(361); + + ssize_t set_bits_2[15] = { 5, 63, 128, 140, 141, 142, 253, 254, 255, 271, 272, 320, 321, 322, 361 }; + verifyBitMapState(bm_small, SMALL_BITMAP_SIZE, set_bits_2, 15); + + bm_large.set_bit(140); + bm_large.set_bit(141); + bm_large.set_bit(142); + + bm_large.set_bit(1021); + bm_large.set_bit(1022); + bm_large.set_bit(1023); + + bm_large.set_bit(1051); + + bm_large.set_bit(1280); + bm_large.set_bit(1281); + bm_large.set_bit(1282); + + bm_large.set_bit(1300); + bm_large.set_bit(1301); + bm_large.set_bit(1302); + + ssize_t set_bits_3[16] = { 5, 63, 128, 140, 141, 142, 1021, 1022, 1023, 1051, 1280, 1281, 1282, 1300, 1301, 1302 }; + verifyBitMapState(bm_large, LARGE_BITMAP_SIZE, set_bits_3, 16); + + // Test clear_bit + bm_small.clear_bit(141); + bm_small.clear_bit(253); + ssize_t set_bits_4[13] = { 5, 63, 128, 140, 142, 254, 255, 271, 272, 320, 321, 322, 361 }; + verifyBitMapState(bm_small, SMALL_BITMAP_SIZE, set_bits_4, 13); + + bm_large.clear_bit(5); + bm_large.clear_bit(63); + bm_large.clear_bit(128); + bm_large.clear_bit(141); + ssize_t set_bits_5[12] = { 140, 142, 1021, 1022, 1023, 1051, 1280, 1281, 1282, 1300, 1301, 1302 }; + verifyBitMapState(bm_large, LARGE_BITMAP_SIZE, set_bits_5, 12); + + // Look for large island of contiguous surrounded by smaller islands of contiguous + bm_large.set_bit(1024); + bm_large.set_bit(1025); // size-5 island from 1021 to 1025 + bm_large.set_bit(1027); + bm_large.set_bit(1028); + bm_large.set_bit(1029); + bm_large.set_bit(1030); + bm_large.set_bit(1031); + bm_large.set_bit(1032); // size-6 island from 1027 to 1032 + bm_large.set_bit(1034); + bm_large.set_bit(1035); + bm_large.set_bit(1036); // size-3 island from 1034 to 1036 + ssize_t set_bits_6[23] = { 140, 142, 1021, 1022, 1023, 1024, 1025, 1027, 1028, 1029, 1030, + 1031, 1032, 1034, 1035, 1036, 1051, 1280, 1281, 1282, 1300, 1301, 1302 }; + verifyBitMapState(bm_large, LARGE_BITMAP_SIZE, set_bits_6, 23); + + // Test that entire bitmap word (from 1024 to 1088) is 1's + ssize_t set_bits_7[76]; + set_bits_7[0] = 140; + set_bits_7[1] = 142; + set_bits_7[2] = 1021; + set_bits_7[3] = 1022; + set_bits_7[4] = 
1023; + size_t bit_idx = 5; + for (ssize_t i = 1024; i <= 1088; i++) { + bm_large.set_bit(i); + set_bits_7[bit_idx++] = i; + } + set_bits_7[bit_idx++] = 1280; + set_bits_7[bit_idx++] = 1281; + set_bits_7[bit_idx++] = 1282; + set_bits_7[bit_idx++] = 1300; + set_bits_7[bit_idx++] = 1301; + set_bits_7[bit_idx++] = 1302; + verifyBitMapState(bm_large, LARGE_BITMAP_SIZE, set_bits_7, bit_idx); + + // Test clear_all() + bm_small.clear_all(); + bm_large.clear_all(); + + verifyBitMapState(bm_small, SMALL_BITMAP_SIZE, set_bits_0, 0); + verifyBitMapState(bm_large, LARGE_BITMAP_SIZE, set_bits_0, 0); + + _success = true; + return true; + } + +}; + +TEST(BasicShenandoahSimpleBitMapTest, minimum_test) { + + bool result = ShenandoahSimpleBitMapTest::run_test(); + ASSERT_EQ(result, true); + ASSERT_EQ(_success, true); + ASSERT_EQ(_assertion_failures, (size_t) 0); +}
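
Reviewer note (not part of the patch): the sketch below illustrates how the two search directions provided by ShenandoahSimpleBitMap are intended to be used, per the TODO comment in the header -- a low-to-high search and a high-to-low search for a run of contiguous free regions large enough for a humongous allocation. The helper names first_fit_for_humongous and last_fit_for_humongous and the num_regions parameter are hypothetical; the real callers live in ShenandoahRegionPartitions and ShenandoahFreeSet. It uses only the API introduced by this patch and assumes the usual HotSpot build environment.

#include "gc/shenandoah/shenandoahSimpleBitMap.hpp"
#include "gc/shenandoah/shenandoahSimpleBitMap.inline.hpp"

// Lowest-indexed run of num_regions consecutive set (free) bits, or -1 if no such run exists.
// A failed forward search returns the bitmap size as its sentinel.
static idx_t first_fit_for_humongous(const ShenandoahSimpleBitMap& free_regions, size_t num_regions) {
  idx_t beg = free_regions.find_first_consecutive_set_bits(0, num_regions);
  return (beg == free_regions.size()) ? -1 : beg;
}

// Highest-indexed run of num_regions consecutive set (free) bits, or -1 if no such run exists.
// A failed backward search already returns -1 as its sentinel.
static idx_t last_fit_for_humongous(const ShenandoahSimpleBitMap& free_regions, size_t num_regions) {
  return free_regions.find_last_consecutive_set_bits(free_regions.size() - 1, num_regions);
}

Biasing mutator allocations toward low indices and collector/humongous placement decisions toward the opposite end of the bitmap is the design motivation stated in the header's TODO comment; the sketch merely shows which overloads and sentinels each direction relies on.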