8261644: NMT: Simplifications and cleanups

Reviewed-by: coleenp, zgu
This commit is contained in:
Thomas Stuefe 2021-02-19 06:20:38 +00:00
parent ed93bc9a01
commit 5caf686c80
10 changed files with 134 additions and 223 deletions

@ -30,19 +30,19 @@
// Allocation site represents a code path that makes a memory
// allocation
template <class E> class AllocationSite {
class AllocationSite {
private:
NativeCallStack _call_stack;
E e;
MEMFLAGS _flag;
const NativeCallStack _call_stack;
const MEMFLAGS _flag;
public:
AllocationSite(const NativeCallStack& stack, MEMFLAGS flag) : _call_stack(stack), _flag(flag) { }
int hash() const { return _call_stack.hash(); }
bool equals(const NativeCallStack& stack) const {
return _call_stack.equals(stack);
}
bool equals(const AllocationSite<E>& other) const {
bool equals(const AllocationSite& other) const {
return other.equals(_call_stack);
}
@ -50,10 +50,6 @@ template <class E> class AllocationSite {
return &_call_stack;
}
// Information regarding this allocation
E* data() { return &e; }
const E* peek() const { return &e; }
MEMFLAGS flag() const { return _flag; }
};

@ -242,6 +242,82 @@ void MallocSiteTable::AccessLock::exclusiveLock() {
_lock_state = ExclusiveLock;
}
// Prints tuning-oriented statistics about the malloc site table to st:
//  - total number of entries and how many of them currently have size() == 0,
//  - the distribution of bucket chain lengths,
//  - the distribution of captured call stack depths.
// The table is walked under a shared AccessLock; if the shared lock cannot be
// obtained, nothing is printed.
void MallocSiteTable::print_tuning_statistics(outputStream* st) {
  AccessLock locker(&_access_count);
  if (!locker.sharedLock()) {
    return;
  }

  // Total number of allocation sites, include empty sites
  int total_entries = 0;
  // Number of allocation sites that have all memory freed
  int empty_entries = 0;
  // Number of captured call stack distribution
  int stack_depth_distribution[NMT_TrackingStackDepth + 1] = { 0 };
  // Chain lengths
  int lengths[table_size] = { 0 };

  for (int i = 0; i < table_size; i ++) {
    int this_chain_length = 0;
    const MallocSiteHashtableEntry* head = _table[i];
    while (head != NULL) {
      total_entries ++;
      this_chain_length ++;
      if (head->size() == 0) {
        empty_entries ++;
      }
      const int callstack_depth = head->peek()->call_stack()->frames();
      assert(callstack_depth >= 0 && callstack_depth <= NMT_TrackingStackDepth,
             "Sanity (%d)", callstack_depth);
      stack_depth_distribution[callstack_depth] ++;
      head = head->next();
    }
    lengths[i] = this_chain_length;
  }

  // An empty table would otherwise yield 0/0 (NaN) for the percentage;
  // report 0% in that case.
  const float empty_percent =
      (total_entries > 0) ? ((float)empty_entries * 100) / total_entries : 0.0f;

  st->print_cr("Malloc allocation site table:");
  st->print_cr("\tTotal entries: %d", total_entries);
  st->print_cr("\tEmpty entries: %d (%2.2f%%)", empty_entries, empty_percent);
  st->cr();

  // We report the hash distribution (chain length distribution) of the n shortest chains
  // - under the assumption that this usually contains all lengths. Reporting threshold
  // is 20, and the expected avg chain length is 5..6 (see table size).
  static const int chain_length_threshold = 20;
  int chain_length_distribution[chain_length_threshold] = { 0 };
  int over_threshold = 0;
  int longest_chain_length = 0;
  for (int i = 0; i < table_size; i ++) {
    if (lengths[i] >= chain_length_threshold) {
      // Chains at or above the threshold are aggregated into a single line item.
      over_threshold ++;
    } else {
      chain_length_distribution[lengths[i]] ++;
    }
    longest_chain_length = MAX2(longest_chain_length, lengths[i]);
  }

  st->print_cr("Hash distribution:");
  if (chain_length_distribution[0] == 0) {
    st->print_cr("no empty buckets.");
  } else {
    st->print_cr("%d buckets are empty.", chain_length_distribution[0]);
  }
  // Only print lengths up to the longest chain actually seen (capped at the threshold).
  for (int len = 1; len < MIN2(longest_chain_length + 1, chain_length_threshold); len ++) {
    st->print_cr("%2d %s: %d.", len, (len == 1 ? " entry" : "entries"), chain_length_distribution[len]);
  }
  if (longest_chain_length >= chain_length_threshold) {
    st->print_cr(">=%2d entries: %d.", chain_length_threshold, over_threshold);
  }
  st->print_cr("most entries: %d.", longest_chain_length);
  st->cr();

  st->print_cr("Call stack depth distribution:");
  for (int i = 0; i <= NMT_TrackingStackDepth; i ++) {
    st->print_cr("\t%d: %d", i, stack_depth_distribution[i]);
  }
  st->cr();
}
// Tries to install entry as this entry's _next link via
// Atomic::replace_if_null and reports whether that call succeeded.
bool MallocSiteHashtableEntry::atomic_insert(MallocSiteHashtableEntry* entry) {
  const bool linked = Atomic::replace_if_null(&_next, entry);
  return linked;
}

@ -38,22 +38,19 @@
// MallocSite represents a code path that eventually calls
// os::malloc() to allocate memory
class MallocSite : public AllocationSite<MemoryCounter> {
class MallocSite : public AllocationSite {
MemoryCounter _c;
public:
MallocSite() :
AllocationSite<MemoryCounter>(NativeCallStack::empty_stack(), mtNone) {}
MallocSite(const NativeCallStack& stack, MEMFLAGS flags) :
AllocationSite<MemoryCounter>(stack, flags) {}
AllocationSite(stack, flags) {}
void allocate(size_t size) { data()->allocate(size); }
void deallocate(size_t size) { data()->deallocate(size); }
void allocate(size_t size) { _c.allocate(size); }
void deallocate(size_t size) { _c.deallocate(size); }
// Memory allocated from this code path
size_t size() const { return peek()->size(); }
size_t size() const { return _c.size(); }
// The number of calls were made
size_t count() const { return peek()->count(); }
size_t count() const { return _c.count(); }
};
// Malloc site hashtable entry
@ -63,7 +60,6 @@ class MallocSiteHashtableEntry : public CHeapObj<mtNMT> {
MallocSiteHashtableEntry* volatile _next;
public:
MallocSiteHashtableEntry() : _next(NULL) { }
MallocSiteHashtableEntry(NativeCallStack stack, MEMFLAGS flags):
_malloc_site(stack, flags), _next(NULL) {
@ -79,17 +75,9 @@ class MallocSiteHashtableEntry : public CHeapObj<mtNMT> {
// The operation can be failed due to contention from other thread.
bool atomic_insert(MallocSiteHashtableEntry* entry);
void set_callsite(const MallocSite& site) {
_malloc_site = site;
}
inline const MallocSite* peek() const { return &_malloc_site; }
inline MallocSite* data() { return &_malloc_site; }
inline long hash() const { return _malloc_site.hash(); }
inline bool equals(const NativeCallStack& stack) const {
return _malloc_site.equals(stack);
}
// Allocation/deallocation on this allocation site
inline void allocate(size_t size) { _malloc_site.allocate(size); }
inline void deallocate(size_t size) { _malloc_site.deallocate(size); }
@ -229,6 +217,8 @@ class MallocSiteTable : AllStatic {
// Walk this table.
static bool walk_malloc_site(MallocSiteWalker* walker);
static void print_tuning_statistics(outputStream* st);
private:
static MallocSiteHashtableEntry* new_entry(const NativeCallStack& key, MEMFLAGS flags);
static void reset();

@ -211,138 +211,12 @@ void MemTracker::report(bool summary_only, outputStream* output, size_t scale) {
}
}
// This is a walker to gather malloc site hashtable statistics,
// the result is used for tuning.
// Walker that gathers malloc site hashtable statistics (entry counts, call
// stack depth distribution, hash bucket length distribution). The result is
// used for tuning. Usage: pass to MallocSiteTable::walk_malloc_site(), then
// call completed() once, then report_statistics().
class StatisticsWalker : public MallocSiteWalker {
 private:
  enum Threshold {
    // aggregates statistics over this threshold into one
    // line item.
    report_threshold = 20
  };

 private:
  // Number of allocation sites that have all memory freed
  int   _empty_entries;
  // Total number of allocation sites, include empty sites
  int   _total_entries;
  // Number of captured call stack distribution; slot i counts stacks with i + 1 frames
  int   _stack_depth_distribution[NMT_TrackingStackDepth];
  // Hash distribution; slot i counts buckets holding i + 1 entries
  int   _hash_distribution[report_threshold];
  // Number of hash buckets that have entries over the threshold
  int   _bucket_over_threshold;

  // The hash bucket that walker is currently walking (-1 before the first site)
  int   _current_hash_bucket;
  // The length of current hash bucket
  int   _current_bucket_length;
  // Number of hash buckets that are not empty
  int   _used_buckets;
  // Longest hash bucket length
  int   _longest_bucket_length;

 public:
  StatisticsWalker() : _empty_entries(0), _total_entries(0) {
    int index = 0;
    for (index = 0; index < NMT_TrackingStackDepth; index ++) {
      _stack_depth_distribution[index] = 0;
    }
    for (index = 0; index < report_threshold; index ++) {
      _hash_distribution[index] = 0;
    }
    _bucket_over_threshold = 0;
    _longest_bucket_length = 0;
    _current_hash_bucket = -1;
    _current_bucket_length = 0;
    _used_buckets = 0;
  }

  // Accounts one malloc site. Always returns true.
  virtual bool do_malloc_site(const MallocSite* e) {
    if (e->size() == 0) _empty_entries ++;
    _total_entries ++;

    // stack depth distribution
    // Only depths 1..NMT_TrackingStackDepth have a slot; a stack with zero
    // frames would index slot -1, so it is left out of the distribution.
    int frames = e->call_stack()->frames();
    if (frames >= 1 && frames <= NMT_TrackingStackDepth) {
      _stack_depth_distribution[frames - 1] ++;
    }

    // hash distribution
    // A change of bucket index between consecutive sites closes the previous bucket.
    int hash_bucket = ((unsigned)e->hash()) % MallocSiteTable::hash_buckets();
    if (_current_hash_bucket == -1) {
      _current_hash_bucket = hash_bucket;
      _current_bucket_length = 1;
    } else if (_current_hash_bucket == hash_bucket) {
      _current_bucket_length ++;
    } else {
      record_bucket_length(_current_bucket_length);
      _current_hash_bucket = hash_bucket;
      _current_bucket_length = 1;
    }
    return true;
  }

  // walk completed
  void completed() {
    // Nothing to flush if no site was ever visited; recording a length of 0
    // would count a used bucket that does not exist and index slot -1.
    if (_current_hash_bucket != -1) {
      record_bucket_length(_current_bucket_length);
    }
  }

  // Prints the gathered statistics to out.
  void report_statistics(outputStream* out) {
    int index;
    // Avoid 0/0 (NaN) when no entries were walked.
    const float empty_percent =
        (_total_entries > 0) ? ((float)_empty_entries * 100) / _total_entries : 0.0f;

    out->print_cr("Malloc allocation site table:");
    out->print_cr("\tTotal entries: %d", _total_entries);
    out->print_cr("\tEmpty entries: %d (%2.2f%%)", _empty_entries, empty_percent);
    out->print_cr(" ");
    out->print_cr("Hash distribution:");
    if (_used_buckets < MallocSiteTable::hash_buckets()) {
      out->print_cr("empty bucket: %d", (MallocSiteTable::hash_buckets() - _used_buckets));
    }
    for (index = 0; index < report_threshold; index ++) {
      if (_hash_distribution[index] != 0) {
        if (index == 0) {
          out->print_cr(" %d entry: %d", 1, _hash_distribution[0]);
        } else if (index < 9) { // single digit
          out->print_cr(" %d entries: %d", (index + 1), _hash_distribution[index]);
        } else {
          out->print_cr(" %d entries: %d", (index + 1), _hash_distribution[index]);
        }
      }
    }
    if (_bucket_over_threshold > 0) {
      out->print_cr(" >%d entries: %d", report_threshold, _bucket_over_threshold);
    }
    out->print_cr("most entries: %d", _longest_bucket_length);
    out->print_cr(" ");
    out->print_cr("Call stack depth distribution:");
    for (index = 0; index < NMT_TrackingStackDepth; index ++) {
      if (_stack_depth_distribution[index] > 0) {
        out->print_cr("\t%d: %d", index + 1, _stack_depth_distribution[index]);
      }
    }
  }

 private:
  // Accounts one finished bucket of the given length (expected to be >= 1).
  void record_bucket_length(int length) {
    if (length < 1) {
      return; // defensive: no slot exists for an empty bucket
    }
    _used_buckets ++;
    if (length <= report_threshold) {
      _hash_distribution[length - 1] ++;
    } else {
      _bucket_over_threshold ++;
    }
    _longest_bucket_length = MAX2(_longest_bucket_length, length);
  }
};
void MemTracker::tuning_statistics(outputStream* out) {
// NMT statistics
StatisticsWalker walker;
MallocSiteTable::walk_malloc_site(&walker);
walker.completed();
out->print_cr("Native Memory Tracking Statistics:");
out->print_cr("Malloc allocation site table size: %d", MallocSiteTable::hash_buckets());
out->print_cr(" Tracking stack depth: %d", NMT_TrackingStackDepth);
NOT_PRODUCT(out->print_cr("Peak concurrent access: %d", MallocSiteTable::access_peak_count());)
out->print_cr(" ");
walker.report_statistics(out);
out->cr();
MallocSiteTable::print_tuning_statistics(out);
}

@ -87,9 +87,9 @@ class MemTracker : AllStatic {
#include "services/virtualMemoryTracker.hpp"
#define CURRENT_PC ((MemTracker::tracking_level() == NMT_detail) ? \
NativeCallStack(0, true) : NativeCallStack::empty_stack())
NativeCallStack(0) : NativeCallStack::empty_stack())
#define CALLER_PC ((MemTracker::tracking_level() == NMT_detail) ? \
NativeCallStack(1, true) : NativeCallStack::empty_stack())
NativeCallStack(1) : NativeCallStack::empty_stack())
class MemBaseline;

@ -91,7 +91,7 @@ void ThreadStackTracker::delete_thread_stack(void* base, size_t size) {
if (MemTracker::tracking_level() == NMT_detail) {
ThreadCritical tc;
assert(_simple_thread_stacks != NULL, "Must be initialized");
SimpleThreadStackSite site((address)base, size);
SimpleThreadStackSite site((address)base, size, NativeCallStack::empty_stack()); // Fake object just to serve as compare target for delete
bool removed = _simple_thread_stacks->remove(site);
assert(removed, "Must exist");
}

@ -33,39 +33,14 @@
#include "utilities/nativeCallStack.hpp"
#include "utilities/linkedlist.hpp"
class SimpleThreadStackSite;
class SimpleThreadStack {
friend class SimpleThreadStackSite;
private:
address _base;
size_t _size;
public:
SimpleThreadStack() : _base(NULL), _size(0) { }
bool equals(const SimpleThreadStack& s) const {
return base() == s.base();
}
size_t size() const { return _size; }
address base() const { return _base; }
private:
void set_size(size_t size) { _size = size; }
void set_base(address base) { _base = base; }
};
class SimpleThreadStackSite : public AllocationSite<SimpleThreadStack> {
class SimpleThreadStackSite : public AllocationSite {
const address _base;
const size_t _size;
public:
SimpleThreadStackSite(address base, size_t size, const NativeCallStack& stack) :
AllocationSite<SimpleThreadStack>(stack, mtThreadStack) {
data()->set_size(size);
data()->set_base(base);
}
SimpleThreadStackSite(address base, size_t size) :
AllocationSite<SimpleThreadStack>(NativeCallStack::empty_stack(), mtThreadStack) {
data()->set_base(base);
data()->set_size(size);
}
AllocationSite(stack, mtThreadStack),
_base(base),
_size(size) {}
bool equals(const SimpleThreadStackSite& mts) const {
bool eq = base() == mts.base();
@ -73,8 +48,8 @@ public:
return eq;
}
size_t size() const { return peek()->size(); }
address base() const { return peek()->base(); }
size_t size() const { return _size; }
address base() const { return _base; }
};
/*

@ -68,17 +68,18 @@ class VirtualMemory {
};
// Virtual memory allocation site, keeps track where the virtual memory is reserved.
class VirtualMemoryAllocationSite : public AllocationSite<VirtualMemory> {
class VirtualMemoryAllocationSite : public AllocationSite {
VirtualMemory _c;
public:
VirtualMemoryAllocationSite(const NativeCallStack& stack, MEMFLAGS flag) :
AllocationSite<VirtualMemory>(stack, flag) { }
AllocationSite(stack, flag) { }
inline void reserve_memory(size_t sz) { data()->reserve_memory(sz); }
inline void commit_memory (size_t sz) { data()->commit_memory(sz); }
inline void uncommit_memory(size_t sz) { data()->uncommit_memory(sz); }
inline void release_memory(size_t sz) { data()->release_memory(sz); }
inline size_t reserved() const { return peek()->reserved(); }
inline size_t committed() const { return peek()->committed(); }
inline void reserve_memory(size_t sz) { _c.reserve_memory(sz); }
inline void commit_memory (size_t sz) { _c.commit_memory(sz); }
inline void uncommit_memory(size_t sz) { _c.uncommit_memory(sz); }
inline void release_memory(size_t sz) { _c.release_memory(sz); }
inline size_t reserved() const { return _c.reserved(); }
inline size_t committed() const { return _c.committed(); }
};
class VirtualMemorySummary;

@ -28,6 +28,8 @@
#include "utilities/globalDefinitions.hpp"
#include "utilities/nativeCallStack.hpp"
const NativeCallStack NativeCallStack::_empty_stack; // Uses default ctor
static unsigned int calculate_hash(address stack[NMT_TrackingStackDepth]) {
uintptr_t hash = 0;
for (int i = 0; i < NMT_TrackingStackDepth; i++) {
@ -36,29 +38,23 @@ static unsigned int calculate_hash(address stack[NMT_TrackingStackDepth]) {
return hash;
}
NativeCallStack::NativeCallStack(int toSkip, bool fillStack) :
NativeCallStack::NativeCallStack(int toSkip) :
_hash_value(0) {
if (fillStack) {
// We need to skip the NativeCallStack::NativeCallStack frame if a tail call is NOT used
// to call os::get_native_stack. A tail call is used if _NMT_NOINLINE_ is not defined
// (which means this is not a slowdebug build), and we are on 64-bit (except Windows).
// This is not necessarily a rule, but what has been observed to date.
// We need to skip the NativeCallStack::NativeCallStack frame if a tail call is NOT used
// to call os::get_native_stack. A tail call is used if _NMT_NOINLINE_ is not defined
// (which means this is not a slowdebug build), and we are on 64-bit (except Windows).
// This is not necessarily a rule, but what has been observed to date.
#if (defined(_NMT_NOINLINE_) || defined(_WINDOWS) || !defined(_LP64) || defined(PPC64))
// Not a tail call.
toSkip++;
// Not a tail call.
toSkip++;
#if (defined(_NMT_NOINLINE_) && defined(BSD) && defined(_LP64))
// Mac OS X slowdebug builds have this odd behavior where NativeCallStack::NativeCallStack
// appears as two frames, so we need to skip an extra frame.
toSkip++;
// Mac OS X slowdebug builds have this odd behavior where NativeCallStack::NativeCallStack
// appears as two frames, so we need to skip an extra frame.
toSkip++;
#endif // Special-case for BSD.
#endif // Not a tail call.
os::get_native_stack(_stack, NMT_TrackingStackDepth, toSkip);
} else {
for (int index = 0; index < NMT_TrackingStackDepth; index ++) {
_stack[index] = NULL;
}
}
os::get_native_stack(_stack, NMT_TrackingStackDepth, toSkip);
_hash_value = calculate_hash(_stack);
}

@ -57,15 +57,18 @@ class NativeCallStack : public StackObj {
private:
address _stack[NMT_TrackingStackDepth];
unsigned int _hash_value;
static const NativeCallStack _empty_stack;
public:
NativeCallStack(int toSkip = 0, bool fillStack = false);
// Default ctor creates an empty stack.
// (it may make sense to remove this altogether but it is used in a few places).
NativeCallStack() : _hash_value(0) {
memset(_stack, 0, sizeof(_stack));
}
NativeCallStack(int toSkip);
NativeCallStack(address* pc, int frameCount);
static inline const NativeCallStack& empty_stack() {
static const NativeCallStack EMPTY_STACK(0, false);
return EMPTY_STACK;
}
static inline const NativeCallStack& empty_stack() { return _empty_stack; }
// if it is an empty stack
inline bool is_empty() const {