8274298: JFR Thread Sampler thread must not acquire malloc lock after suspending a thread because of possible deadlock

Reviewed-by: egahlin
This commit is contained in:
Markus Grönlund 2021-12-09 09:29:59 +00:00
parent 84baea753a
commit 965ea8d9cd
9 changed files with 109 additions and 28 deletions

View File

@ -27,8 +27,10 @@
#include "jfr/recorder/jfrRecorder.hpp"
#include "jfr/periodic/sampling/jfrCallTrace.hpp"
#include "jfr/periodic/sampling/jfrThreadSampler.hpp"
#include "jfr/recorder/checkpoint/types/traceid/jfrTraceIdLoadBarrier.inline.hpp"
#include "jfr/recorder/service/jfrOptionSet.hpp"
#include "jfr/recorder/stacktrace/jfrStackTraceRepository.hpp"
#include "jfr/recorder/storage/jfrBuffer.hpp"
#include "jfr/support/jfrThreadId.hpp"
#include "jfr/support/jfrThreadLocal.hpp"
#include "jfr/utilities/jfrTime.hpp"
@ -323,10 +325,15 @@ class JfrThreadSampler : public NonJavaThread {
JavaThread* _last_thread_native;
size_t _interval_java;
size_t _interval_native;
const size_t _min_size; // for enqueue buffer monitoring
const size_t _renew_size;
int _cur_index;
const u4 _max_frames;
volatile bool _disenrolled;
const JfrBuffer* get_enqueue_buffer();
const JfrBuffer* renew_if_full(const JfrBuffer* enqueue_buffer);
JavaThread* next_thread(ThreadsList* t_list, JavaThread* first_sampled, JavaThread* current);
void task_stacktrace(JfrSampleType type, JavaThread** last_thread);
JfrThreadSampler(size_t interval_java, size_t interval_native, u4 max_frames);
@ -396,6 +403,8 @@ JfrThreadSampler::JfrThreadSampler(size_t interval_java, size_t interval_native,
_last_thread_native(NULL),
_interval_java(interval_java),
_interval_native(interval_native),
_min_size(JfrOptionSet::stackdepth() * sizeof(intptr_t)),
_renew_size(_min_size * 2),
_cur_index(-1),
_max_frames(max_frames),
_disenrolled(true) {
@ -520,6 +529,15 @@ void JfrThreadSampler::post_run() {
delete this;
}
const JfrBuffer* JfrThreadSampler::get_enqueue_buffer() {
const JfrBuffer* buffer = JfrTraceIdLoadBarrier::get_enqueue_buffer(this);
return buffer != nullptr ? renew_if_full(buffer) : JfrTraceIdLoadBarrier::renew_enqueue_buffer(_renew_size, this);
}
const JfrBuffer* JfrThreadSampler::renew_if_full(const JfrBuffer* enqueue_buffer) {
assert(enqueue_buffer != nullptr, "invariant");
return enqueue_buffer->free_size() < _min_size ? JfrTraceIdLoadBarrier::renew_enqueue_buffer(_renew_size, this) : enqueue_buffer;
}
void JfrThreadSampler::task_stacktrace(JfrSampleType type, JavaThread** last_thread) {
ResourceMark rm;
@ -530,7 +548,6 @@ void JfrThreadSampler::task_stacktrace(JfrSampleType type, JavaThread** last_thr
const uint sample_limit = JAVA_SAMPLE == type ? MAX_NR_OF_JAVA_SAMPLES : MAX_NR_OF_NATIVE_SAMPLES;
uint num_samples = 0;
JavaThread* start = NULL;
{
elapsedTimer sample_time;
sample_time.start();
@ -542,6 +559,15 @@ void JfrThreadSampler::task_stacktrace(JfrSampleType type, JavaThread** last_thr
_cur_index = tlh.list()->find_index_of_JavaThread(*last_thread);
JavaThread* current = _cur_index != -1 ? *last_thread : NULL;
// Explicitly monitor the available space of the thread-local buffer used by the load barrier
// for enqueuing klasses as part of tagging methods. We do this because if space becomes sparse,
// we cannot rely on the implicit allocation of a new buffer as part of the regular tag mechanism.
// If the free list is empty, a malloc could result, and the problem with that is that the thread
// we have suspended could be the holder of the malloc lock. Instead, the buffer is pre-emptively
// renewed before thread suspension.
const JfrBuffer* enqueue_buffer = get_enqueue_buffer();
assert(enqueue_buffer != nullptr, "invariant");
while (num_samples < sample_limit) {
current = next_thread(tlh.list(), start, current);
if (current == NULL) {
@ -553,9 +579,11 @@ void JfrThreadSampler::task_stacktrace(JfrSampleType type, JavaThread** last_thr
if (current->is_Compiler_thread()) {
continue;
}
assert(enqueue_buffer->free_size() >= _min_size, "invariant");
if (sample_task.do_sample_thread(current, _frames, _max_frames, type)) {
num_samples++;
}
enqueue_buffer = renew_if_full(enqueue_buffer);
}
*last_thread = current; // remember the thread we last attempted to sample
}

View File

@ -28,6 +28,7 @@
#include "jfr/utilities/jfrAllocation.hpp"
class JavaThread;
class JfrBuffer;
class JfrStackFrame;
class JfrThreadSampler;
class Thread;

View File

@ -247,6 +247,14 @@ void JfrTraceIdKlassQueue::enqueue(const Klass* klass) {
_queue->enqueue(klass);
}
JfrBuffer* JfrTraceIdKlassQueue::get_enqueue_buffer(Thread* thread) {
return _queue->thread_local_storage(thread);
}
JfrBuffer* JfrTraceIdKlassQueue::renew_enqueue_buffer(size_t size, Thread* thread) {
return _queue->renew(size, thread);
}
void JfrTraceIdKlassQueue::iterate(klass_callback callback, bool previous_epoch) {
assert_locked_or_safepoint(ClassLoaderDataGraph_lock);
KlassFunctor functor(callback);

View File

@ -28,6 +28,7 @@
#include "jfr/utilities/jfrAllocation.hpp"
#include "jfr/utilities/jfrEpochQueue.hpp"
class JfrBuffer;
class Klass;
class Thread;
@ -47,7 +48,7 @@ class KlassFunctor {
// It details how to store and process an enqueued Klass representation. See utilities/jfrEpochQueue.hpp.
//
template <typename Buffer>
class JfrEpochQueueKlassPolicy {
class JfrEpochQueueKlassPolicy : public JfrCHeapObj {
public:
typedef Buffer* BufferPtr;
typedef Klass Type;
@ -64,8 +65,11 @@ class JfrEpochQueueKlassPolicy {
};
class JfrTraceIdKlassQueue : public JfrCHeapObj {
friend class JfrTraceIdLoadBarrier;
private:
JfrEpochQueue<JfrEpochQueueKlassPolicy>* _queue;
JfrBuffer* get_enqueue_buffer(Thread* thread);
JfrBuffer* renew_enqueue_buffer(size_t size, Thread* thread);
public:
JfrTraceIdKlassQueue();
~JfrTraceIdKlassQueue();

View File

@ -68,3 +68,11 @@ void JfrTraceIdLoadBarrier::do_klasses(klass_callback callback, bool previous_ep
assert_locked_or_safepoint(ClassLoaderDataGraph_lock);
klass_queue().iterate(callback, previous_epoch);
}
JfrBuffer* JfrTraceIdLoadBarrier::get_enqueue_buffer(Thread* thread) {
return klass_queue().get_enqueue_buffer(thread);
}
JfrBuffer* JfrTraceIdLoadBarrier::renew_enqueue_buffer(size_t size, Thread* thread) {
return klass_queue().renew_enqueue_buffer(size, thread);
}

View File

@ -29,6 +29,7 @@
#include "memory/allocation.hpp"
class ClassLoaderData;
class JfrBuffer;
class Klass;
class Method;
class ModuleEntry;
@ -69,12 +70,16 @@ class PackageEntry;
class JfrTraceIdLoadBarrier : AllStatic {
friend class Jfr;
friend class JfrCheckpointManager;
friend class JfrStackTrace;
friend class JfrThreadSampler;
private:
static bool initialize();
static void clear();
static void destroy();
static void enqueue(const Klass* klass);
static void load_barrier(const Klass* klass);
static JfrBuffer* get_enqueue_buffer(Thread* thread);
static JfrBuffer* renew_enqueue_buffer(size_t size, Thread* thread);
public:
static traceid load(const ClassLoaderData* cld);
static traceid load(const Klass* klass);

View File

@ -27,6 +27,7 @@
#include "jfr/recorder/checkpoint/types/traceid/jfrTraceId.inline.hpp"
#include "jfr/recorder/repository/jfrChunkWriter.hpp"
#include "jfr/recorder/stacktrace/jfrStackTrace.hpp"
#include "jfr/recorder/storage/jfrBuffer.hpp"
#include "jfr/support/jfrMethodLookup.hpp"
#include "memory/allocation.inline.hpp"
#include "oops/instanceKlass.inline.hpp"
@ -175,11 +176,22 @@ void vframeStreamSamples::samples_next() {
} while (!fill_from_frame());
}
static const size_t min_valid_free_size_bytes = 16;
static inline bool is_full(const JfrBuffer* enqueue_buffer) {
return enqueue_buffer->free_size() < min_valid_free_size_bytes;
}
bool JfrStackTrace::record_thread(JavaThread& thread, frame& frame) {
// Explicitly monitor the available space of the thread-local buffer used for enqueuing klasses as part of tagging methods.
// We do this because if space becomes sparse, we cannot rely on the implicit allocation of a new buffer as part of the
// regular tag mechanism. If the free list is empty, a malloc could result, and the problem with that is that the thread
// we have suspended could be the holder of the malloc lock. If there is no more available space, the attempt is aborted.
const JfrBuffer* const enqueue_buffer = JfrTraceIdLoadBarrier::get_enqueue_buffer(Thread::current());
assert(enqueue_buffer != nullptr, "invariant");
vframeStreamSamples st(&thread, frame, false);
u4 count = 0;
_reached_root = true;
_hash = 1;
while (!st.at_end()) {
if (count >= _max_frames) {
@ -187,7 +199,7 @@ bool JfrStackTrace::record_thread(JavaThread& thread, frame& frame) {
break;
}
const Method* method = st.method();
if (!Method::is_valid_method(method)) {
if (!Method::is_valid_method(method) || is_full(enqueue_buffer)) {
// we throw away everything we've gathered in this sample since
// none of it is safe
return false;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -27,6 +27,8 @@
#include "jfr/recorder/storage/jfrEpochStorage.hpp"
class Thread;
/*
* An ElmentPolicy template template argument provides the implementation for how elements
* associated with the queue is encoded and managed by exposing the following members:
@ -43,7 +45,7 @@
* size_t operator()(const u1* next_element, Callback& callback, bool previous_epoch = false);
*/
template <template <typename> class ElementPolicy>
class JfrEpochQueue : public JfrCHeapObj {
class JfrEpochQueue : public ElementPolicy<typename JfrEpochStorage::Buffer> {
public:
typedef JfrEpochStorage::Buffer Buffer;
typedef JfrEpochStorage::BufferPtr BufferPtr;
@ -53,22 +55,20 @@ class JfrEpochQueue : public JfrCHeapObj {
~JfrEpochQueue();
bool initialize(size_t min_buffer_size, size_t free_list_cache_count_limit, size_t cache_prealloc_count);
void enqueue(TypePtr t);
BufferPtr renew(size_t size, Thread* thread);
template <typename Callback>
void iterate(Callback& callback, bool previous_epoch = false);
private:
typedef ElementPolicy<Buffer> Policy;
Policy _policy;
JfrEpochStorage* _storage;
BufferPtr storage_for_element(TypePtr t, size_t element_size);
template <typename Callback>
class ElementDispatch {
private:
Callback& _callback;
Policy& _policy;
JfrEpochQueue& _queue;
public:
typedef Buffer Type;
ElementDispatch(Callback& callback, Policy& policy);
ElementDispatch(Callback& callback, JfrEpochQueue& queue);
size_t operator()(const u1* element, bool previous_epoch);
};
};

View File

@ -26,13 +26,12 @@
#define SHARE_JFR_UTILITIES_JFREPOCHQUEUE_INLINE_HPP
#include "jfr/utilities/jfrEpochQueue.hpp"
#include "jfr/recorder/storage/jfrEpochStorage.inline.hpp"
#include "jfr/recorder/storage/jfrStorageUtils.inline.hpp"
#include "runtime/thread.inline.hpp"
template <template <typename> class ElementPolicy>
JfrEpochQueue<ElementPolicy>::JfrEpochQueue() : _policy(), _storage(NULL) {}
JfrEpochQueue<ElementPolicy>::JfrEpochQueue() : _storage(NULL) {}
template <template <typename> class ElementPolicy>
JfrEpochQueue<ElementPolicy>::~JfrEpochQueue() {
@ -46,45 +45,61 @@ bool JfrEpochQueue<ElementPolicy>::initialize(size_t min_buffer_size, size_t fre
return _storage != NULL && _storage->initialize(min_buffer_size, free_list_cache_count_limit, cache_prealloc_count);
}
template <template <typename> class ElementPolicy>
inline typename JfrEpochQueue<ElementPolicy>::BufferPtr
JfrEpochQueue<ElementPolicy>::renew(size_t size, Thread* thread) {
assert(thread != nullptr, "invariant");
BufferPtr buffer = this->thread_local_storage(thread);
if (buffer != nullptr) {
_storage->release(buffer);
}
buffer = _storage->acquire(size, thread);
assert(buffer != nullptr, "invariant");
assert(buffer->free_size() >= size, "invariant");
this->set_thread_local_storage(buffer, thread);
assert(this->thread_local_storage(thread) == buffer, "invariant");
return buffer;
}
template <template <typename> class ElementPolicy>
inline typename JfrEpochQueue<ElementPolicy>::BufferPtr
JfrEpochQueue<ElementPolicy>::storage_for_element(JfrEpochQueue<ElementPolicy>::TypePtr t, size_t element_size) {
assert(_policy.element_size(t) == element_size, "invariant");
assert(this->element_size(t) == element_size, "invariant");
Thread* const thread = Thread::current();
BufferPtr buffer = _policy.thread_local_storage(thread);
if (buffer == NULL) {
BufferPtr buffer = this->thread_local_storage(thread);
if (buffer == nullptr) {
buffer = _storage->acquire(element_size, thread);
_policy.set_thread_local_storage(buffer, thread);
this->set_thread_local_storage(buffer, thread);
} else if (buffer->free_size() < element_size) {
_storage->release(buffer);
buffer = _storage->acquire(element_size, thread);
_policy.set_thread_local_storage(buffer, thread);
this->set_thread_local_storage(buffer, thread);
}
assert(buffer->free_size() >= element_size, "invariant");
assert(_policy.thread_local_storage(thread) == buffer, "invariant");
assert(this->thread_local_storage(thread) == buffer, "invariant");
return buffer;
}
template <template <typename> class ElementPolicy>
void JfrEpochQueue<ElementPolicy>::enqueue(JfrEpochQueue<ElementPolicy>::TypePtr t) {
assert(t != NULL, "invariant");
size_t element_size = _policy.element_size(t);
assert(t != nullptr, "invariant");
size_t element_size = this->element_size(t);
BufferPtr buffer = storage_for_element(t, element_size);
assert(buffer != NULL, "invariant");
_policy.store_element(t, buffer);
assert(buffer != nullptr, "invariant");
this->store_element(t, buffer);
buffer->set_pos(element_size);
}
template <template <typename> class ElementPolicy>
template <typename Callback>
JfrEpochQueue<ElementPolicy>::ElementDispatch<Callback>::ElementDispatch(Callback& callback, JfrEpochQueue<ElementPolicy>::Policy& policy) :
_callback(callback),_policy(policy) {}
JfrEpochQueue<ElementPolicy>::ElementDispatch<Callback>::ElementDispatch(Callback& callback, JfrEpochQueue<ElementPolicy>& queue) :
_callback(callback), _queue(queue) {}
template <template <typename> class ElementPolicy>
template <typename Callback>
size_t JfrEpochQueue<ElementPolicy>::ElementDispatch<Callback>::operator()(const u1* element, bool previous_epoch) {
assert(element != NULL, "invariant");
return _policy(element, _callback, previous_epoch);
assert(element != nullptr, "invariant");
return _queue(element, _callback, previous_epoch);
}
template <template <typename> class ElementPolicy>
@ -92,7 +107,7 @@ template <typename Callback>
void JfrEpochQueue<ElementPolicy>::iterate(Callback& callback, bool previous_epoch) {
typedef ElementDispatch<Callback> ElementDispatcher;
typedef EpochDispatchOp<ElementDispatcher> QueueDispatcher;
ElementDispatcher element_dispatcher(callback, _policy);
ElementDispatcher element_dispatcher(callback, *this);
QueueDispatcher dispatch(element_dispatcher, previous_epoch);
_storage->iterate(dispatch, previous_epoch);
DEBUG_ONLY(_storage->verify_previous_empty();)