From 311a0a91ba520fcd0e163548e2d051d873aef51a Mon Sep 17 00:00:00 2001
From: Denghui Dong <ddong@openjdk.org>
Date: Wed, 27 Jan 2021 15:28:46 +0000
Subject: [PATCH] 8259808: Add JFR event to detect GC locker stall

Reviewed-by: sjohanss, tschatzl, egahlin
---
 src/hotspot/share/gc/shared/gcLocker.cpp      |   4 +
 src/hotspot/share/gc/shared/gcTrace.hpp       |  15 ++
 src/hotspot/share/gc/shared/gcTraceSend.cpp   |  45 ++++++
 src/hotspot/share/jfr/metadata/metadata.xml   |   5 +
 src/hotspot/share/prims/whitebox.cpp          |  12 ++
 src/jdk.jfr/share/conf/jfr/default.jfc        |   5 +
 src/jdk.jfr/share/conf/jfr/profile.jfc        |   5 +
 .../event/gc/detailed/TestGCLockerEvent.java  | 128 ++++++++++++++++++
 test/lib/jdk/test/lib/jfr/EventNames.java     |   1 +
 test/lib/sun/hotspot/WhiteBox.java            |   4 +
 10 files changed, 224 insertions(+)
 create mode 100644 test/jdk/jdk/jfr/event/gc/detailed/TestGCLockerEvent.java

diff --git a/src/hotspot/share/gc/shared/gcLocker.cpp b/src/hotspot/share/gc/shared/gcLocker.cpp
index 7ec7d23931f..e2cd41b6760 100644
--- a/src/hotspot/share/gc/shared/gcLocker.cpp
+++ b/src/hotspot/share/gc/shared/gcLocker.cpp
@@ -25,6 +25,7 @@
 #include "precompiled.hpp"
 #include "gc/shared/collectedHeap.hpp"
 #include "gc/shared/gcLocker.hpp"
+#include "gc/shared/gcTrace.hpp"
 #include "memory/resourceArea.hpp"
 #include "memory/universe.hpp"
 #include "logging/log.hpp"
@@ -97,6 +98,7 @@ bool GCLocker::check_active_before_gc() {
   if (is_active() && !_needs_gc) {
     verify_critical_count();
     _needs_gc = true;
+    GCLockerTracer::start_gc_locker(_jni_lock_count);
     log_debug_jni("Setting _needs_gc.");
   }
   return is_active();
@@ -107,6 +109,7 @@ void GCLocker::stall_until_clear() {
   MonitorLocker ml(JNICritical_lock);
 
   if (needs_gc()) {
+    GCLockerTracer::inc_stall_count();
     log_debug_jni("Allocation failed. Thread stalled by JNI critical section.");
   }
 
@@ -152,6 +155,7 @@ void GCLocker::jni_unlock(JavaThread* thread) {
     // getting the count, else there may be unnecessary GCLocker GCs.
     _total_collections = Universe::heap()->total_collections();
     _doing_gc = true;
+    GCLockerTracer::report_gc_locker();
     {
       // Must give up the lock while at a safepoint
       MutexUnlocker munlock(JNICritical_lock);
diff --git a/src/hotspot/share/gc/shared/gcTrace.hpp b/src/hotspot/share/gc/shared/gcTrace.hpp
index 94e8e7bbfc8..3db7831d5f3 100644
--- a/src/hotspot/share/gc/shared/gcTrace.hpp
+++ b/src/hotspot/share/gc/shared/gcTrace.hpp
@@ -207,4 +207,19 @@ class DefNewTracer : public YoungGCTracer {
   DefNewTracer() : YoungGCTracer(DefNew) {}
 };
 
+class GCLockerTracer : public AllStatic {
+#if INCLUDE_JFR
+private:
+  static Ticks _needs_gc_start_timestamp;
+  static volatile jint _jni_lock_count;
+  static volatile jint _stall_count;
+#endif
+
+  static bool is_started() NOT_JFR_RETURN_(false);
+
+public:
+  static void start_gc_locker(jint jni_lock_count) NOT_JFR_RETURN();
+  static void inc_stall_count() NOT_JFR_RETURN();
+  static void report_gc_locker() NOT_JFR_RETURN();
+};
 #endif // SHARE_GC_SHARED_GCTRACE_HPP
diff --git a/src/hotspot/share/gc/shared/gcTraceSend.cpp b/src/hotspot/share/gc/shared/gcTraceSend.cpp
index 4f300baf0dd..dec86a117b6 100644
--- a/src/hotspot/share/gc/shared/gcTraceSend.cpp
+++ b/src/hotspot/share/gc/shared/gcTraceSend.cpp
@@ -344,3 +344,48 @@ void GCTracer::send_phase_events(TimePartitions* time_partitions) const {
     phase->accept(&phase_reporter);
   }
 }
+
+#if INCLUDE_JFR
+Ticks GCLockerTracer::_needs_gc_start_timestamp;
+volatile jint GCLockerTracer::_jni_lock_count = 0;
+volatile jint GCLockerTracer::_stall_count = 0;
+
+bool GCLockerTracer::is_started() {
+  return _needs_gc_start_timestamp != Ticks();
+}
+
+void GCLockerTracer::start_gc_locker(const jint jni_lock_count) {
+  assert(SafepointSynchronize::is_at_safepoint(), "sanity");
+  assert(!is_started(), "sanity");
+  assert(_jni_lock_count == 0, "sanity");
+  assert(_stall_count == 0, "sanity");
+  if (EventGCLocker::is_enabled()) {
+    _needs_gc_start_timestamp.stamp();
+    _jni_lock_count = jni_lock_count;
+  }
+}
+
+void GCLockerTracer::inc_stall_count() {
+  if (is_started()) {
+    _stall_count++;
+  }
+}
+
+void GCLockerTracer::report_gc_locker() {
+  if (is_started()) {
+    EventGCLocker event(UNTIMED);
+    if (event.should_commit()) {
+      event.set_starttime(_needs_gc_start_timestamp);
+      event.set_lockCount(_jni_lock_count);
+      event.set_stallCount(_stall_count);
+      event.commit();
+    }
+    // reset
+    _needs_gc_start_timestamp = Ticks();
+    _jni_lock_count = 0;
+    _stall_count = 0;
+
+    assert(!is_started(), "sanity");
+  }
+}
+#endif
diff --git a/src/hotspot/share/jfr/metadata/metadata.xml b/src/hotspot/share/jfr/metadata/metadata.xml
index 111506067f9..352c22c3cc6 100644
--- a/src/hotspot/share/jfr/metadata/metadata.xml
+++ b/src/hotspot/share/jfr/metadata/metadata.xml
@@ -1090,6 +1090,11 @@
     <Field type="boolean" name="onOutOfMemoryError" label="On Out of Memory Error" />
   </Event>
 
+  <Event name="GCLocker" category="Java Virtual Machine, GC, Detailed" label="GC Locker" startTime="true" thread="true" stackTrace="true">
+    <Field type="uint" name="lockCount" label="Lock Count" description="The number of Java threads in a critical section when the GC locker is started" />
+    <Field type="uint" name="stallCount" label="Stall Count" description="The number of Java threads stalled by the GC locker" />
+  </Event>
+
   <Type name="DeoptimizationReason" label="Deoptimization Reason">
     <Field type="string" name="reason" label="Reason" />
   </Type>
diff --git a/src/hotspot/share/prims/whitebox.cpp b/src/hotspot/share/prims/whitebox.cpp
index 535875d8a21..4a5218c1e78 100644
--- a/src/hotspot/share/prims/whitebox.cpp
+++ b/src/hotspot/share/prims/whitebox.cpp
@@ -39,6 +39,7 @@
 #include "compiler/directivesParser.hpp"
 #include "gc/shared/concurrentGCBreakpoints.hpp"
 #include "gc/shared/gcConfig.hpp"
+#include "gc/shared/gcLocker.inline.hpp"
 #include "gc/shared/genArguments.hpp"
 #include "gc/shared/genCollectedHeap.hpp"
 #include "jvmtifiles/jvmtiEnv.hpp"
@@ -2318,6 +2319,14 @@ WB_ENTRY(jstring, WB_GetLibcName(JNIEnv* env, jobject o))
   return info_string;
 WB_END
 
+WB_ENTRY(void, WB_LockCritical(JNIEnv* env, jobject wb))
+  GCLocker::lock_critical(thread);
+WB_END
+
+WB_ENTRY(void, WB_UnlockCritical(JNIEnv* env, jobject wb))
+  GCLocker::unlock_critical(thread);
+WB_END
+
 #define CC (char*)
 
 static JNINativeMethod methods[] = {
@@ -2581,6 +2590,9 @@ static JNINativeMethod methods[] = {
   {CC"isJVMTIIncluded", CC"()Z",                      (void*)&WB_IsJVMTIIncluded},
   {CC"waitUnsafe", CC"(I)V",                          (void*)&WB_WaitUnsafe},
   {CC"getLibcName",     CC"()Ljava/lang/String;",     (void*)&WB_GetLibcName},
+
+  {CC"lockCritical",    CC"()V",                      (void*)&WB_LockCritical},
+  {CC"unlockCritical",  CC"()V",                      (void*)&WB_UnlockCritical},
 };
 
 
diff --git a/src/jdk.jfr/share/conf/jfr/default.jfc b/src/jdk.jfr/share/conf/jfr/default.jfc
index 5c102f7c05b..49e148c4393 100644
--- a/src/jdk.jfr/share/conf/jfr/default.jfc
+++ b/src/jdk.jfr/share/conf/jfr/default.jfc
@@ -783,6 +783,11 @@
       <setting name="period">5 s</setting>
     </event>
 
+    <event name="jdk.GCLocker">
+      <setting name="enabled">true</setting>
+      <setting name="threshold">1 s</setting>
+      <setting name="stackTrace">true</setting>
+    </event>
 
 
 
diff --git a/src/jdk.jfr/share/conf/jfr/profile.jfc b/src/jdk.jfr/share/conf/jfr/profile.jfc
index 52cbd2d72b4..5374f5b4e55 100644
--- a/src/jdk.jfr/share/conf/jfr/profile.jfc
+++ b/src/jdk.jfr/share/conf/jfr/profile.jfc
@@ -783,6 +783,11 @@
       <setting name="period">5 s</setting>
     </event>
 
+    <event name="jdk.GCLocker">
+      <setting name="enabled">true</setting>
+      <setting name="threshold">100 ms</setting>
+      <setting name="stackTrace">true</setting>
+    </event>
 
 
 
diff --git a/test/jdk/jdk/jfr/event/gc/detailed/TestGCLockerEvent.java b/test/jdk/jdk/jfr/event/gc/detailed/TestGCLockerEvent.java
new file mode 100644
index 00000000000..10420a959f9
--- /dev/null
+++ b/test/jdk/jdk/jfr/event/gc/detailed/TestGCLockerEvent.java
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2021, Alibaba Group Holding Limited. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation. Alibaba designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/**
+ * @test TestGCLockerEvent
+ * @key jfr
+ * @requires vm.hasJFR
+ * @library /test/lib
+ * @build sun.hotspot.WhiteBox
+ * @run driver ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Xmx32m -Xms32m -Xmn12m -XX:+UseG1GC jdk.jfr.event.gc.detailed.TestGCLockerEvent
+ */
+
+package jdk.jfr.event.gc.detailed;
+
+import static jdk.test.lib.Asserts.assertTrue;
+
+import java.util.concurrent.CountDownLatch;
+
+import jdk.jfr.Recording;
+import jdk.jfr.consumer.RecordedEvent;
+import jdk.test.lib.jfr.EventNames;
+import jdk.test.lib.jfr.Events;
+
+import sun.hotspot.WhiteBox;
+
+public class TestGCLockerEvent {
+
+    private static final String EVENT_NAME = EventNames.GCLocker;
+
+    private static final int CRITICAL_THREAD_COUNT = 4;
+
+    private static final CountDownLatch LOCK_COUNT_SIGNAL = new CountDownLatch(CRITICAL_THREAD_COUNT);
+
+    private static final CountDownLatch UNLOCK_SIGNAL = new CountDownLatch(1);
+
+    private static final CountDownLatch UNLOCK_COUNT_SIGNAL = new CountDownLatch(CRITICAL_THREAD_COUNT);
+
+    private static final String CRITICAL_THREAD_NAME_PREFIX = "Critical Thread ";
+
+    private static final int STALL_THREAD_COUNT = 8;
+
+    private static final CountDownLatch STALL_COUNT_SIGNAL = new CountDownLatch(STALL_THREAD_COUNT);
+
+    private static final int LOOP = 32;
+
+    private static final int M = 1024 * 1024;
+
+    public static void main(String[] args) throws Exception {
+        var recording = new Recording();
+        recording.enable(EVENT_NAME);
+        recording.start();
+
+        startCriticalThreads();
+        LOCK_COUNT_SIGNAL.await();
+        startStallThreads();
+        STALL_COUNT_SIGNAL.await();
+
+        // Wait threads to be stalled
+        Thread.sleep(1500);
+
+        UNLOCK_SIGNAL.countDown();
+        UNLOCK_COUNT_SIGNAL.await();
+        recording.stop();
+
+        // Verify recording
+        var all = Events.fromRecording(recording);
+        Events.hasEvents(all);
+        var event = all.get(0);
+
+        assertTrue(Events.isEventType(event, EVENT_NAME));
+        Events.assertField(event, "lockCount").equal(CRITICAL_THREAD_COUNT);
+        Events.assertField(event, "stallCount").atLeast(STALL_THREAD_COUNT);
+        assertTrue(event.getThread().getJavaName().startsWith(CRITICAL_THREAD_NAME_PREFIX));
+
+        recording.close();
+    }
+
+    private static void startCriticalThreads() {
+        for (var i = 0; i < CRITICAL_THREAD_COUNT; i++) {
+            new Thread(() -> {
+                try {
+                    WhiteBox.getWhiteBox().lockCritical();
+                    LOCK_COUNT_SIGNAL.countDown();
+
+                    UNLOCK_SIGNAL.await();
+                    WhiteBox.getWhiteBox().unlockCritical();
+                    UNLOCK_COUNT_SIGNAL.countDown();
+                } catch (InterruptedException ex) {
+                }
+            }, CRITICAL_THREAD_NAME_PREFIX + i).start();
+        }
+    }
+
+    private static void startStallThreads() {
+        var ts = new Thread[STALL_THREAD_COUNT];
+        for (var i = 0; i < STALL_THREAD_COUNT; i++) {
+            ts[i] = new Thread(() -> {
+                STALL_COUNT_SIGNAL.countDown();
+                for (int j = 0; j < LOOP; j++) {
+                    byte[] bytes = new byte[M];
+                }
+            });
+        }
+        for (Thread t : ts) {
+            t.start();
+        }
+    }
+}
+
diff --git a/test/lib/jdk/test/lib/jfr/EventNames.java b/test/lib/jdk/test/lib/jfr/EventNames.java
index 3d383a95301..e95d265a0bb 100644
--- a/test/lib/jdk/test/lib/jfr/EventNames.java
+++ b/test/lib/jdk/test/lib/jfr/EventNames.java
@@ -149,6 +149,7 @@ public class EventNames {
     public final static String ZRelocationSetGroup = PREFIX + "ZRelocationSetGroup";
     public final static String ZUncommit = PREFIX + "ZUncommit";
     public final static String ZUnmap = PREFIX + "ZUnmap";
+    public final static String GCLocker = PREFIX + "GCLocker";
 
     // Compiler
     public final static String Compilation = PREFIX + "Compilation";
diff --git a/test/lib/sun/hotspot/WhiteBox.java b/test/lib/sun/hotspot/WhiteBox.java
index fc34a795300..c96fe21b282 100644
--- a/test/lib/sun/hotspot/WhiteBox.java
+++ b/test/lib/sun/hotspot/WhiteBox.java
@@ -643,4 +643,8 @@ public class WhiteBox {
   public native boolean isJVMTIIncluded();
 
   public native void waitUnsafe(int time_ms);
+
+  public native void lockCritical();
+
+  public native void unlockCritical();
 }