From 4858141ce4ed44e49c99c047dd773110522259b2 Mon Sep 17 00:00:00 2001 From: Chris Plummer Date: Thu, 2 Jul 2020 13:13:10 -0700 Subject: [PATCH] 8247533: SA stack walking sometimes fails with sun.jvm.hotspot.debugger.DebuggerException: get_thread_regs failed for a lwp Reviewed-by: sspitsyn, ysuenaga, dtitov --- .../native/libsaproc/LinuxDebuggerLocal.cpp | 8 ++++++- .../linux/native/libsaproc/ps_proc.c | 3 ++- .../native/libsaproc/MacosxDebuggerLocal.m | 24 ++++++++++++------- .../debugger/bsd/BsdDebuggerLocal.java | 2 +- .../jvm/hotspot/debugger/bsd/BsdThread.java | 10 +++++--- .../debugger/linux/LinuxDebuggerLocal.java | 2 +- .../hotspot/debugger/linux/LinuxThread.java | 10 +++++--- .../windbg/amd64/WindbgAMD64Thread.java | 15 ++++++++---- .../windows/native/libsaproc/sawindbg.cpp | 14 ++++++++--- 9 files changed, 61 insertions(+), 27 deletions(-) diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.cpp b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.cpp index 9e8be03e04e..ca8b6208019 100644 --- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.cpp +++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.cpp @@ -414,7 +414,13 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo struct ps_prochandle* ph = get_proc_handle(env, this_obj); if (get_lwp_regs(ph, lwp_id, &gregs) != true) { - THROW_NEW_DEBUGGER_EXCEPTION_("get_thread_regs failed for a lwp", 0); + // This is not considered fatal and does happen on occasion, usually with an + // ESRCH error. The root cause is not fully understood, but by ignoring this error + // and returning NULL, stack walking code will get null registers and fall back + // to using the "last java frame" if set up. 
+ fprintf(stdout, "WARNING: getThreadIntegerRegisterSet0: get_lwp_regs failed for lwp (%d)\n", lwp_id); + fflush(stdout); + return NULL; } #undef NPRGREG diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c index c1fdf0f7496..72d496f5224 100644 --- a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c +++ b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c @@ -140,7 +140,8 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use #ifdef PTRACE_GETREGS_REQ if (ptrace_getregs(PTRACE_GETREGS_REQ, pid, user, NULL) < 0) { - print_debug("ptrace(PTRACE_GETREGS, ...) failed for lwp %d\n", pid); + print_debug("ptrace(PTRACE_GETREGS, ...) failed for lwp(%d) errno(%d) \"%s\"\n", pid, + errno, strerror(errno)); return false; } return true; diff --git a/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m b/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m index 351659f35a5..fdc99b1e33b 100644 --- a/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m +++ b/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m @@ -600,7 +600,7 @@ Java_sun_jvm_hotspot_debugger_bsd_BsdDebuggerLocal_getThreadIntegerRegisterSet0( JNIEnv *env, jobject this_obj, jlong thread_id) { - print_debug("getThreadRegisterSet0 called\n"); + print_debug("getThreadIntegerRegisterSet0 called\n"); struct ps_prochandle* ph = get_proc_handle(env, this_obj); if (ph != NULL && ph->core != NULL) { @@ -620,7 +620,13 @@ Java_sun_jvm_hotspot_debugger_bsd_BsdDebuggerLocal_getThreadIntegerRegisterSet0( result = thread_get_state(tid, HSDB_THREAD_STATE, (thread_state_t)&state, &count); if (result != KERN_SUCCESS) { - print_error("getregs: thread_get_state(%d) failed (%d)\n", tid, result); + // This is not considered fatal. 
Unlike on Linux and Windows, we haven't seen a + // failure to get thread registers, but if it were to fail the response should + // be the same. By ignoring this error and returning NULL, stack walking code + // will get null registers and fall back to using the "last java frame" if set up. + fprintf(stdout, "WARNING: getThreadIntegerRegisterSet0: thread_get_state failed (%d) for thread (%d)\n", + result, tid); + fflush(stdout); return NULL; } @@ -681,25 +687,25 @@ Java_sun_jvm_hotspot_debugger_bsd_BsdDebuggerLocal_getThreadIntegerRegisterSet0( */ JNIEXPORT jint JNICALL Java_sun_jvm_hotspot_debugger_macosx_MacOSXDebuggerLocal_translateTID0( - JNIEnv *env, jobject this_obj, jint tid) + JNIEnv *env, jobject this_obj, jint tid) { print_debug("translateTID0 called on tid = 0x%x\n", (int)tid); kern_return_t result; thread_t foreign_tid, usable_tid; mach_msg_type_name_t type; - + foreign_tid = tid; - + task_t gTask = getTask(env, this_obj); - result = mach_port_extract_right(gTask, foreign_tid, - MACH_MSG_TYPE_COPY_SEND, + result = mach_port_extract_right(gTask, foreign_tid, + MACH_MSG_TYPE_COPY_SEND, &usable_tid, &type); if (result != KERN_SUCCESS) return -1; - + print_debug("translateTID0: 0x%x -> 0x%x\n", foreign_tid, usable_tid); - + return (jint) usable_tid; } diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java index 91c561d0abf..17f0aa4008b 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java @@ -167,7 +167,7 @@ public class BsdDebuggerLocal extends DebuggerBase implements BsdDebugger { } catch (InterruptedException x) {} } if (lastException != null) { - throw new DebuggerException(lastException); + throw new DebuggerException(lastException.getMessage(), lastException); } else { return task; 
} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java index 0d637f30f14..c52d3a51d54 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -67,8 +67,12 @@ class BsdThread implements ThreadProxy { public ThreadContext getContext() throws IllegalThreadStateException { long[] data = debugger.getThreadIntegerRegisterSet(unique_thread_id); ThreadContext context = BsdThreadContextFactory.createThreadContext(debugger); - for (int i = 0; i < data.length; i++) { - context.setRegister(i, data[i]); + // null means we failed to get the register set for some reason. The caller + // is responsible for dealing with the set of null registers in that case. 
+ if (data != null) { + for (int i = 0; i < data.length; i++) { + context.setRegister(i, data[i]); + } } return context; } diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java index 4b857e51c58..a9a270afe87 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java @@ -186,7 +186,7 @@ public class LinuxDebuggerLocal extends DebuggerBase implements LinuxDebugger { } catch (InterruptedException x) {} } if (lastException != null) { - throw new DebuggerException(lastException); + throw new DebuggerException(lastException.getMessage(), lastException); } else { return task; } diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java index 52307b9cdcf..3fe795d34bc 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -73,8 +73,12 @@ class LinuxThread implements ThreadProxy { public ThreadContext getContext() throws IllegalThreadStateException { long[] data = debugger.getThreadIntegerRegisterSet(lwp_id); ThreadContext context = LinuxThreadContextFactory.createThreadContext(debugger); - for (int i = 0; i < data.length; i++) { - context.setRegister(i, data[i]); + // null means we failed to get the register set for some reason. 
The caller + // is responsible for dealing with the set of null registers in that case. + if (data != null) { + for (int i = 0; i < data.length; i++) { + context.setRegister(i, data[i]); + } } return context; } diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java index ec5aea35e8c..377650a0a1c 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -30,9 +30,9 @@ import sun.jvm.hotspot.debugger.windbg.*; class WindbgAMD64Thread implements ThreadProxy { private WindbgDebugger debugger; - private long sysId; + private long sysId; // SystemID for Windows thread, stored in OSThread::_thread_id private boolean gotID; - private long id; + private long id; // ThreadID for Windows thread, returned by GetThreadIdBySystemId // The address argument must be the address of the OSThread::_thread_id WindbgAMD64Thread(WindbgDebugger debugger, Address addr) { @@ -50,8 +50,12 @@ class WindbgAMD64Thread implements ThreadProxy { public ThreadContext getContext() throws IllegalThreadStateException { long[] data = debugger.getThreadIntegerRegisterSet(getThreadID()); WindbgAMD64ThreadContext context = new WindbgAMD64ThreadContext(debugger); - for (int i = 0; i < data.length; i++) { - context.setRegister(i, data[i]); + // null means we failed to get the register set for some reason. 
The caller + // is responsible for dealing with the set of null registers in that case. + if (data != null) { + for (int i = 0; i < data.length; i++) { + context.setRegister(i, data[i]); + } } return context; } @@ -86,6 +90,7 @@ class WindbgAMD64Thread implements ThreadProxy { private long getThreadID() { if (!gotID) { id = debugger.getThreadIdFromSysId(sysId); + gotID = true; } return id; diff --git a/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp b/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp index 8873d4a9302..c8ad8991b2f 100644 --- a/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp +++ b/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp @@ -42,6 +42,7 @@ #include #include +#include #define DEBUG_NO_IMPLEMENTATION #include @@ -764,9 +765,16 @@ JNIEXPORT jlong JNICALL Java_sun_jvm_hotspot_debugger_windbg_WindbgDebuggerLocal CHECK_EXCEPTION_(0); ULONG id = 0; - COM_VERIFY_OK_(ptrIDebugSystemObjects->GetThreadIdBySystemId((ULONG)sysId, &id), - "Windbg Error: GetThreadIdBySystemId failed!", 0); - + HRESULT hr = ptrIDebugSystemObjects->GetThreadIdBySystemId((ULONG)sysId, &id); + if (hr != S_OK) { + // This is not considered fatal and does happen on occasion, usually with a + // 0x80004002 "No such interface supported". The root cause is not fully understood, + // but by ignoring this error and returning -1, stack walking code will get + // null registers and fall back to using the "last java frame" if set up. + printf("WARNING: GetThreadIdBySystemId failed with 0x%x for sysId (%" PRIu64 ")\n", + hr, sysId); + return -1; + } return (jlong) id; }