8247533: SA stack walking sometimes fails with sun.jvm.hotspot.debugger.DebuggerException: get_thread_regs failed for a lwp

Reviewed-by: sspitsyn, ysuenaga, dtitov
This commit is contained in:
Chris Plummer 2020-07-02 13:13:10 -07:00
parent 8b7c959164
commit 4858141ce4
9 changed files with 61 additions and 27 deletions

View File

@ -414,7 +414,13 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo
struct ps_prochandle* ph = get_proc_handle(env, this_obj);
if (get_lwp_regs(ph, lwp_id, &gregs) != true) {
THROW_NEW_DEBUGGER_EXCEPTION_("get_thread_regs failed for a lwp", 0);
// This is not considered fatal and does happen on occasion, usually with an
// ESRCH error. The root cause is not fully understood, but by ignoring this error
// and returning NULL, stack walking code will get null registers and fall back
// to using the "last java frame" if set up.
fprintf(stdout, "WARNING: getThreadIntegerRegisterSet0: get_lwp_regs failed for lwp (%d)\n", lwp_id);
fflush(stdout);
return NULL;
}
#undef NPRGREG

View File

@ -140,7 +140,8 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use
#ifdef PTRACE_GETREGS_REQ
if (ptrace_getregs(PTRACE_GETREGS_REQ, pid, user, NULL) < 0) {
print_debug("ptrace(PTRACE_GETREGS, ...) failed for lwp %d\n", pid);
print_debug("ptrace(PTRACE_GETREGS, ...) failed for lwp(%d) errno(%d) \"%s\"\n", pid,
errno, strerror(errno));
return false;
}
return true;

View File

@ -600,7 +600,7 @@ Java_sun_jvm_hotspot_debugger_bsd_BsdDebuggerLocal_getThreadIntegerRegisterSet0(
JNIEnv *env, jobject this_obj,
jlong thread_id)
{
print_debug("getThreadRegisterSet0 called\n");
print_debug("getThreadIntegerRegisterSet0 called\n");
struct ps_prochandle* ph = get_proc_handle(env, this_obj);
if (ph != NULL && ph->core != NULL) {
@ -620,7 +620,13 @@ Java_sun_jvm_hotspot_debugger_bsd_BsdDebuggerLocal_getThreadIntegerRegisterSet0(
result = thread_get_state(tid, HSDB_THREAD_STATE, (thread_state_t)&state, &count);
if (result != KERN_SUCCESS) {
print_error("getregs: thread_get_state(%d) failed (%d)\n", tid, result);
// This is not considered fatal. Unlike on Linux and Windows, we haven't seen a
// failure to get thread registers, but if it were to fail the response should
// be the same. By ignoring this error and returning NULL, stack walking code
// will get null registers and fall back to using the "last java frame" if set up.
fprintf(stdout, "WARNING: getThreadIntegerRegisterSet0: thread_get_state failed (%d) for thread (%d)\n",
result, tid);
fflush(stdout);
return NULL;
}

View File

@ -167,7 +167,7 @@ public class BsdDebuggerLocal extends DebuggerBase implements BsdDebugger {
} catch (InterruptedException x) {}
}
if (lastException != null) {
throw new DebuggerException(lastException);
throw new DebuggerException(lastException.getMessage(), lastException);
} else {
return task;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -67,9 +67,13 @@ class BsdThread implements ThreadProxy {
/**
 * Builds a ThreadContext for this thread from the debugger's integer
 * register set. If the debugger hands back null for the register set,
 * the context is returned with no registers written to it and the caller
 * is responsible for handling that.
 */
public ThreadContext getContext() throws IllegalThreadStateException {
    final long[] regs = debugger.getThreadIntegerRegisterSet(unique_thread_id);
    final ThreadContext ctx = BsdThreadContextFactory.createThreadContext(debugger);
    if (regs == null) {
        // Failed to get the register set for some reason; leave it to the caller.
        return ctx;
    }
    int index = 0;
    for (long value : regs) {
        ctx.setRegister(index++, value);
    }
    return ctx;
}

View File

@ -186,7 +186,7 @@ public class LinuxDebuggerLocal extends DebuggerBase implements LinuxDebugger {
} catch (InterruptedException x) {}
}
if (lastException != null) {
throw new DebuggerException(lastException);
throw new DebuggerException(lastException.getMessage(), lastException);
} else {
return task;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -73,9 +73,13 @@ class LinuxThread implements ThreadProxy {
/**
 * Builds a ThreadContext for this LWP from the debugger's integer
 * register set. If the debugger hands back null for the register set,
 * the context is returned with no registers written to it and the caller
 * is responsible for handling that.
 */
public ThreadContext getContext() throws IllegalThreadStateException {
    final long[] regs = debugger.getThreadIntegerRegisterSet(lwp_id);
    final ThreadContext ctx = LinuxThreadContextFactory.createThreadContext(debugger);
    if (regs == null) {
        // Failed to get the register set for some reason; leave it to the caller.
        return ctx;
    }
    int index = 0;
    for (long value : regs) {
        ctx.setRegister(index++, value);
    }
    return ctx;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -30,9 +30,9 @@ import sun.jvm.hotspot.debugger.windbg.*;
class WindbgAMD64Thread implements ThreadProxy {
private WindbgDebugger debugger;
private long sysId;
private long sysId; // SystemID for Windows thread, stored in OSThread::_thread_id
private boolean gotID;
private long id;
private long id; // ThreadID for Windows thread, returned by GetThreadIdBySystemId
// The address argument must be the address of the OSThread::_thread_id
WindbgAMD64Thread(WindbgDebugger debugger, Address addr) {
@ -50,9 +50,13 @@ class WindbgAMD64Thread implements ThreadProxy {
/**
 * Builds a WindbgAMD64ThreadContext for this thread from the debugger's
 * integer register set, looked up by the value of getThreadID(). If the
 * debugger hands back null for the register set, the context is returned
 * with no registers written to it and the caller is responsible for
 * handling that.
 */
public ThreadContext getContext() throws IllegalThreadStateException {
    final long[] regs = debugger.getThreadIntegerRegisterSet(getThreadID());
    final WindbgAMD64ThreadContext ctx = new WindbgAMD64ThreadContext(debugger);
    if (regs == null) {
        // Failed to get the register set for some reason; leave it to the caller.
        return ctx;
    }
    int index = 0;
    for (long value : regs) {
        ctx.setRegister(index++, value);
    }
    return ctx;
}
@ -86,6 +90,7 @@ class WindbgAMD64Thread implements ThreadProxy {
private long getThreadID() {
if (!gotID) {
id = debugger.getThreadIdFromSysId(sysId);
gotID = true;
}
return id;

View File

@ -42,6 +42,7 @@
#include <limits.h>
#include <windows.h>
#include <inttypes.h>
#define DEBUG_NO_IMPLEMENTATION
#include <dbgeng.h>
@ -764,9 +765,16 @@ JNIEXPORT jlong JNICALL Java_sun_jvm_hotspot_debugger_windbg_WindbgDebuggerLocal
CHECK_EXCEPTION_(0);
ULONG id = 0;
COM_VERIFY_OK_(ptrIDebugSystemObjects->GetThreadIdBySystemId((ULONG)sysId, &id),
"Windbg Error: GetThreadIdBySystemId failed!", 0);
HRESULT hr = ptrIDebugSystemObjects->GetThreadIdBySystemId((ULONG)sysId, &id);
if (hr != S_OK) {
// This is not considered fatal and does happen on occasion, usually with a
// 0x80004002 "No such interface supported". The root cause is not fully understood,
// but by ignoring this error and returning -1, stack walking code will get
// null registers and fall back to using the "last java frame" if set up.
printf("WARNING: GetThreadIdBySystemId failed with 0x%x for sysId (%" PRIu64 ")\n",
hr, sysId);
return -1;
}
return (jlong) id;
}