8219584: Try to dump error file by thread which causes safepoint timeout

Reviewed-by: stuefe, dholmes, kvn
This commit is contained in:
Martin Doerr 2019-03-08 11:23:30 +01:00
parent cafb914cca
commit 97ec4aeed2
7 changed files with 151 additions and 3 deletions

View File

@ -31,6 +31,7 @@
#include "runtime/interfaceSupport.inline.hpp"
#include "services/memTracker.hpp"
#include "utilities/align.hpp"
#include "utilities/events.hpp"
#include "utilities/formatBuffer.hpp"
#include "utilities/macros.hpp"
#include "utilities/vmError.hpp"
@ -1269,6 +1270,15 @@ static bool get_signal_code_description(const siginfo_t* si, enum_sigcode_desc_t
return true;
}
// Tells whether the signal described by 'siginfo' was sent explicitly
// rather than raised by a fault: true when si_code is SI_USER, SI_QUEUE
// or, on platforms that define it, SI_TKILL.
// 'siginfo' must point to a valid siginfo_t; it is dereferenced unchecked.
bool os::signal_sent_by_kill(const void* siginfo) {
  const siginfo_t* const info = (const siginfo_t*)siginfo;
  switch (info->si_code) {
    case SI_USER:
    case SI_QUEUE:
#ifdef SI_TKILL
    case SI_TKILL:
#endif
      return true;
    default:
      return false;
  }
}
void os::print_siginfo(outputStream* os, const void* si0) {
const siginfo_t* const si = (const siginfo_t*) si0;
@ -1299,7 +1309,7 @@ void os::print_siginfo(outputStream* os, const void* si0) {
// so it depends on the context which member to use. For synchronous error signals,
// we print si_addr, unless the signal was sent by another process or thread, in
// which case we print out pid or tid of the sender.
if (si->si_code == SI_USER || si->si_code == SI_QUEUE) {
if (signal_sent_by_kill(si)) {
const pid_t pid = si->si_pid;
os->print(", si_pid: %ld", (long) pid);
if (IS_VALID_PID(pid)) {
@ -1325,6 +1335,25 @@ void os::print_siginfo(outputStream* os, const void* si0) {
}
// Delivers signal 'sig' to the native thread backing 'thread'.
// On success the action is recorded in the event log together with
// 'reason' and true is returned. Returns false if the thread has no
// OSThread or if the underlying kill call reports an error.
bool os::signal_thread(Thread* thread, int sig, const char* reason) {
  OSThread* const target = thread->osthread();
  if (target == NULL) {
    return false;
  }

#if defined (SOLARIS)
  // Note: pthread_kill cannot be used on Solaris - not because it is
  // missing, but because we do not have the pthread_t id.
  const int rc = thr_kill(target->thread_id(), sig);
#else
  const int rc = pthread_kill(target->pthread_id(), sig);
#endif

  if (rc != 0) {
    return false;
  }

  Events::log(Thread::current(), "sent signal %d to Thread " INTPTR_FORMAT " because %s.",
              sig, p2i(thread), reason);
  return true;
}
// Unblocks every signal contained in 'set' by calling
// pthread_sigmask(SIG_UNBLOCK, ...). The previous mask is not retrieved
// (NULL is passed for the old-set argument).
// Returns the result of pthread_sigmask() unchanged.
int os::Posix::unblock_thread_signal_mask(const sigset_t *set) {
return pthread_sigmask(SIG_UNBLOCK, set, NULL);
}

View File

@ -1797,6 +1797,11 @@ void os::print_memory_info(outputStream* st) {
st->cr();
}
// Stub for this platform: never reports that the event described by
// 'siginfo' was sent by kill. The argument is ignored.
bool os::signal_sent_by_kill(const void* siginfo) {
// TODO: Is it possible to detect an externally sent event here?
return false;
}
void os::print_siginfo(outputStream *st, const void* siginfo) {
const EXCEPTION_RECORD* const er = (EXCEPTION_RECORD*)siginfo;
st->print("siginfo:");
@ -1830,6 +1835,11 @@ void os::print_siginfo(outputStream *st, const void* siginfo) {
st->cr();
}
// Stub for this platform: no signal is sent and all arguments are ignored.
// Always returns false, so callers must handle the "could not signal" case.
bool os::signal_thread(Thread* thread, int sig, const char* reason) {
// TODO: Can we kill the thread on this platform?
return false;
}
// Intentionally prints nothing in this platform implementation;
// 'st', 'buf' and 'buflen' are unused.
void os::print_signal_handlers(outputStream* st, char* buf, size_t buflen) {
// do nothing
}

View File

@ -367,7 +367,7 @@ define_pd_global(uint64_t,MaxRAM, 1ULL*G);
"Print out every time compilation is longer than " \
"a given threshold") \
\
develop(bool, SafepointALot, false, \
diagnostic(bool, SafepointALot, false, \
"Generate a lot of safepoints. This works with " \
"GuaranteedSafepointInterval") \
\

View File

@ -463,6 +463,9 @@ class os: AllStatic {
static void pd_start_thread(Thread* thread);
static void start_thread(Thread* thread);
// Returns true if successful.
static bool signal_thread(Thread* thread, int sig, const char* reason);
static void free_thread(OSThread* osthread);
// thread id on Linux/64bit is 64bit, on Windows and Solaris, it's 32bit
@ -637,6 +640,7 @@ class os: AllStatic {
static void print_environment_variables(outputStream* st, const char** env_list);
static void print_context(outputStream* st, const void* context);
static void print_register_info(outputStream* st, const void* context);
static bool signal_sent_by_kill(const void* siginfo);
static void print_siginfo(outputStream* st, const void* siginfo);
static void print_signal_handlers(outputStream* st, char* buf, size_t buflen);
static void print_date_and_time(outputStream* st, char* buf, size_t buflen);

View File

@ -902,6 +902,16 @@ void SafepointSynchronize::print_safepoint_timeout() {
// To debug the long safepoint, specify both AbortVMOnSafepointTimeout &
// ShowMessageBoxOnError.
if (AbortVMOnSafepointTimeout) {
// Send the blocking thread a signal to terminate and write an error file.
for (JavaThreadIteratorWithHandle jtiwh; JavaThread *cur_thread = jtiwh.next(); ) {
if (cur_thread->safepoint_state()->is_running()) {
if (!os::signal_thread(cur_thread, SIGILL, "blocking a safepoint")) {
break; // Could not send signal. Report fatal error.
}
// Give cur_thread a chance to report the error and terminate the VM.
os::sleep(Thread::current(), 3000, false);
}
}
fatal("Safepoint sync time longer than " INTX_FORMAT "ms detected when executing %s.",
SafepointTimeoutDelay, VMThread::vm_operation()->name());
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -522,6 +522,9 @@ void VMError::report(outputStream* st, bool _verbose) {
st->print("%s", buf);
st->print(" (0x%x)", _id); // signal number
st->print(" at pc=" PTR_FORMAT, p2i(_pc));
if (_siginfo != NULL && os::signal_sent_by_kill(_siginfo)) {
st->print(" (sent by kill)");
}
} else {
if (should_report_bug(_id)) {
st->print("Internal Error");

View File

@ -0,0 +1,92 @@
/*
* Copyright (c) 2019, SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
import jdk.test.lib.*;
import jdk.test.lib.process.*;
/*
* @test TestAbortVMOnSafepointTimeout
* @summary Check if VM can kill thread which doesn't reach safepoint.
* @bug 8219584
* @requires vm.compiler2.enabled
* @library /test/lib
* @modules java.base/jdk.internal.misc
* java.management
*/
/**
 * Verifies that a VM started with -XX:+AbortVMOnSafepointTimeout aborts when
 * a compiled thread does not reach a safepoint in time, and that the error
 * report names the offending thread's code.
 *
 * The class is used in two modes: without arguments it is the driver that
 * spawns a child VM; with any argument it is the child that runs the
 * long, safepoint-free loop.
 */
public class TestAbortVMOnSafepointTimeout {

    /**
     * Entry point. With at least one argument, runs the long loop (child
     * mode); otherwise launches the child VM and checks its output.
     */
    public static void main(String[] args) throws Exception {
        if (args.length > 0) {
            int result = test_loop(3);
            System.out.println("This message would occur after some time with result " + result);
            return;
        }

        testWith(500, 500);
    }

    /**
     * Counts the values y in [1, Integer.MAX_VALUE) that are divisible by x.
     * Returns 0 when x is 0. The child VM is configured so that this loop is
     * compiled without safepoint polls.
     */
    static int test_loop(int x) {
        int sum = 0;
        if (x != 0) {
            // Long running loop without safepoint.
            for (int y = 1; y < Integer.MAX_VALUE; ++y) {
                if (y % x == 0) ++sum;
            }
        }
        return sum;
    }

    /**
     * Starts a child VM that runs {@link #test_loop} and checks that it
     * aborts with the expected diagnostics.
     *
     * @param sfpt_interval value passed as -XX:GuaranteedSafepointInterval
     * @param timeout_delay value passed as -XX:SafepointTimeoutDelay
     */
    public static void testWith(int sfpt_interval, int timeout_delay) throws Exception {
        ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(
                "-XX:+UnlockDiagnosticVMOptions",
                "-XX:+SafepointTimeout",
                "-XX:+SafepointALot",
                "-XX:+AbortVMOnSafepointTimeout",
                "-XX:SafepointTimeoutDelay=" + timeout_delay,
                "-XX:GuaranteedSafepointInterval=" + sfpt_interval,
                "-XX:-TieredCompilation",
                "-XX:-UseCountedLoopSafepoints",
                "-XX:LoopStripMiningIter=0",
                "-XX:LoopUnrollLimit=0",
                "-XX:CompileCommand=compileonly,TestAbortVMOnSafepointTimeout::test_loop",
                "-Xcomp",
                "-XX:-CreateCoredumpOnCrash",
                "-Xms64m",
                "TestAbortVMOnSafepointTimeout",
                "runTestLoop"
        );

        OutputAnalyzer output = new OutputAnalyzer(pb.start());
        // Plain-text expectations use shouldContain: shouldMatch interprets its
        // argument as a regular expression, so "(sent by kill)" would be a
        // capture group and the literal parentheses would not be checked.
        if (Platform.isWindows()) {
            // No signal is sent on Windows; only the fatal error message is expected.
            output.shouldContain("Safepoint sync time longer than");
        } else {
            output.shouldContain("SIGILL");
            if (Platform.isLinux()) {
                output.shouldContain("(sent by kill)");
            }
            output.shouldMatch("TestAbortVMOnSafepointTimeout.test_loop");
        }
        output.shouldNotHaveExitValue(0);
    }
}