8303861: Error handling step timeouts should never be blocked by OnError and others
Reviewed-by: dholmes, rkennke
This commit is contained in:
parent
da044dd569
commit
a00f5d24d3
@ -270,8 +270,8 @@ void WatcherThread::run() {
|
||||
os::die();
|
||||
}
|
||||
|
||||
// Wait a second, then recheck for timeout.
|
||||
os::naked_short_sleep(999);
|
||||
// Wait a bit, then recheck for timeout.
|
||||
os::naked_short_sleep(250);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2017, 2020 SAP SE. All rights reserved.
|
||||
* Copyright (c) 2023, Red Hat, Inc. and/or its affiliates.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -1744,43 +1745,68 @@ void VMError::show_message_box(char *buf, int buflen) {
|
||||
} while (yes);
|
||||
}
|
||||
|
||||
// Timeout handling: check if a timeout happened (either a single step did
|
||||
// timeout or the whole of error reporting hit ErrorLogTimeout). Interrupt
|
||||
// the reporting thread if that is the case.
|
||||
// Fatal error handling is subject to several timeouts:
|
||||
// - a global timeout (controlled via ErrorLogTimeout)
|
||||
// - local error reporting step timeouts.
|
||||
//
|
||||
// The latter aims to "give the JVM a kick" if it gets stuck in one particular place during
|
||||
// error reporting. This prevents one error reporting step from hogging all the time allotted
|
||||
// to error reporting under ErrorLogTimeout.
|
||||
//
|
||||
// VMError::check_timeout() is called from the watcher thread and checks for either global
|
||||
// or step timeout. If a timeout happened, we interrupt the reporting thread and set either
|
||||
// _reporting_did_timeout or _step_did_timeout to signal which timeout fired. Function returns
|
||||
// true if the *global* timeout fired, which will cause WatcherThread to shut down the JVM
|
||||
// immediately.
|
||||
bool VMError::check_timeout() {
|
||||
|
||||
// This function is supposed to be called from watcher thread during fatal error handling only.
|
||||
assert(VMError::is_error_reported(), "Only call during error handling");
|
||||
assert(Thread::current()->is_Watcher_thread(), "Only call from watcher thread");
|
||||
|
||||
if (ErrorLogTimeout == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Do not check for timeouts if we still have a message box to show to the
|
||||
// user or if there are OnError handlers to be run.
|
||||
if (ShowMessageBoxOnError
|
||||
|| (OnError != nullptr && OnError[0] != '\0')
|
||||
|| Arguments::abort_hook() != nullptr) {
|
||||
return false;
|
||||
}
|
||||
// There are three situations where we suppress the *global* error timeout:
|
||||
// - if the JVM is embedded and the launcher has its abort hook installed.
|
||||
// That must be allowed to run.
|
||||
// - if the user specified one or more OnError commands to run, and these
|
||||
// did not yet run. These must have finished.
|
||||
// - if the user (typically developer) specified ShowMessageBoxOnError,
|
||||
// and the error box has not yet been shown
|
||||
const bool ignore_global_timeout =
|
||||
(ShowMessageBoxOnError
|
||||
|| (OnError != nullptr && OnError[0] != '\0')
|
||||
|| Arguments::abort_hook() != nullptr);
|
||||
|
||||
const jlong reporting_start_time_l = get_reporting_start_time();
|
||||
const jlong now = get_current_timestamp();
|
||||
// Timestamp is stored in nanos.
|
||||
if (reporting_start_time_l > 0) {
|
||||
const jlong end = reporting_start_time_l + (jlong)ErrorLogTimeout * TIMESTAMP_TO_SECONDS_FACTOR;
|
||||
if (end <= now && !_reporting_did_timeout) {
|
||||
// We hit ErrorLogTimeout and we haven't interrupted the reporting
|
||||
// thread yet.
|
||||
_reporting_did_timeout = true;
|
||||
interrupt_reporting_thread();
|
||||
return true; // global timeout
|
||||
|
||||
// Global timeout hit?
|
||||
if (!ignore_global_timeout) {
|
||||
const jlong reporting_start_time = get_reporting_start_time();
|
||||
// Timestamp is stored in nanos.
|
||||
if (reporting_start_time > 0) {
|
||||
const jlong end = reporting_start_time + (jlong)ErrorLogTimeout * TIMESTAMP_TO_SECONDS_FACTOR;
|
||||
if (end <= now && !_reporting_did_timeout) {
|
||||
// We hit ErrorLogTimeout and we haven't interrupted the reporting
|
||||
// thread yet.
|
||||
_reporting_did_timeout = true;
|
||||
interrupt_reporting_thread();
|
||||
return true; // global timeout
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const jlong step_start_time_l = get_step_start_time();
|
||||
if (step_start_time_l > 0) {
|
||||
// Reporting step timeout?
|
||||
const jlong step_start_time = get_step_start_time();
|
||||
if (step_start_time > 0) {
|
||||
// A step times out after a quarter of the total timeout. Steps are mostly fast unless they
|
||||
// hang for some reason, so this simple rule allows for three hanging step and still
|
||||
// hopefully leaves time enough for the rest of the steps to finish.
|
||||
const jlong end = step_start_time_l + (jlong)ErrorLogTimeout * TIMESTAMP_TO_SECONDS_FACTOR / 4;
|
||||
const int max_step_timeout_secs = 5;
|
||||
const jlong timeout_duration = MAX2((jlong)max_step_timeout_secs, (jlong)ErrorLogTimeout * TIMESTAMP_TO_SECONDS_FACTOR / 4);
|
||||
const jlong end = step_start_time + timeout_duration;
|
||||
if (end <= now && !_step_did_timeout) {
|
||||
// The step timed out and we haven't interrupted the reporting
|
||||
// thread yet.
|
||||
|
@ -1,5 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2017, 2022 SAP SE. All rights reserved.
|
||||
* Copyright (c) 2023, Red Hat, Inc. and/or its affiliates.
|
||||
* Copyright (c) 2017, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
@ -26,6 +27,9 @@ import java.io.BufferedReader;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import jdk.test.lib.process.OutputAnalyzer;
|
||||
@ -33,7 +37,7 @@ import jdk.test.lib.Platform;
|
||||
import jdk.test.lib.process.ProcessTools;
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @test id=default
|
||||
* @bug 8166944
|
||||
* @summary Hanging Error Reporting steps may lead to torn error logs
|
||||
* @modules java.base/jdk.internal.misc
|
||||
@ -43,6 +47,16 @@ import jdk.test.lib.process.ProcessTools;
|
||||
* @author Thomas Stuefe (SAP)
|
||||
*/
|
||||
|
||||
/*
|
||||
* @test id=with-on-error
|
||||
* @bug 8303861
|
||||
* @summary Error handling step timeouts should never be blocked by OnError etc.
|
||||
* @modules java.base/jdk.internal.misc
|
||||
* @library /test/lib
|
||||
* @requires (vm.debug == true) & (os.family != "windows")
|
||||
* @run driver TimeoutInErrorHandlingTest with-on-error
|
||||
*/
|
||||
|
||||
public class TimeoutInErrorHandlingTest {
|
||||
|
||||
public static final boolean verbose = System.getProperty("verbose") != null;
|
||||
@ -71,14 +85,28 @@ public class TimeoutInErrorHandlingTest {
|
||||
* little timeout messages to see that repeated timeout handling is basically working.
|
||||
*/
|
||||
|
||||
ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(
|
||||
"-XX:+UnlockDiagnosticVMOptions",
|
||||
"-Xmx100M",
|
||||
"-XX:ErrorHandlerTest=14",
|
||||
"-XX:+TestUnresponsiveErrorHandler",
|
||||
"-XX:ErrorLogTimeout=" + ERROR_LOG_TIMEOUT,
|
||||
"-XX:-CreateCoredumpOnCrash",
|
||||
"-version");
|
||||
boolean withOnError = false;
|
||||
|
||||
if (args.length > 0) {
|
||||
switch (args[0]) {
|
||||
case "with-on-error": withOnError = true; break;
|
||||
default: throw new RuntimeException("Invalid argument " + args[1]);
|
||||
}
|
||||
}
|
||||
|
||||
List<String> arguments = new ArrayList<>();
|
||||
Collections.addAll(arguments,
|
||||
"-XX:+UnlockDiagnosticVMOptions",
|
||||
"-Xmx100M",
|
||||
"-XX:ErrorHandlerTest=14",
|
||||
"-XX:+TestUnresponsiveErrorHandler",
|
||||
"-XX:ErrorLogTimeout=" + ERROR_LOG_TIMEOUT,
|
||||
"-XX:-CreateCoredumpOnCrash");
|
||||
if (withOnError) {
|
||||
arguments.add("-XX:OnError=echo hi");
|
||||
}
|
||||
arguments.add("-version");
|
||||
ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(arguments);
|
||||
|
||||
OutputAnalyzer output_detail = new OutputAnalyzer(pb.start());
|
||||
|
||||
@ -92,8 +120,10 @@ public class TimeoutInErrorHandlingTest {
|
||||
output_detail.shouldMatch("# A fatal error has been detected by the Java Runtime Environment:.*");
|
||||
output_detail.shouldMatch("# +(?:SIGSEGV|SIGBUS|EXCEPTION_ACCESS_VIOLATION).*");
|
||||
|
||||
// VM should have been aborted by WatcherThread
|
||||
output_detail.shouldMatch(".*timer expired, abort.*");
|
||||
// Unless we specified OnError, VM should have been aborted by WatcherThread
|
||||
if (!withOnError) {
|
||||
output_detail.shouldMatch(".*timer expired, abort.*");
|
||||
}
|
||||
|
||||
// extract hs-err file
|
||||
File hs_err_file;
|
||||
|
Loading…
Reference in New Issue
Block a user