From a00f5d24d3824e3ab84208401a967efe0e7bf88e Mon Sep 17 00:00:00 2001 From: Thomas Stuefe Date: Tue, 14 Mar 2023 15:51:20 +0000 Subject: [PATCH] 8303861: Error handling step timeouts should never be blocked by OnError and others Reviewed-by: dholmes, rkennke --- src/hotspot/share/runtime/nonJavaThread.cpp | 4 +- src/hotspot/share/utilities/vmError.cpp | 72 +++++++++++++------ .../TimeoutInErrorHandlingTest.java | 52 +++++++++++--- 3 files changed, 92 insertions(+), 36 deletions(-) diff --git a/src/hotspot/share/runtime/nonJavaThread.cpp b/src/hotspot/share/runtime/nonJavaThread.cpp index 93c12739e44..3fbfa080e1f 100644 --- a/src/hotspot/share/runtime/nonJavaThread.cpp +++ b/src/hotspot/share/runtime/nonJavaThread.cpp @@ -270,8 +270,8 @@ void WatcherThread::run() { os::die(); } - // Wait a second, then recheck for timeout. - os::naked_short_sleep(999); + // Wait a bit, then recheck for timeout. + os::naked_short_sleep(250); } } diff --git a/src/hotspot/share/utilities/vmError.cpp b/src/hotspot/share/utilities/vmError.cpp index 8ff3fa6c51b..d3c2f542b7a 100644 --- a/src/hotspot/share/utilities/vmError.cpp +++ b/src/hotspot/share/utilities/vmError.cpp @@ -1,6 +1,7 @@ /* * Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2017, 2020 SAP SE. All rights reserved. + * Copyright (c) 2023, Red Hat, Inc. and/or its affiliates. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -1744,43 +1745,68 @@ void VMError::show_message_box(char *buf, int buflen) { } while (yes); } -// Timeout handling: check if a timeout happened (either a single step did -// timeout or the whole of error reporting hit ErrorLogTimeout). Interrupt -// the reporting thread if that is the case. +// Fatal error handling is subject to several timeouts: +// - a global timeout (controlled via ErrorLogTimeout) +// - local error reporting step timeouts. +// +// The latter aims to "give the JVM a kick" if it gets stuck in one particular place during +// error reporting. This prevents one error reporting step from hogging all the time allotted +// to error reporting under ErrorLogTimeout. +// +// VMError::check_timeout() is called from the watcher thread and checks for either global +// or step timeout. If a timeout happened, we interrupt the reporting thread and set either +// _reporting_did_timeout or _step_did_timeout to signal which timeout fired. Function returns +// true if the *global* timeout fired, which will cause WatcherThread to shut down the JVM +// immediately. bool VMError::check_timeout() { + // This function is supposed to be called from watcher thread during fatal error handling only. + assert(VMError::is_error_reported(), "Only call during error handling"); + assert(Thread::current()->is_Watcher_thread(), "Only call from watcher thread"); + if (ErrorLogTimeout == 0) { return false; } - // Do not check for timeouts if we still have a message box to show to the - // user or if there are OnError handlers to be run. - if (ShowMessageBoxOnError - || (OnError != nullptr && OnError[0] != '\0') - || Arguments::abort_hook() != nullptr) { - return false; - } + // There are three situations where we suppress the *global* error timeout: + // - if the JVM is embedded and the launcher has its abort hook installed. + // That must be allowed to run. + // - if the user specified one or more OnError commands to run, and these + // did not yet run. These must have finished. + // - if the user (typically developer) specified ShowMessageBoxOnError, + // and the error box has not yet been shown + const bool ignore_global_timeout = + (ShowMessageBoxOnError + || (OnError != nullptr && OnError[0] != '\0') + || Arguments::abort_hook() != nullptr); - const jlong reporting_start_time_l = get_reporting_start_time(); const jlong now = get_current_timestamp(); - // Timestamp is stored in nanos. - if (reporting_start_time_l > 0) { - const jlong end = reporting_start_time_l + (jlong)ErrorLogTimeout * TIMESTAMP_TO_SECONDS_FACTOR; - if (end <= now && !_reporting_did_timeout) { - // We hit ErrorLogTimeout and we haven't interrupted the reporting - // thread yet. - _reporting_did_timeout = true; - interrupt_reporting_thread(); - return true; // global timeout + + // Global timeout hit? + if (!ignore_global_timeout) { + const jlong reporting_start_time = get_reporting_start_time(); + // Timestamp is stored in nanos. + if (reporting_start_time > 0) { + const jlong end = reporting_start_time + (jlong)ErrorLogTimeout * TIMESTAMP_TO_SECONDS_FACTOR; + if (end <= now && !_reporting_did_timeout) { + // We hit ErrorLogTimeout and we haven't interrupted the reporting + // thread yet. + _reporting_did_timeout = true; + interrupt_reporting_thread(); + return true; // global timeout + } } } - const jlong step_start_time_l = get_step_start_time(); - if (step_start_time_l > 0) { + // Reporting step timeout? + const jlong step_start_time = get_step_start_time(); + if (step_start_time > 0) { // A step times out after a quarter of the total timeout. Steps are mostly fast unless they // hang for some reason, so this simple rule allows for three hanging step and still // hopefully leaves time enough for the rest of the steps to finish. - const jlong end = step_start_time_l + (jlong)ErrorLogTimeout * TIMESTAMP_TO_SECONDS_FACTOR / 4; + const int max_step_timeout_secs = 5; + const jlong timeout_duration = MAX2((jlong)max_step_timeout_secs, (jlong)ErrorLogTimeout * TIMESTAMP_TO_SECONDS_FACTOR / 4); + const jlong end = step_start_time + timeout_duration; if (end <= now && !_step_did_timeout) { // The step timed out and we haven't interrupted the reporting // thread yet. diff --git a/test/hotspot/jtreg/runtime/ErrorHandling/TimeoutInErrorHandlingTest.java b/test/hotspot/jtreg/runtime/ErrorHandling/TimeoutInErrorHandlingTest.java index 1fc57e248be..0c3c2997b0e 100644 --- a/test/hotspot/jtreg/runtime/ErrorHandling/TimeoutInErrorHandlingTest.java +++ b/test/hotspot/jtreg/runtime/ErrorHandling/TimeoutInErrorHandlingTest.java @@ -1,5 +1,6 @@ /* * Copyright (c) 2017, 2022 SAP SE. All rights reserved. + * Copyright (c) 2023, Red Hat, Inc. and/or its affiliates. * Copyright (c) 2017, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -26,6 +27,9 @@ import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; import java.util.regex.Pattern; import jdk.test.lib.process.OutputAnalyzer; @@ -33,7 +37,7 @@ import jdk.test.lib.Platform; import jdk.test.lib.process.ProcessTools; /* - * @test + * @test id=default * @bug 8166944 * @summary Hanging Error Reporting steps may lead to torn error logs * @modules java.base/jdk.internal.misc @@ -43,6 +47,16 @@ import jdk.test.lib.process.ProcessTools; * @author Thomas Stuefe (SAP) */ +/* + * @test id=with-on-error + * @bug 8303861 + * @summary Error handling step timeouts should never be blocked by OnError etc. + * @modules java.base/jdk.internal.misc + * @library /test/lib + * @requires (vm.debug == true) & (os.family != "windows") + * @run driver TimeoutInErrorHandlingTest with-on-error + */ + public class TimeoutInErrorHandlingTest { public static final boolean verbose = System.getProperty("verbose") != null; @@ -71,14 +85,28 @@ public class TimeoutInErrorHandlingTest { * little timeout messages to see that repeated timeout handling is basically working. */ - ProcessBuilder pb = ProcessTools.createJavaProcessBuilder( - "-XX:+UnlockDiagnosticVMOptions", - "-Xmx100M", - "-XX:ErrorHandlerTest=14", - "-XX:+TestUnresponsiveErrorHandler", - "-XX:ErrorLogTimeout=" + ERROR_LOG_TIMEOUT, - "-XX:-CreateCoredumpOnCrash", - "-version"); + boolean withOnError = false; + + if (args.length > 0) { + switch (args[0]) { + case "with-on-error": withOnError = true; break; + default: throw new RuntimeException("Invalid argument " + args[1]); + } + } + + List arguments = new ArrayList<>(); + Collections.addAll(arguments, + "-XX:+UnlockDiagnosticVMOptions", + "-Xmx100M", + "-XX:ErrorHandlerTest=14", + "-XX:+TestUnresponsiveErrorHandler", + "-XX:ErrorLogTimeout=" + ERROR_LOG_TIMEOUT, + "-XX:-CreateCoredumpOnCrash"); + if (withOnError) { + arguments.add("-XX:OnError=echo hi"); + } + arguments.add("-version"); + ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(arguments); OutputAnalyzer output_detail = new OutputAnalyzer(pb.start()); @@ -92,8 +120,10 @@ public class TimeoutInErrorHandlingTest { output_detail.shouldMatch("# A fatal error has been detected by the Java Runtime Environment:.*"); output_detail.shouldMatch("# +(?:SIGSEGV|SIGBUS|EXCEPTION_ACCESS_VIOLATION).*"); - // VM should have been aborted by WatcherThread - output_detail.shouldMatch(".*timer expired, abort.*"); + // Unless we specified OnError, VM should have been aborted by WatcherThread + if (!withOnError) { + output_detail.shouldMatch(".*timer expired, abort.*"); + } // extract hs-err file File hs_err_file;