8286030: Avoid JVM crash when containers share the same /tmp dir

Reviewed-by: stuefe, sgehwolf
This commit is contained in:
Ioi Lam 2022-07-18 04:10:08 +00:00
parent 4dd236b40a
commit 84f23149e2
3 changed files with 297 additions and 57 deletions
src/hotspot/os/posix
test/hotspot/jtreg/containers/docker

@ -48,6 +48,10 @@
# include <signal.h>
# include <pwd.h>
#if defined(LINUX)
# include <sys/file.h>
#endif
static char* backing_store_file_name = NULL; // name of the backing store
// file, if successfully created.
@ -76,18 +80,6 @@ static char* create_standard_memory(size_t size) {
return mapAddress;
}
// delete the PerfData memory region
//
static void delete_standard_memory(char* addr, size_t size) {
// there are no persistent external resources to cleanup for standard
// memory. since DestroyJavaVM does not support unloading of the JVM,
// cleanup of the memory resource is not performed. The memory will be
// reclaimed by the OS upon termination of the process.
//
return;
}
// save the specified memory region to the given file
//
// Note: this function might be called from signal handler (by os::abort()),
@ -707,17 +699,17 @@ static void remove_file(const char* path) {
}
}
// cleanup stale shared memory resources
// cleanup stale shared memory files
//
// This method attempts to remove all stale shared memory files in
// the named user temporary directory. It scans the named directory
// for files matching the pattern ^$[0-9]*$. For each file found, the
// process id is extracted from the file name and a test is run to
// determine if the process is alive. If the process is not alive,
// any stale file resources are removed.
// for files matching the pattern ^$[0-9]*$.
//
static void cleanup_sharedmem_resources(const char* dirname) {
// This directory should be used only by JVM processes owned by the
// current user to store PerfMemory files. Any other files found
// in this directory may be removed.
//
static void cleanup_sharedmem_files(const char* dirname) {
int saved_cwd_fd;
// open the directory and set the current working directory to it
@ -727,48 +719,95 @@ static void cleanup_sharedmem_resources(const char* dirname) {
return;
}
// for each entry in the directory that matches the expected file
// name pattern, determine if the file resources are stale and if
// so, remove the file resources. Note, instrumented HotSpot processes
// for this user may start and/or terminate during this search and
// remove or create new files in this directory. The behavior of this
// loop under these conditions is dependent upon the implementation of
// opendir/readdir.
// For each entry in the directory that matches the expected file
// name pattern, remove the file if it's determined to be stale.
// Note, instrumented HotSpot processes for this user may start and/or
// terminate during this search and remove or create new files in this
// directory. The behavior of this loop under these conditions is dependent
// upon the implementation of opendir/readdir.
//
struct dirent* entry;
errno = 0;
while ((entry = os::readdir(dirp)) != NULL) {
pid_t pid = filename_to_pid(entry->d_name);
const char* filename = entry->d_name;
pid_t pid = filename_to_pid(filename);
if (pid == 0) {
if (strcmp(entry->d_name, ".") != 0 && strcmp(entry->d_name, "..") != 0) {
if (strcmp(filename, ".") != 0 && strcmp(filename, "..") != 0) {
// attempt to remove all unexpected files, except "." and ".."
unlink(entry->d_name);
unlink(filename);
}
errno = 0;
continue;
}
// we now have a file name that converts to a valid integer
// that could represent a process id . if this process id
// matches the current process id or the process is not running,
// then remove the stale file resources.
#if defined(LINUX)
// Special case on Linux, if multiple containers share the
// same /tmp directory:
//
// process liveness is detected by sending signal number 0 to
// the process id (see kill(2)). if kill determines that the
// process does not exist, then the file resources are removed.
// if kill determines that that we don't have permission to
// signal the process, then the file resources are assumed to
// be stale and are removed because the resources for such a
// process should be in a different user specific directory.
//
if ((pid == os::current_process_id()) ||
(kill(pid, 0) == OS_ERR && (errno == ESRCH || errno == EPERM))) {
unlink(entry->d_name);
// - All the JVMs must have the JDK-8286030 fix, or the behavior
// is undefined.
// - We cannot rely on the values of the pid, because it could
// be a process in a different namespace. We must use the flock
// protocol to determine if a live process is using this file.
// See create_sharedmem_file().
int fd;
RESTARTABLE(os::open(filename, O_RDONLY, 0), fd);
if (fd == OS_ERR) {
// Something wrong happened. Ignore the error and don't try to remove the
// file.
log_debug(perf, memops)("os::open() for stale file check failed for %s/%s", dirname, filename);
errno = 0;
continue;
}
int n;
RESTARTABLE(::flock(fd, LOCK_EX|LOCK_NB), n);
if (n != 0) {
// Either another process holds the exclusive lock on this file, or
// something wrong happened. Ignore the error and don't try to remove the
// file.
log_debug(perf, memops)("flock for stale file check failed for %s/%s", dirname, filename);
::close(fd);
errno = 0;
continue;
}
// We are able to lock the file, but this file might have been created
// by an older JVM that doesn't use the flock protocol, so we must do
// the following checks (which are also done by older JVMs).
#endif
// The following code assumes that pid must be in the same
// namespace as the current process.
bool stale = false;
if (pid == os::current_process_id()) {
// The file was created by a terminated process that happened
// to have the same pid as the current process.
stale = true;
} else if (kill(pid, 0) == OS_ERR) {
if (errno == ESRCH) {
// The target process does not exist.
stale = true;
} else if (errno == EPERM) {
// The file was created by a terminated process that happened
// to have the same pid as a process not owned by the current user.
stale = true;
}
}
if (stale) {
log_info(perf, memops)("Remove stale file %s/%s", dirname, filename);
unlink(filename);
}
#if defined(LINUX)
// Hold the lock until here to prevent other JVMs from using this file
// while we were in the middle of deleting it.
::close(fd);
#endif
errno = 0;
}
@ -814,13 +853,13 @@ static bool make_user_tmp_dir(const char* dirname) {
return true;
}
// create the shared memory file resources
// create the shared memory file
//
// This method creates the shared memory file with the given size
// This method also creates the user specific temporary directory, if
// it does not yet exist.
//
static int create_sharedmem_resources(const char* dirname, const char* filename, size_t size) {
static int create_sharedmem_file(const char* dirname, const char* filename, size_t size) {
// make the user temporary directory
if (!make_user_tmp_dir(dirname)) {
@ -865,6 +904,32 @@ static int create_sharedmem_resources(const char* dirname, const char* filename,
return -1;
}
#if defined(LINUX)
// On Linux, different containerized processes that share the same /tmp
// directory (e.g., with "docker --volume ...") may have the same pid and
// try to use the same file. To avoid conflicts among such
// processes, we allow only one of them (the winner of the flock() call)
// to write to the file. All the other processes will give up and will
// have perfdata disabled.
//
// Note that the flock will be automatically given up when the winner
// process exits.
//
// The locking protocol works only with other JVMs that have the JDK-8286030
// fix. If you are sharing the /tmp directory among different containers,
// do not use older JVMs that don't have this fix, or the behavior is undefined.
int n;
RESTARTABLE(::flock(fd, LOCK_EX|LOCK_NB), n);
if (n != 0) {
log_warning(perf, memops)("Cannot use file %s/%s because %s (errno = %d)", dirname, filename,
(errno == EWOULDBLOCK) ?
"it is locked by another process" :
"flock() failed", errno);
::close(fd);
return -1;
}
#endif
ssize_t result;
// truncate the file to get rid of any existing data
@ -981,12 +1046,13 @@ static char* mmap_create_shared(size_t size) {
}
// cleanup any stale shared memory files
cleanup_sharedmem_resources(dirname);
cleanup_sharedmem_files(dirname);
assert(((size > 0) && (size % os::vm_page_size() == 0)),
"unexpected PerfMemory region size");
fd = create_sharedmem_resources(dirname, short_filename, size);
log_info(perf, memops)("Trying to open %s/%s", dirname, short_filename);
fd = create_sharedmem_file(dirname, short_filename, size);
FREE_C_HEAP_ARRAY(char, user_name);
FREE_C_HEAP_ARRAY(char, dirname);
@ -1019,6 +1085,8 @@ static char* mmap_create_shared(size_t size) {
// it does not go through os api, the operation has to record from here
MemTracker::record_virtual_memory_reserve_and_commit((address)mapAddress, size, CURRENT_PC, mtInternal);
log_info(perf, memops)("Successfully opened");
return mapAddress;
}
@ -1053,10 +1121,10 @@ static char* create_shared_memory(size_t size) {
//
static void delete_shared_memory(char* addr, size_t size) {
// cleanup the persistent shared memory resources. since DestroyJavaVM does
// not support unloading of the JVM, unmapping of the memory resource is
// Remove the shared memory file. Since DestroyJavaVM does
// not support unloading of the JVM, unmapping of the memory region is
// not performed. The memory will be reclaimed by the OS upon termination of
// the process. The backing store file is deleted from the file system.
// the process.
assert(!PerfDisableSharedMem, "shouldn't be here");
@ -1232,10 +1300,7 @@ void PerfMemory::delete_memory_region() {
save_memory_to_file(start(), capacity());
}
if (PerfDisableSharedMem) {
delete_standard_memory(start(), capacity());
}
else {
if (!PerfDisableSharedMem) {
delete_shared_memory(start(), capacity());
}
}

@ -0,0 +1,132 @@
/*
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* @test
* @bug 8286030
* @key cgroups
* @summary Test for hsperfdata file name conflict when two containers share the same /tmp directory
* @requires docker.support
* @library /test/lib
* @build WaitForFlagFile
* @run driver ShareTmpDir
*/
import java.io.File;
import java.io.FileOutputStream;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import jdk.test.lib.Asserts;
import jdk.test.lib.Utils;
import jdk.test.lib.containers.docker.Common;
import jdk.test.lib.containers.docker.DockerRunOptions;
import jdk.test.lib.containers.docker.DockerTestUtils;
import jdk.test.lib.process.OutputAnalyzer;
import jtreg.SkippedException;
public class ShareTmpDir {
    private static final String imageName = Common.imageName("sharetmpdir");

    /**
     * Test entry point: builds the JDK container image, runs the test and
     * removes the image afterwards unless the test utilities are configured
     * to retain it. Returns silently when docker cannot be tested.
     */
    public static void main(String[] args) throws Exception {
        if (!DockerTestUtils.canTestDocker()) {
            return;
        }

        DockerTestUtils.buildJdkContainerImage(imageName);

        try {
            test();
        } finally {
            if (!DockerTestUtils.RETAIN_IMAGE_AFTER_TEST) {
                DockerTestUtils.removeDockerImage(imageName);
            }
        }
    }

    // Output of the two containers. Each field is written by its own runner
    // thread and is read by the main thread only after that thread has been
    // joined. A field stays null if Common.run() threw in the runner thread.
    static OutputAnalyzer out1, out2;

    /**
     * Starts two containers that mount the same host directory as /tmp, waits
     * until at least one of the JVMs reports that it has started, tells both
     * to exit and then checks the perf+memops log output: if both JVMs tried
     * to use the same hsperfdata file, one of them must have logged that the
     * file is locked by another process.
     */
    private static void test() throws Exception {
        File sharedtmpdir = new File("sharedtmpdir");
        File flag = new File(sharedtmpdir, "flag");
        File started = new File(sharedtmpdir, "started");
        sharedtmpdir.mkdir();
        // Remove leftovers of a previous run so that they cannot be mistaken
        // for signals from this run.
        flag.delete();
        started.delete();

        DockerRunOptions opts = new DockerRunOptions(imageName, "/jdk/bin/java", "WaitForFlagFile");
        opts.addDockerOpts("--volume", Utils.TEST_CLASSES + ":/test-classes/");
        opts.addDockerOpts("--volume", sharedtmpdir.getAbsolutePath() + ":/tmp/");
        opts.addJavaOpts("-Xlog:os+container=trace", "-Xlog:perf+memops=debug", "-cp", "/test-classes/");

        Thread t1 = new Thread() {
            public void run() {
                try { out1 = Common.run(opts); } catch (Exception e) { e.printStackTrace(); }
            }
        };
        t1.start();

        Thread t2 = new Thread() {
            public void run() {
                try { out2 = Common.run(opts); } catch (Exception e) { e.printStackTrace(); }
            }
        };
        t2.start();

        while (!started.exists()) {
            // If both runner threads have already finished, nobody is left to
            // create the "started" file; fail instead of waiting forever.
            if (!t1.isAlive() && !t2.isAlive()) {
                throw new RuntimeException("Both containers exited before any JVM created "
                                           + started.getAbsolutePath());
            }
            System.out.println("Wait for at least one JVM to start");
            Thread.sleep(1000);
        }

        // Set the flag for the two JVMs to exit
        try (FileOutputStream fout = new FileOutputStream(flag)) {
            // Creating the empty file is all that is needed.
        }
        t1.join();
        t2.join();

        if (out1 == null || out2 == null) {
            throw new RuntimeException("Failed to run at least one of the containers; "
                                       + "see the stack trace(s) printed above");
        }

        Pattern pattern = Pattern.compile("perf,memops.*Trying to open (/tmp/hsperfdata_[a-z0-9]*/[0-9]*)");
        String file1 = findPerfDataFile(pattern, out1, "first");
        String file2 = findPerfDataFile(pattern, out2, "second");

        if (file1.equals(file2)) {
            // This should be the common case -- the first started process in a container should
            // have pid==1.
            // One of the two containers must fail to create the hsperf file.
            String s = "Cannot use file " + file1 + " because it is locked by another process";
            Asserts.assertTrue(out1.getStdout().contains(s) ||
                               out2.getStdout().contains(s),
                               "Neither container reported: " + s);
        } else {
            throw new SkippedException("Java in the two containers don't have the same pid: " + file1 + " vs " + file2);
        }
    }

    // Returns the hsperfdata file name captured by group 1 of the given pattern
    // in the stdout of one container; fails the test if the pattern is not found.
    private static String findPerfDataFile(Pattern pattern, OutputAnalyzer out, String which) {
        Matcher matcher = pattern.matcher(out.getStdout());
        Asserts.assertTrue(matcher.find(),
                           "No \"Trying to open\" log line in the output of the " + which + " container");
        return matcher.group(1);
    }
}

@ -0,0 +1,43 @@
/*
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
import java.io.File;
import java.io.FileOutputStream;
public class WaitForFlagFile {
    /**
     * Announces that this JVM is running by creating /tmp/started, then polls
     * every 500 ms until /tmp/flag appears and returns.
     */
    public static void main(String[] args) throws Exception {
        System.out.println("WaitForFlagFile: Entering");

        // Creating (or truncating) the file is the whole signal; nothing is written to it.
        try (FileOutputStream startedStream = new FileOutputStream(new File("/tmp/started"))) {
        }

        File exitFlag = new File("/tmp/flag");
        for (;;) {
            if (exitFlag.exists()) {
                break;
            }
            System.out.println("WaitForFlagFile: Waiting");
            Thread.sleep(500);
        }

        System.out.println("WaitForFlagFile: Exiting");
    }
}