8293472: Incorrect container resource limit detection if manual cgroup fs mounts present

Reviewed-by: sgehwolf, iklam
This commit is contained in:
casparcwang 2022-09-15 08:47:05 +00:00 committed by Severin Gehwolf
parent 1caba0f13c
commit 8f3bbe950f
6 changed files with 135 additions and 54 deletions

View File

@ -123,6 +123,32 @@ CgroupSubsystem* CgroupSubsystemFactory::create() {
return new CgroupV1Subsystem(cpuset, cpu, cpuacct, pids, memory);
}
void CgroupSubsystemFactory::set_controller_paths(CgroupInfo* cg_infos,
int controller,
const char* name,
char* mount_path,
char* root_path) {
if (cg_infos[controller]._mount_path != NULL) {
// On some systems duplicate controllers get mounted in addition to
// the main cgroup controllers most likely under /sys/fs/cgroup. In that
// case pick the one under /sys/fs/cgroup and discard others.
if (strstr(cg_infos[controller]._mount_path, "/sys/fs/cgroup") != cg_infos[controller]._mount_path) {
log_debug(os, container)("Duplicate %s controllers detected. Picking %s, skipping %s.",
name, mount_path, cg_infos[controller]._mount_path);
os::free(cg_infos[controller]._mount_path);
os::free(cg_infos[controller]._root_mount_path);
cg_infos[controller]._mount_path = os::strdup(mount_path);
cg_infos[controller]._root_mount_path = os::strdup(root_path);
} else {
log_debug(os, container)("Duplicate %s controllers detected. Picking %s, skipping %s.",
name, cg_infos[controller]._mount_path, mount_path);
}
} else {
cg_infos[controller]._mount_path = os::strdup(mount_path);
cg_infos[controller]._root_mount_path = os::strdup(root_path);
}
}
bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
const char* proc_cgroups,
const char* proc_self_cgroup,
@ -288,7 +314,6 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
bool cgroupv2_mount_point_found = false;
bool any_cgroup_mounts_found = false;
while ((p = fgets(buf, MAXPATHLEN, mntinfo)) != NULL) {
char tmp_mount_point[MAXPATHLEN+1];
char tmp_fs_type[MAXPATHLEN+1];
char tmproot[MAXPATHLEN+1];
char tmpmount[MAXPATHLEN+1];
@ -299,15 +324,13 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
// Cgroup v2 relevant info. We only look for the _mount_path iff is_cgroupsV2 so
// as to avoid memory stomping of the _mount_path pointer later on in the cgroup v1
// block in the hybrid case.
//
if (is_cgroupsV2 && sscanf(p, "%*d %*d %*d:%*d %*s %s %*[^-]- %s %*s %*s", tmp_mount_point, tmp_fs_type) == 2) {
if (is_cgroupsV2 && sscanf(p, "%*d %*d %*d:%*d %s %s %*[^-]- %s %*s %*s", tmproot, tmpmount, tmp_fs_type) == 3) {
// we likely have an early match return (e.g. cgroup fs match), be sure we have cgroup2 as fstype
if (!cgroupv2_mount_point_found && strcmp("cgroup2", tmp_fs_type) == 0) {
if (strcmp("cgroup2", tmp_fs_type) == 0) {
cgroupv2_mount_point_found = true;
any_cgroup_mounts_found = true;
for (int i = 0; i < CG_INFO_LENGTH; i++) {
assert(cg_infos[i]._mount_path == NULL, "_mount_path memory stomping");
cg_infos[i]._mount_path = os::strdup(tmp_mount_point);
set_controller_paths(cg_infos, i, "(cg2, unified)", tmpmount, tmproot);
}
}
}
@ -332,47 +355,23 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
while ((token = strsep(&cptr, ",")) != NULL) {
if (strcmp(token, "memory") == 0) {
any_cgroup_mounts_found = true;
assert(cg_infos[MEMORY_IDX]._mount_path == NULL, "stomping of _mount_path");
cg_infos[MEMORY_IDX]._mount_path = os::strdup(tmpmount);
cg_infos[MEMORY_IDX]._root_mount_path = os::strdup(tmproot);
set_controller_paths(cg_infos, MEMORY_IDX, token, tmpmount, tmproot);
cg_infos[MEMORY_IDX]._data_complete = true;
} else if (strcmp(token, "cpuset") == 0) {
any_cgroup_mounts_found = true;
if (cg_infos[CPUSET_IDX]._mount_path != NULL) {
// On some systems duplicate cpuset controllers get mounted in addition to
// the main cgroup controllers most likely under /sys/fs/cgroup. In that
// case pick the one under /sys/fs/cgroup and discard others.
if (strstr(cg_infos[CPUSET_IDX]._mount_path, "/sys/fs/cgroup") != cg_infos[CPUSET_IDX]._mount_path) {
log_warning(os, container)("Duplicate cpuset controllers detected. Picking %s, skipping %s.",
tmpmount, cg_infos[CPUSET_IDX]._mount_path);
os::free(cg_infos[CPUSET_IDX]._mount_path);
cg_infos[CPUSET_IDX]._mount_path = os::strdup(tmpmount);
} else {
log_warning(os, container)("Duplicate cpuset controllers detected. Picking %s, skipping %s.",
cg_infos[CPUSET_IDX]._mount_path, tmpmount);
}
} else {
cg_infos[CPUSET_IDX]._mount_path = os::strdup(tmpmount);
}
cg_infos[CPUSET_IDX]._root_mount_path = os::strdup(tmproot);
set_controller_paths(cg_infos, CPUSET_IDX, token, tmpmount, tmproot);
cg_infos[CPUSET_IDX]._data_complete = true;
} else if (strcmp(token, "cpu") == 0) {
any_cgroup_mounts_found = true;
assert(cg_infos[CPU_IDX]._mount_path == NULL, "stomping of _mount_path");
cg_infos[CPU_IDX]._mount_path = os::strdup(tmpmount);
cg_infos[CPU_IDX]._root_mount_path = os::strdup(tmproot);
set_controller_paths(cg_infos, CPU_IDX, token, tmpmount, tmproot);
cg_infos[CPU_IDX]._data_complete = true;
} else if (strcmp(token, "cpuacct") == 0) {
any_cgroup_mounts_found = true;
assert(cg_infos[CPUACCT_IDX]._mount_path == NULL, "stomping of _mount_path");
cg_infos[CPUACCT_IDX]._mount_path = os::strdup(tmpmount);
cg_infos[CPUACCT_IDX]._root_mount_path = os::strdup(tmproot);
set_controller_paths(cg_infos, CPUACCT_IDX, token, tmpmount, tmproot);
cg_infos[CPUACCT_IDX]._data_complete = true;
} else if (strcmp(token, "pids") == 0) {
any_cgroup_mounts_found = true;
assert(cg_infos[PIDS_IDX]._mount_path == NULL, "stomping of _mount_path");
cg_infos[PIDS_IDX]._mount_path = os::strdup(tmpmount);
cg_infos[PIDS_IDX]._root_mount_path = os::strdup(tmproot);
set_controller_paths(cg_infos, PIDS_IDX, token, tmpmount, tmproot);
cg_infos[PIDS_IDX]._data_complete = true;
}
}

View File

@ -311,6 +311,11 @@ class CgroupSubsystemFactory: AllStatic {
}
#endif
static void set_controller_paths(CgroupInfo* cg_infos,
int controller,
const char* name,
char* mount_path,
char* root_path);
// Determine the cgroup type (version 1 or version 2), given
// relevant paths to files. Sets 'flags' accordingly.
static bool determine_type(CgroupInfo* cg_infos,

View File

@ -62,11 +62,19 @@ public class CgroupSubsystemFactory {
private Path cgroupv1MntInfoZeroHierarchy;
private Path cgroupv2CgInfoZeroHierarchy;
private Path cgroupv2MntInfoZeroHierarchy;
private Path cgroupv2MntInfoDouble;
private Path cgroupv2MntInfoDouble2;
private Path cgroupv1CgInfoNonZeroHierarchy;
private Path cgroupv1MntInfoNonZeroHierarchyOtherOrder;
private Path cgroupv1MntInfoNonZeroHierarchy;
private Path cgroupv1MntInfoDoubleCpuset;
private Path cgroupv1MntInfoDoubleCpuset2;
private Path cgroupv1MntInfoDoubleMemory;
private Path cgroupv1MntInfoDoubleMemory2;
private Path cgroupv1MntInfoDoubleCpu;
private Path cgroupv1MntInfoDoubleCpu2;
private Path cgroupv1MntInfoDoublePids;
private Path cgroupv1MntInfoDoublePids2;
private Path cgroupv1MntInfoSystemdOnly;
private String mntInfoEmpty = "";
private Path cgroupV1SelfCgroup;
@ -160,6 +168,15 @@ public class CgroupSubsystemFactory {
private String mntInfoCgroupv1MoreCpusetLine = "121 32 0:37 / /cpusets rw,relatime shared:69 - cgroup none rw,cpuset\n";
private String mntInfoCgroupv1DoubleCpuset = mntInfoCgroupv1MoreCpusetLine + mntInfoHybrid;
private String mntInfoCgroupv1DoubleCpuset2 = mntInfoHybrid + mntInfoCgroupv1MoreCpusetLine;
private String mntInfoCgroupv1MoreMemoryLine = "1100 1098 0:28 / /memory rw,nosuid,nodev,noexec,relatime master:6 - cgroup cgroup rw,memory\n";
private String mntInfoCgroupv1DoubleMemory = mntInfoCgroupv1MoreMemoryLine + mntInfoHybrid;
private String mntInfoCgroupv1DoubleMemory2 = mntInfoHybrid + mntInfoCgroupv1MoreMemoryLine;
private String mntInfoCgroupv1DoubleCpuLine = "1101 1098 0:29 / /cpu,cpuacct rw,nosuid,nodev,noexec,relatime master:7 - cgroup cgroup rw,cpu,cpuacct\n";
private String mntInfoCgroupv1DoubleCpu = mntInfoCgroupv1DoubleCpuLine + mntInfoHybrid;
private String mntInfoCgroupv1DoubleCpu2 = mntInfoHybrid + mntInfoCgroupv1DoubleCpuLine;
private String mntInfoCgroupv1DoublePidsLine = "1107 1098 0:35 / /pids rw,nosuid,nodev,noexec,relatime master:13 - cgroup cgroup rw,pids\n";
private String mntInfoCgroupv1DoublePids = mntInfoCgroupv1DoublePidsLine + mntInfoHybrid;
private String mntInfoCgroupv1DoublePids2 = mntInfoHybrid + mntInfoCgroupv1DoublePidsLine;
private String cgroupsNonZeroHierarchy =
"#subsys_name hierarchy num_cgroups enabled\n" +
"cpuset 3 1 1\n" +
@ -175,7 +192,11 @@ public class CgroupSubsystemFactory {
"hugetlb 6 1 1\n" +
"pids 9 80 1"; // hierarchy has to match procSelfCgroupHybridContent
private String mntInfoCgroupsV2Only =
"28 21 0:25 / /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime shared:4 - cgroup2 none rw,seclabel,nsdelegate";
"28 21 0:25 / /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime shared:4 - cgroup2 none rw,seclabel,nsdelegate\n";
private String mntInfoCgroupsV2MoreLine =
"240 232 0:24 /../.. /cgroup-in ro,relatime - cgroup2 cgroup2 rw,nsdelegate\n";
private String mntInfoCgroupsV2Double = mntInfoCgroupsV2MoreLine + mntInfoCgroupsV2Only;
private String mntInfoCgroupsV2Double2 = mntInfoCgroupsV2Only + mntInfoCgroupsV2MoreLine;
private String mntInfoCgroupsV1SystemdOnly =
"35 26 0:26 / /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime - cgroup systemd rw,name=systemd\n" +
"26 18 0:19 / /sys/fs/cgroup rw,relatime - tmpfs none rw,size=4k,mode=755\n";
@ -217,6 +238,12 @@ public class CgroupSubsystemFactory {
cgroupv2MntInfoZeroHierarchy = Paths.get(existingDirectory.toString(), "mountinfo_cgroupv2");
Files.writeString(cgroupv2MntInfoZeroHierarchy, mntInfoCgroupsV2Only);
cgroupv2MntInfoDouble = Paths.get(existingDirectory.toString(), "mountinfo_cgroupv2_double");
Files.writeString(cgroupv2MntInfoDouble, mntInfoCgroupsV2Double);
cgroupv2MntInfoDouble2 = Paths.get(existingDirectory.toString(), "mountinfo_cgroupv2_double2");
Files.writeString(cgroupv2MntInfoDouble2, mntInfoCgroupsV2Double2);
cgroupv1CgInfoNonZeroHierarchy = Paths.get(existingDirectory.toString(), "cgroups_non_zero");
Files.writeString(cgroupv1CgInfoNonZeroHierarchy, cgroupsNonZeroHierarchy);
@ -244,6 +271,24 @@ public class CgroupSubsystemFactory {
cgroupv1MntInfoDoubleCpuset2 = Paths.get(existingDirectory.toString(), "mnt_info_cgroupv1_double_cpuset2");
Files.writeString(cgroupv1MntInfoDoubleCpuset2, mntInfoCgroupv1DoubleCpuset2);
cgroupv1MntInfoDoubleMemory = Paths.get(existingDirectory.toString(), "mnt_info_cgroupv1_double_memory");
Files.writeString(cgroupv1MntInfoDoubleMemory, mntInfoCgroupv1DoubleMemory);
cgroupv1MntInfoDoubleMemory2 = Paths.get(existingDirectory.toString(), "mnt_info_cgroupv1_double_memory2");
Files.writeString(cgroupv1MntInfoDoubleMemory2, mntInfoCgroupv1DoubleMemory2);
cgroupv1MntInfoDoubleCpu = Paths.get(existingDirectory.toString(), "mnt_info_cgroupv1_double_cpu");
Files.writeString(cgroupv1MntInfoDoubleCpu, mntInfoCgroupv1DoubleCpu);
cgroupv1MntInfoDoubleCpu2 = Paths.get(existingDirectory.toString(), "mnt_info_cgroupv1_double_cpu2");
Files.writeString(cgroupv1MntInfoDoubleCpu2, mntInfoCgroupv1DoubleCpu2);
cgroupv1MntInfoDoublePids = Paths.get(existingDirectory.toString(), "mnt_info_cgroupv1_double_pids");
Files.writeString(cgroupv1MntInfoDoublePids, mntInfoCgroupv1DoublePids);
cgroupv1MntInfoDoublePids2 = Paths.get(existingDirectory.toString(), "mnt_info_cgroupv1_double_pids2");
Files.writeString(cgroupv1MntInfoDoublePids2, mntInfoCgroupv1DoublePids2);
cgroupv1MntInfoSystemdOnly = Paths.get(existingDirectory.toString(), "mnt_info_cgroupv1_systemd_only");
Files.writeString(cgroupv1MntInfoSystemdOnly, mntInfoCgroupsV1SystemdOnly);
@ -291,14 +336,14 @@ public class CgroupSubsystemFactory {
System.out.println("testCgroupv1JoinControllerMounts PASSED!");
}
public void testCgroupv1MultipleCpusetMounts(WhiteBox wb, Path mountInfo) {
public void testCgroupv1MultipleControllerMounts(WhiteBox wb, Path mountInfo) {
String procCgroups = cgroupv1CgInfoNonZeroHierarchy.toString();
String procSelfCgroup = cgroupV1SelfCgroup.toString();
String procSelfMountinfo = mountInfo.toString();
int retval = wb.validateCgroup(procCgroups, procSelfCgroup, procSelfMountinfo);
Asserts.assertEQ(CGROUPS_V1, retval, "Multiple cpuset controllers, but only one in /sys/fs/cgroup");
Asserts.assertEQ(CGROUPS_V1, retval, "Multiple controllers, but only one in /sys/fs/cgroup");
Asserts.assertTrue(isValidCgroup(retval));
System.out.println("testCgroupv1MultipleCpusetMounts PASSED!");
System.out.println("testCgroupv1MultipleControllerMounts PASSED!");
}
public void testCgroupv1SystemdOnly(WhiteBox wb) {
@ -341,10 +386,10 @@ public class CgroupSubsystemFactory {
System.out.println("testCgroupv1MissingMemoryController PASSED!");
}
public void testCgroupv2(WhiteBox wb) {
public void testCgroupv2(WhiteBox wb, Path mountInfo) {
String procCgroups = cgroupv2CgInfoZeroHierarchy.toString();
String procSelfCgroup = cgroupV2SelfCgroup.toString();
String procSelfMountinfo = cgroupv2MntInfoZeroHierarchy.toString();
String procSelfMountinfo = mountInfo.toString();
int retval = wb.validateCgroup(procCgroups, procSelfCgroup, procSelfMountinfo);
Asserts.assertEQ(CGROUPS_V2, retval, "Expected");
Asserts.assertTrue(isValidCgroup(retval));
@ -388,13 +433,21 @@ public class CgroupSubsystemFactory {
try {
test.testCgroupv1SystemdOnly(wb);
test.testCgroupv1NoMounts(wb);
test.testCgroupv2(wb);
test.testCgroupv2(wb, test.cgroupv2MntInfoZeroHierarchy);
test.testCgroupv2(wb, test.cgroupv2MntInfoDouble);
test.testCgroupv2(wb, test.cgroupv2MntInfoDouble2);
test.testCgroupV1Hybrid(wb);
test.testCgroupV1HybridMntInfoOrder(wb);
test.testCgroupv1MissingMemoryController(wb);
test.testCgroupv2NoCgroup2Fs(wb);
test.testCgroupv1MultipleCpusetMounts(wb, test.cgroupv1MntInfoDoubleCpuset);
test.testCgroupv1MultipleCpusetMounts(wb, test.cgroupv1MntInfoDoubleCpuset2);
test.testCgroupv1MultipleControllerMounts(wb, test.cgroupv1MntInfoDoubleCpuset);
test.testCgroupv1MultipleControllerMounts(wb, test.cgroupv1MntInfoDoubleCpuset2);
test.testCgroupv1MultipleControllerMounts(wb, test.cgroupv1MntInfoDoubleMemory);
test.testCgroupv1MultipleControllerMounts(wb, test.cgroupv1MntInfoDoubleMemory2);
test.testCgroupv1MultipleControllerMounts(wb, test.cgroupv1MntInfoDoubleCpu);
test.testCgroupv1MultipleControllerMounts(wb, test.cgroupv1MntInfoDoubleCpu2);
test.testCgroupv1MultipleControllerMounts(wb, test.cgroupv1MntInfoDoublePids);
test.testCgroupv1MultipleControllerMounts(wb, test.cgroupv1MntInfoDoublePids2);
test.testCgroupv1JoinControllerCombo(wb);
test.testNonZeroHierarchyOnlyFreezer(wb);
} finally {

View File

@ -53,6 +53,7 @@ public class DockerBasicTest {
try {
testJavaVersion();
testHelloDocker();
testJavaVersionWithCgMounts();
} finally {
if (!DockerTestUtils.RETAIN_IMAGE_AFTER_TEST) {
DockerTestUtils.removeDockerImage(imageNameAndTag);
@ -81,4 +82,17 @@ public class DockerBasicTest {
.shouldHaveExitValue(0)
.shouldContain("Hello Docker");
}
private static void testJavaVersionWithCgMounts() throws Exception {
DockerRunOptions opts =
new DockerRunOptions(imageNameAndTag, "/jdk/bin/java", "-version")
.addDockerOpts("-v", "/sys/fs/cgroup:/cgroups-in:ro");
// Duplicated cgroup mounts should be handled by the container detection
// code and should not cause any error/warning output.
DockerTestUtils.dockerRunJava(opts)
.shouldHaveExitValue(0)
.shouldNotMatch("\\[os,container *\\]");
}
}

View File

@ -70,10 +70,11 @@ public class TestCPUAwareness {
testActiveProcessorCount(2, 2);
// cpu quota and period
testCpuQuotaAndPeriod(50*1000, 100*1000);
testCpuQuotaAndPeriod(100*1000, 100*1000);
testCpuQuotaAndPeriod(150*1000, 100*1000);
testCpuQuotaAndPeriod(400*1000, 100*1000);
testCpuQuotaAndPeriod(50*1000, 100*1000, false);
testCpuQuotaAndPeriod(100*1000, 100*1000, false);
testCpuQuotaAndPeriod(150*1000, 100*1000, false);
testCpuQuotaAndPeriod(400*1000, 100*1000, false);
testCpuQuotaAndPeriod(50*1000, 100*1000, true /* additional cgroup mount */);
testOperatingSystemMXBeanAwareness("0.5", "1");
testOperatingSystemMXBeanAwareness("1.0", "1");
@ -153,7 +154,7 @@ public class TestCPUAwareness {
}
private static void testCpuQuotaAndPeriod(int quota, int period)
private static void testCpuQuotaAndPeriod(int quota, int period, boolean addCgmounts)
throws Exception {
Common.logNewTestCase("test cpu quota and period: ");
System.out.println("quota = " + quota);
@ -167,6 +168,10 @@ public class TestCPUAwareness {
.addDockerOpts("--cpu-period=" + period)
.addDockerOpts("--cpu-quota=" + quota);
if (addCgmounts) {
opts = opts.addDockerOpts("--volume", "/sys/fs/cgroup:/cgroups-in:ro");
}
Common.run(opts)
.shouldMatch("CPU Period is.*" + period)
.shouldMatch("CPU Quota is.*" + quota)

View File

@ -63,10 +63,11 @@ public class TestMemoryAwareness {
DockerTestUtils.buildJdkContainerImage(imageName);
try {
testMemoryLimit("100m", "104857600");
testMemoryLimit("500m", "524288000");
testMemoryLimit("1g", "1073741824");
testMemoryLimit("4g", "4294967296");
testMemoryLimit("100m", "104857600", false);
testMemoryLimit("500m", "524288000", false);
testMemoryLimit("1g", "1073741824", false);
testMemoryLimit("4g", "4294967296", false);
testMemoryLimit("100m", "104857600", true /* additional cgroup mount */);
testMemorySoftLimit("500m", "524288000");
testMemorySoftLimit("1g", "1073741824");
@ -98,7 +99,7 @@ public class TestMemoryAwareness {
}
private static void testMemoryLimit(String valueToSet, String expectedTraceValue)
private static void testMemoryLimit(String valueToSet, String expectedTraceValue, boolean addCgmounts)
throws Exception {
Common.logNewTestCase("memory limit: " + valueToSet);
@ -106,6 +107,10 @@ public class TestMemoryAwareness {
DockerRunOptions opts = Common.newOpts(imageName)
.addDockerOpts("--memory", valueToSet);
if (addCgmounts) {
opts = opts.addDockerOpts("--volume", "/sys/fs/cgroup:/cgroups-in:ro");
}
Common.run(opts)
.shouldMatch("Memory Limit is:.*" + expectedTraceValue);
}