From 2bbf8a2a964a64d88298a3dd184459af506e58ed Mon Sep 17 00:00:00 2001
From: Severin Gehwolf <sgehwolf@openjdk.org>
Date: Fri, 9 Oct 2020 16:25:50 +0000
Subject: [PATCH] 8245543: Cgroups: Incorrect detection logic on some systems
 (still reproducible)

Reviewed-by: bobv, shade
---
 .../os/linux/cgroupSubsystem_linux.cpp        |  7 ++-
 .../platform/CgroupSubsystemFactory.java      | 61 +++++++++++++++++--
 .../cgroup/CgroupSubsystemFactory.java        | 18 ++++++
 .../cgroup/TestCgroupSubsystemFactory.java    | 16 +++++
 4 files changed, 96 insertions(+), 6 deletions(-)

diff --git a/src/hotspot/os/linux/cgroupSubsystem_linux.cpp b/src/hotspot/os/linux/cgroupSubsystem_linux.cpp
index fada2a732bf..fb653c762bc 100644
--- a/src/hotspot/os/linux/cgroupSubsystem_linux.cpp
+++ b/src/hotspot/os/linux/cgroupSubsystem_linux.cpp
@@ -294,14 +294,15 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
         // Skip cgroup2 fs lines on hybrid or unified hierarchy.
         continue;
       }
-      any_cgroup_mounts_found = true;
       while ((token = strsep(&cptr, ",")) != NULL) {
         if (strcmp(token, "memory") == 0) {
+          any_cgroup_mounts_found = true;
           assert(cg_infos[MEMORY_IDX]._mount_path == NULL, "stomping of _mount_path");
           cg_infos[MEMORY_IDX]._mount_path = os::strdup(tmpmount);
           cg_infos[MEMORY_IDX]._root_mount_path = os::strdup(tmproot);
           cg_infos[MEMORY_IDX]._data_complete = true;
         } else if (strcmp(token, "cpuset") == 0) {
+          any_cgroup_mounts_found = true;
           if (cg_infos[CPUSET_IDX]._mount_path != NULL) {
             // On some systems duplicate cpuset controllers get mounted in addition to
             // the main cgroup controllers most likely under /sys/fs/cgroup. In that
@@ -321,11 +322,13 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
           cg_infos[CPUSET_IDX]._root_mount_path = os::strdup(tmproot);
           cg_infos[CPUSET_IDX]._data_complete = true;
         } else if (strcmp(token, "cpu") == 0) {
+          any_cgroup_mounts_found = true;
           assert(cg_infos[CPU_IDX]._mount_path == NULL, "stomping of _mount_path");
           cg_infos[CPU_IDX]._mount_path = os::strdup(tmpmount);
           cg_infos[CPU_IDX]._root_mount_path = os::strdup(tmproot);
           cg_infos[CPU_IDX]._data_complete = true;
         } else if (strcmp(token, "cpuacct") == 0) {
+          any_cgroup_mounts_found = true;
           assert(cg_infos[CPUACCT_IDX]._mount_path == NULL, "stomping of _mount_path");
           cg_infos[CPUACCT_IDX]._mount_path = os::strdup(tmpmount);
           cg_infos[CPUACCT_IDX]._root_mount_path = os::strdup(tmproot);
@@ -339,7 +342,7 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
   // Neither cgroup2 nor cgroup filesystems mounted via /proc/self/mountinfo
   // No point in continuing.
   if (!any_cgroup_mounts_found) {
-    log_trace(os, container)("No cgroup controllers mounted.");
+    log_trace(os, container)("No relevant cgroup controllers mounted.");
     cleanup(cg_infos);
     *flags = INVALID_CGROUPS_NO_MOUNT;
     return false;
diff --git a/src/java.base/linux/classes/jdk/internal/platform/CgroupSubsystemFactory.java b/src/java.base/linux/classes/jdk/internal/platform/CgroupSubsystemFactory.java
index 55fcbb1f40b..d71f5c1fbf2 100644
--- a/src/java.base/linux/classes/jdk/internal/platform/CgroupSubsystemFactory.java
+++ b/src/java.base/linux/classes/jdk/internal/platform/CgroupSubsystemFactory.java
@@ -33,6 +33,8 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 import java.util.stream.Stream;
 
 import jdk.internal.platform.cgroupv1.CgroupV1Subsystem;
@@ -46,6 +48,31 @@ public class CgroupSubsystemFactory {
     private static final String BLKIO_CTRL = "blkio";
     private static final String MEMORY_CTRL = "memory";
 
+    /*
+     * From https://www.kernel.org/doc/Documentation/filesystems/proc.txt
+     *
+     *  36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
+     *  (1)(2)(3)   (4)   (5)      (6)      (7)   (8) (9)   (10)         (11)
+     *
+     *  (1) mount ID:  unique identifier of the mount (may be reused after umount)
+     *  (2) parent ID:  ID of parent (or of self for the top of the mount tree)
+     *  (3) major:minor:  value of st_dev for files on filesystem
+     *  (4) root:  root of the mount within the filesystem
+     *  (5) mount point:  mount point relative to the process's root
+     *  (6) mount options:  per mount options
+     *  (7) optional fields:  zero or more fields of the form "tag[:value]"
+     *  (8) separator:  marks the end of the optional fields
+     *  (9) filesystem type:  name of filesystem of the form "type[.subtype]"
+     *  (10) mount source:  filesystem specific information or "none"
+     *  (11) super options:  per super block options
+     */
+    private static final Pattern MOUNTINFO_PATTERN = Pattern.compile(
+        "^[^\\s]+\\s+[^\\s]+\\s+[^\\s]+\\s+" + // (1), (2), (3)
+        "[^\\s]+\\s+([^\\s]+)\\s+" +           // (4), (5)     - group 1: mount point
+        "[^-]+-\\s+" +                         // (6), (7), (8)
+        "([^\\s]+)\\s+" +                      // (9)          - group 2: filesystem type
+        ".*$");                                // (10), (11)
+
     static CgroupMetrics create() {
         Optional<CgroupTypeResult> optResult = null;
         try {
@@ -114,12 +141,13 @@ public class CgroupSubsystemFactory {
             anyControllersEnabled = anyControllersEnabled || info.isEnabled();
         }
 
-        // If there are no mounted controllers in mountinfo, but we've only
-        // seen 0 hierarchy IDs in /proc/cgroups, we are on a cgroups v1 system.
+        // If there are no mounted, relevant cgroup controllers in mountinfo and only
+        // 0 hierarchy IDs in /proc/cgroups have been seen, we are on a cgroups v1 system.
         // However, continuing in that case does not make sense as we'd need
-        // information from mountinfo for the mounted controller paths anyway.
+        // information from mountinfo for the mounted controller paths which we wouldn't
+        // find anyway in that case.
         try (Stream<String> mntInfo = CgroupUtil.readFilePrivileged(Paths.get(mountInfo))) {
-            boolean anyCgroupMounted = mntInfo.anyMatch(line -> line.contains("cgroup"));
+            boolean anyCgroupMounted = mntInfo.anyMatch(CgroupSubsystemFactory::isRelevantControllerMount);
             if (!anyCgroupMounted && isCgroupsV2) {
                 return Optional.empty();
             }
@@ -128,6 +156,31 @@ public class CgroupSubsystemFactory {
         return Optional.of(result);
     }
 
+    private static boolean isRelevantControllerMount(String line) {
+         Matcher lineMatcher = MOUNTINFO_PATTERN.matcher(line.trim());
+         if (lineMatcher.matches()) {
+             String mountPoint = lineMatcher.group(1);
+             String fsType = lineMatcher.group(2);
+             if (fsType.equals("cgroup")) {
+                 String filename = Paths.get(mountPoint).getFileName().toString();
+                 for (String fn: filename.split(",")) {
+                     switch (fn) {
+                         case MEMORY_CTRL: // fall through
+                         case CPU_CTRL:
+                         case CPUSET_CTRL:
+                         case CPUACCT_CTRL:
+                         case BLKIO_CTRL:
+                             return true;
+                         default: break; // ignore not recognized controllers
+                     }
+                 }
+             } else if (fsType.equals("cgroup2")) {
+                 return true;
+             }
+         }
+         return false;
+    }
+
     public static final class CgroupTypeResult {
         private final boolean isCgroupV2;
         private final boolean anyControllersEnabled;
diff --git a/test/hotspot/jtreg/containers/cgroup/CgroupSubsystemFactory.java b/test/hotspot/jtreg/containers/cgroup/CgroupSubsystemFactory.java
index 3bbd1278626..d9b3c8b7fbe 100644
--- a/test/hotspot/jtreg/containers/cgroup/CgroupSubsystemFactory.java
+++ b/test/hotspot/jtreg/containers/cgroup/CgroupSubsystemFactory.java
@@ -62,6 +62,7 @@ public class CgroupSubsystemFactory {
     private Path cgroupv1MntInfoNonZeroHierarchy;
     private Path cgroupv1MntInfoDoubleCpuset;
     private Path cgroupv1MntInfoDoubleCpuset2;
+    private Path cgroupv1MntInfoSystemdOnly;
     private String mntInfoEmpty = "";
     private Path cgroupV1SelfCgroup;
     private Path cgroupV2SelfCgroup;
@@ -128,6 +129,9 @@ public class CgroupSubsystemFactory {
             "pids    3   80  1";
     private String mntInfoCgroupsV2Only =
             "28 21 0:25 / /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime shared:4 - cgroup2 none rw,seclabel,nsdelegate";
+    private String mntInfoCgroupsV1SystemdOnly =
+            "35 26 0:26 / /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime - cgroup systemd rw,name=systemd\n" +
+            "26 18 0:19 / /sys/fs/cgroup rw,relatime - tmpfs none rw,size=4k,mode=755\n";
 
     private void setup() {
         try {
@@ -168,6 +172,9 @@ public class CgroupSubsystemFactory {
 
             cgroupv1MntInfoDoubleCpuset2 = Paths.get(existingDirectory.toString(), "mnt_info_cgroupv1_double_cpuset2");
             Files.writeString(cgroupv1MntInfoDoubleCpuset2, mntInfoCgroupv1DoubleCpuset2);
+
+            cgroupv1MntInfoSystemdOnly = Paths.get(existingDirectory.toString(), "mnt_info_cgroupv1_systemd_only");
+            Files.writeString(cgroupv1MntInfoSystemdOnly, mntInfoCgroupsV1SystemdOnly);
         } catch (IOException e) {
             throw new RuntimeException(e);
         }
@@ -195,6 +202,16 @@ public class CgroupSubsystemFactory {
         System.out.println("testCgroupv1MultipleCpusetMounts PASSED!");
     }
 
+    public void testCgroupv1SystemdOnly(WhiteBox wb) {
+        String procCgroups = cgroupv1CgInfoZeroHierarchy.toString();
+        String procSelfCgroup = cgroupV1SelfCgroup.toString();
+        String procSelfMountinfo = cgroupv1MntInfoSystemdOnly.toString();
+        int retval = wb.validateCgroup(procCgroups, procSelfCgroup, procSelfMountinfo);
+        Asserts.assertEQ(INVALID_CGROUPS_NO_MOUNT, retval, "Only systemd mounted. Invalid");
+        Asserts.assertFalse(isValidCgroup(retval));
+        System.out.println("testCgroupv1SystemdOnly PASSED!");
+    }
+
     public void testCgroupv1NoMounts(WhiteBox wb) {
         String procCgroups = cgroupv1CgInfoZeroHierarchy.toString();
         String procSelfCgroup = cgroupV1SelfCgroup.toString();
@@ -261,6 +278,7 @@ public class CgroupSubsystemFactory {
         CgroupSubsystemFactory test = new CgroupSubsystemFactory();
         test.setup();
         try {
+            test.testCgroupv1SystemdOnly(wb);
             test.testCgroupv1NoMounts(wb);
             test.testCgroupv2(wb);
             test.testCgroupV1Hybrid(wb);
diff --git a/test/jdk/jdk/internal/platform/cgroup/TestCgroupSubsystemFactory.java b/test/jdk/jdk/internal/platform/cgroup/TestCgroupSubsystemFactory.java
index 6f47ea06024..e8597bb4dd8 100644
--- a/test/jdk/jdk/internal/platform/cgroup/TestCgroupSubsystemFactory.java
+++ b/test/jdk/jdk/internal/platform/cgroup/TestCgroupSubsystemFactory.java
@@ -56,6 +56,7 @@ public class TestCgroupSubsystemFactory {
     private Path cgroupv2MntInfoZeroHierarchy;
     private Path cgroupv1CgInfoNonZeroHierarchy;
     private Path cgroupv1MntInfoNonZeroHierarchy;
+    private Path cgroupv1MntInfoSystemdOnly;
     private String mntInfoEmpty = "";
     private String cgroupsZeroHierarchy =
             "#subsys_name hierarchy num_cgroups enabled\n" +
@@ -98,6 +99,9 @@ public class TestCgroupSubsystemFactory {
             "pids    3   80  1";
     private String mntInfoCgroupsV2Only =
             "28 21 0:25 / /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime shared:4 - cgroup2 cgroup2 rw,seclabel,nsdelegate";
+    private String mntInfoCgroupsV1SystemdOnly =
+            "35 26 0:26 / /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime - cgroup systemd rw,name=systemd\n" +
+            "26 18 0:19 / /sys/fs/cgroup rw,relatime - tmpfs none rw,size=4k,mode=755\n";
 
     @Before
     public void setup() {
@@ -118,6 +122,9 @@ public class TestCgroupSubsystemFactory {
 
             cgroupv1MntInfoNonZeroHierarchy = Paths.get(existingDirectory.toString(), "mountinfo_non_zero");
             Files.writeString(cgroupv1MntInfoNonZeroHierarchy, mntInfoHybrid);
+
+            cgroupv1MntInfoSystemdOnly = Paths.get(existingDirectory.toString(), "mountinfo_cgroupv1_systemd_only");
+            Files.writeString(cgroupv1MntInfoSystemdOnly, mntInfoCgroupsV1SystemdOnly);
         } catch (IOException e) {
             throw new RuntimeException(e);
         }
@@ -132,6 +139,15 @@ public class TestCgroupSubsystemFactory {
         }
     }
 
+    @Test
+    public void testCgroupv1SystemdOnly() throws IOException {
+        String cgroups = cgroupv1CgInfoZeroHierarchy.toString();
+        String mountInfo = cgroupv1MntInfoSystemdOnly.toString();
+        Optional<CgroupTypeResult> result = CgroupSubsystemFactory.determineType(mountInfo, cgroups);
+
+        assertTrue("zero hierarchy ids with no *relevant* controllers mounted", result.isEmpty());
+    }
+
     @Test
     public void testHybridCgroupsV1() throws IOException {
         String cgroups = cgroupv1CgInfoNonZeroHierarchy.toString();