Fix cgroup2 mount for rootless case

In case of rootless, cgroup2 mount is not possible (see [1] for more
details), so since commit 9c81440fb5 runc bind-mounts the whole
/sys/fs/cgroup into container.

Problem is, if cgroupns is enabled, /sys/fs/cgroup inside the container
is supposed to show the cgroup files for this cgroup, not the root one.

The fix is to pass through and use the cgroup path in case cgroup2
mount failed, cgroupns is enabled, and the path is non-empty.

Surely this requires the /sys/fs/cgroup mount in the spec, so modify
runc spec --rootless to keep it.

Before:

	$ ./runc run aaa
	# find /sys/fs/cgroup/ -type d
	/sys/fs/cgroup
	/sys/fs/cgroup/user.slice
	/sys/fs/cgroup/user.slice/user-1000.slice
	/sys/fs/cgroup/user.slice/user-1000.slice/user@1000.service
	...
	# ls -l /sys/fs/cgroup/cgroup.controllers
	-r--r--r--    1 nobody   nogroup          0 Feb 24 02:22 /sys/fs/cgroup/cgroup.controllers
	# wc -w /sys/fs/cgroup/cgroup.procs
	142 /sys/fs/cgroup/cgroup.procs
	# cat /sys/fs/cgroup/memory.current
	cat: can't open '/sys/fs/cgroup/memory.current': No such file or directory

After:

	# find /sys/fs/cgroup/ -type d
	/sys/fs/cgroup/
	# ls -l /sys/fs/cgroup/cgroup.controllers
	-r--r--r--    1 root     root             0 Feb 24 02:43 /sys/fs/cgroup/cgroup.controllers
	# wc -w /sys/fs/cgroup/cgroup.procs
	2 /sys/fs/cgroup/cgroup.procs
	# cat /sys/fs/cgroup/memory.current
	577536

[1] https://github.com/opencontainers/runc/issues/2158

Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
This commit is contained in:
Kir Kolyshkin
2021-02-23 18:27:42 -08:00
parent 3826db196d
commit ff692f289b
4 changed files with 34 additions and 16 deletions
+21 -7
View File
@@ -31,9 +31,11 @@ import (
const defaultMountFlags = unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV
type mountConfig struct {
root string
label string
cgroupns bool
root string
label string
cgroup2Path string
rootlessCgroups bool
cgroupns bool
}
// needsSetupDev returns true if /dev needs to be set up.
@@ -56,9 +58,11 @@ func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) {
}
mountConfig := &mountConfig{
root: config.Rootfs,
label: config.MountLabel,
cgroupns: config.Namespaces.Contains(configs.NEWCGROUP),
root: config.Rootfs,
label: config.MountLabel,
cgroup2Path: iConfig.Cgroup2Path,
rootlessCgroups: iConfig.RootlessCgroups,
cgroupns: config.Namespaces.Contains(configs.NEWCGROUP),
}
setupDev := needsSetupDev(config)
for _, m := range config.Mounts {
@@ -307,7 +311,17 @@ func mountCgroupV2(m *configs.Mount, c *mountConfig) error {
// when we are in UserNS but CgroupNS is not unshared, we cannot mount cgroup2 (#2158)
if err == unix.EPERM || err == unix.EBUSY {
src := fs2.UnifiedMountpoint
return unix.Mount(src, dest, "", uintptr(m.Flags)|unix.MS_BIND, "")
if c.cgroupns && c.cgroup2Path != "" {
// Emulate cgroupns by bind-mounting
// the container cgroup path rather than
// the whole /sys/fs/cgroup.
src = c.cgroup2Path
}
err = unix.Mount(src, dest, "", uintptr(m.Flags)|unix.MS_BIND, "")
if err == unix.ENOENT && c.rootlessCgroups {
err = nil
}
return err
}
return err
}