package userns

import (
	"fmt"
	"os"
	"sort"
	"strings"
	"sync"
	"syscall"

	"github.com/sirupsen/logrus"
	"golang.org/x/sys/unix"

	"github.com/opencontainers/runc/libcontainer/configs"
)

// Mapping describes the UID and GID mappings requested for a user namespace.
type Mapping struct {
	UIDMappings []configs.IDMap
	GIDMappings []configs.IDMap
}

// toSys converts the mapping into the syscall.SysProcIDMap slices used to
// fill in syscall.SysProcAttr. A returned slice stays nil when the
// corresponding mapping list is empty; the slices are intentionally not
// pre-allocated so that this nil-ness is preserved.
func (m Mapping) toSys() (uids, gids []syscall.SysProcIDMap) {
	for _, uid := range m.UIDMappings {
		uids = append(uids, syscall.SysProcIDMap{
			ContainerID: int(uid.ContainerID),
			HostID:      int(uid.HostID),
			Size:        int(uid.Size),
		})
	}
	for _, gid := range m.GIDMappings {
		gids = append(gids, syscall.SysProcIDMap{
			ContainerID: int(gid.ContainerID),
			HostID:      int(gid.HostID),
			Size:        int(gid.Size),
		})
	}
	return uids, gids
}

// id returns a unique identifier for this mapping, agnostic of the order of
// the uid and gid mappings (because the order doesn't matter to the kernel).
// The set of userns handles is indexed using this ID.
//
// The result has the form "uid=<entries>;gid=<entries>", where each entry is
// "containerID:hostID:size" and entries are comma-separated.
func (m Mapping) id() string {
	encode := func(idmaps []configs.IDMap) string {
		entries := make([]string, 0, len(idmaps))
		for _, idmap := range idmaps {
			entries = append(entries, fmt.Sprintf("%d:%d:%d", idmap.ContainerID, idmap.HostID, idmap.Size))
		}
		// We don't care about the sort order -- just sort them so that
		// equivalent mappings always produce the same string.
		sort.Strings(entries)
		return strings.Join(entries, ",")
	}
	return "uid=" + encode(m.UIDMappings) + ";gid=" + encode(m.GIDMappings)
}

// Handles is a cache of user namespace file handles, indexed by Mapping.id().
// The zero value is ready to use: Get allocates the underlying map lazily.
// All methods take the internal mutex, so a Handles must not be copied after
// first use.
type Handles struct {
	m    sync.Mutex
	maps map[string]*os.File
}

// Release all resources associated with this Handle. All existing files
// returned from Get() will continue to work even after calling Release(). The
// same Handles can be reused after calling Release().
func (hs *Handles) Release() {
	hs.m.Lock()
	defer hs.m.Unlock()

	// Close the files for good measure, though GC will do that for us anyway.
	for _, file := range hs.maps {
		_ = file.Close()
	}
	hs.maps = nil
}

// spawnProc starts a helper process inside a new user namespace configured
// with the requested mappings. The caller owns the returned process and is
// responsible for killing and reaping it.
func spawnProc(req Mapping) (*os.Process, error) {
	// We need to spawn a subprocess with the requested mappings, which is
	// unfortunately quite expensive. The "safe" way of doing this is natively
	// with Go (and then spawning something like "sleep infinity"), but
	// execve() is a waste of cycles because we just need some process to have
	// the right mapping, we don't care what it's executing. The "unsafe"
	// option of doing a clone() behind the back of Go is probably okay in
	// theory as long as we just do kill(getpid(), SIGSTOP). However, if we
	// tell Go to put the new process into PTRACE_TRACEME mode, we can avoid
	// the exec and not have to faff around with the mappings.
	//
	// Note that Go's stdlib does not support newuidmap, but in the case of
	// id-mapped mounts, it seems incredibly unlikely that the user will be
	// requesting us to do a remapping as an unprivileged user with mappings
	// they have privileges over.
	logrus.Debugf("spawning dummy process for id-mapping %s", req.id())
	uidMappings, gidMappings := req.toSys()
	// We don't need to use /proc/thread-self here because the exe mm of a
	// thread-group is guaranteed to be the same for all threads by definition.
	// This lets us avoid having to do runtime.LockOSThread.
	//
	// NOTE(review): the syscall.SysProcAttr documentation asks callers to
	// hold runtime.LockOSThread when Ptrace is set. No ptrace requests are
	// issued against the child here -- confirm that skipping the lock is
	// intentional.
	return os.StartProcess("/proc/self/exe", []string{"runc", "--help"}, &os.ProcAttr{
		Sys: &syscall.SysProcAttr{
			Cloneflags:                 unix.CLONE_NEWUSER,
			UidMappings:                uidMappings,
			GidMappings:                gidMappings,
			GidMappingsEnableSetgroups: false,
			// Put the process into PTRACE_TRACEME mode to allow us to get the
			// userns without having a proper execve() target.
			Ptrace: true,
		},
	})
}

// dupFile returns a new *os.File that refers to a close-on-exec duplicate of
// f's file descriptor and carries the same name as f. Closing the returned
// file does not close f.
func dupFile(f *os.File) (*os.File, error) {
	newFd, err := unix.FcntlInt(f.Fd(), unix.F_DUPFD_CLOEXEC, 0)
	if err != nil {
		return nil, os.NewSyscallError("fcntl(F_DUPFD_CLOEXEC)", err)
	}
	return os.NewFile(uintptr(newFd), f.Name()), nil
}

// Get returns a handle to a /proc/$pid/ns/user nsfs file with the requested
// mapping. The userns nsfds produced by the spawned helper processes are
// cached, so if equivalent user namespace mappings are requested, the same
// user namespace will be returned. The caller is responsible for closing the
// returned file descriptor.
func (hs *Handles) Get(req Mapping) (file *os.File, err error) {
	hs.m.Lock()
	defer hs.m.Unlock()

	if hs.maps == nil {
		hs.maps = make(map[string]*os.File)
	}

	// Compute the cache key once: id() formats and sorts every mapping entry
	// on each call.
	key := req.id()

	file, ok := hs.maps[key]
	if !ok {
		proc, spawnErr := spawnProc(req)
		if spawnErr != nil {
			return nil, fmt.Errorf("failed to spawn dummy process for map %s: %w", key, spawnErr)
		}
		// Make sure we kill the helper process. We ignore errors because
		// there's not much we can do about them anyway; the handle opened
		// below is all we need from the process.
		defer func() {
			_ = proc.Kill()
			_, _ = proc.Wait()
		}()

		// Stash away a handle to the userns file. This is neater than keeping
		// the process alive, because Go's GC can handle files much better than
		// leaked processes, and having long-living useless processes seems
		// less than ideal.
		file, err = os.Open(fmt.Sprintf("/proc/%d/ns/user", proc.Pid))
		if err != nil {
			return nil, err
		}
		hs.maps[key] = file
	}
	// Duplicate the file, to make sure the lifecycle of each *os.File we
	// return is independent.
	return dupFile(file)
}