Skip to content

Commit

Permalink
Support idmap mounts on volumes
Browse files Browse the repository at this point in the history
This PR adds support for idmap mounts as specified in the runtime-spec.

We open the idmap source paths and call mount_setattr() in runc PARENT,
as we need privileges in the init userns for that, and then send the
fds to the child process. For this fd passing we use the same mechanism
used in other parts of the code, the _LIBCONTAINER_ env vars.

The mount is finished (unix.MoveMount) from go code, inside the userns,
so we reuse all the prepareBindMount() security checks and the remount
logic for some flags too.

This PR only supports idmap mounts when userns are used AND the mappings
are the same as those specified for the userns mapping. This limitation is to
simplify the initial implementation, as all our users so far only need
this, and we can avoid sending the mappings over netlink, creating a
userns with this custom mapping, etc. Future PRs will remove this
limitation.

As the idmap case is quite similar to the existing mount sources case we
open with O_PATH, some simple refactors are done to share more code and
to group the slices of fds in go code. To that end, we created the
mountFds struct, and added all the slices of fds there.

Co-authored-by: Francis Laniel <[email protected]>
Signed-off-by: Rodrigo Campos <[email protected]>
  • Loading branch information
rata and eiffel-fl committed Feb 9, 2023
1 parent 8528f73 commit d92ec12
Show file tree
Hide file tree
Showing 7 changed files with 274 additions and 48 deletions.
122 changes: 97 additions & 25 deletions libcontainer/container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -528,47 +528,97 @@ func (c *Container) shouldSendMountSources() bool {
return false
}

func (c *Container) newInitProcess(p *Process, cmd *exec.Cmd, messageSockPair, logFilePair filePair) (*initProcess, error) {
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initStandard))
nsMaps := make(map[configs.NamespaceType]string)
for _, ns := range c.config.Namespaces {
if ns.Path != "" {
nsMaps[ns.Type] = ns.Path
// shouldSendIdmapSources says whether the child process must set up idmap mounts with
// the mount_setattr already done in the host user namespace.
func (c *Container) shouldSendIdmapSources() bool {
// For the time being we require userns to be in use.
if !c.config.Namespaces.Contains(configs.NEWUSER) {
return false
}

// nsexec.c mount_setattr() requires CAP_SYS_ADMIN in the initial userns.
if c.config.RootlessEUID {
return false
}

// We need to send sources if there are idmap bind-mounts.
for _, m := range c.config.Mounts {
if m.IsIDMap() {
return true
}
}
_, sharePidns := nsMaps[configs.NEWPID]
data, err := c.bootstrapData(c.config.Namespaces.CloneFlags(), nsMaps, initStandard)

return false
}

func cmdAddEnvFds(cmd *exec.Cmd, env string, fds []int) error {
fdsJSON, err := json.Marshal(fds)
if err != nil {
return nil, err
return fmt.Errorf("Error creating %v: %w", env, err)
}
cmd.Env = append(cmd.Env, env+"="+string(fdsJSON))
return nil
}

if c.shouldSendMountSources() {
// Elements on this slice will be paired with mounts (see StartInitialization() and
// prepareRootfs()). This slice MUST have the same size as c.config.Mounts.
mountFds := make([]int, len(c.config.Mounts))
for i, m := range c.config.Mounts {
if !m.IsBind() {
// Non bind-mounts do not use an fd.
mountFds[i] = -1
continue
}
func (c *Container) sendSources(cmd *exec.Cmd, messageSockPair filePair) error {
if !c.shouldSendMountSources() && !c.shouldSendIdmapSources() {
return nil
}

// Elements on these slices will be paired with mounts (see StartInitialization() and
// prepareRootfs()). These slices MUST have the same size as c.config.Mounts.
mountFds := make([]int, len(c.config.Mounts))
idmapFds := make([]int, len(c.config.Mounts))
for i, m := range c.config.Mounts {
// The -1 fd is ignored later.
mountFds[i] = -1
idmapFds[i] = -1
switch {
case m.IsBindOnly() && c.shouldSendMountSources():
// The fd passed here will not be used: nsexec.c will overwrite it with dup3(). We just need
// to allocate a fd so that we know the number to pass in the environment variable. The fd
// must not be closed before cmd.Start(), so we reuse messageSockPair.child because the
// lifecycle of that fd is already taken care of.
cmd.ExtraFiles = append(cmd.ExtraFiles, messageSockPair.child)
mountFds[i] = stdioFdCount + len(cmd.ExtraFiles) - 1
case m.IsIDMap() && c.shouldSendIdmapSources():
// Same note as in the other case applies here.
cmd.ExtraFiles = append(cmd.ExtraFiles, messageSockPair.child)
idmapFds[i] = stdioFdCount + len(cmd.ExtraFiles) - 1
}
}

mountFdsJson, err := json.Marshal(mountFds)
if err != nil {
return nil, fmt.Errorf("Error creating _LIBCONTAINER_MOUNT_FDS: %w", err)
if c.shouldSendMountSources() {
if err := cmdAddEnvFds(cmd, "_LIBCONTAINER_MOUNT_FDS", mountFds); err != nil {
return err
}
}
if c.shouldSendIdmapSources() {
if err := cmdAddEnvFds(cmd, "_LIBCONTAINER_IDMAP_FDS", idmapFds); err != nil {
return err
}
}

cmd.Env = append(cmd.Env,
"_LIBCONTAINER_MOUNT_FDS="+string(mountFdsJson),
)
return nil
}

func (c *Container) newInitProcess(p *Process, cmd *exec.Cmd, messageSockPair, logFilePair filePair) (*initProcess, error) {
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initStandard))
nsMaps := make(map[configs.NamespaceType]string)
for _, ns := range c.config.Namespaces {
if ns.Path != "" {
nsMaps[ns.Type] = ns.Path
}
}
_, sharePidns := nsMaps[configs.NEWPID]
data, err := c.bootstrapData(c.config.Namespaces.CloneFlags(), nsMaps, initStandard)
if err != nil {
return nil, err
}

// Send sources for idmap and mount fds.
if err := c.sendSources(cmd, messageSockPair); err != nil {
return nil, err
}

init := &initProcess{
Expand Down Expand Up @@ -2237,6 +2287,28 @@ func (c *Container) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Namespa
})
}

// Idmap mount sources to open.
if it == initStandard && c.shouldSendIdmapSources() {
var mounts []byte
for _, m := range c.config.Mounts {
if m.IsIDMap() {
// TODO: why do we need to duplicate this that is already done in
// libcontainer/specconv/spec_linux.go
if strings.IndexByte(m.Source, 0) >= 0 {
return nil, fmt.Errorf("mount source string contains null byte: %q", m.Source)
}

mounts = append(mounts, []byte(m.Source)...)
}
mounts = append(mounts, byte(0))
}

r.AddData(&Bytemsg{
Type: IdmapSourcesAttr,
Value: mounts,
})
}

return bytes.NewReader(r.Serialize()), nil
}

Expand Down
22 changes: 14 additions & 8 deletions libcontainer/factory_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,13 @@ func StartInitialization() (err error) {
}

// Get mount files (O_PATH).
mountFds, err := parseMountFds()
mountSrcFds, err := parseFdsFromEnv("_LIBCONTAINER_MOUNT_FDS")
if err != nil {
return err
}

// Get idmap fds.
idmapFds, err := parseFdsFromEnv("_LIBCONTAINER_IDMAP_FDS")
if err != nil {
return err
}
Expand All @@ -228,7 +234,7 @@ func StartInitialization() (err error) {
}
}()

i, err := newContainerInit(it, pipe, consoleSocket, fifofd, logPipeFd, mountFds)
i, err := newContainerInit(it, pipe, consoleSocket, fifofd, logPipeFd, mountFds{sourceFds: mountSrcFds, idmapFds: idmapFds})
if err != nil {
return err
}
Expand Down Expand Up @@ -304,17 +310,17 @@ func validateID(id string) error {
return nil
}

func parseMountFds() ([]int, error) {
fdsJSON := os.Getenv("_LIBCONTAINER_MOUNT_FDS")
func parseFdsFromEnv(envVar string) ([]int, error) {
fdsJSON := os.Getenv(envVar)
if fdsJSON == "" {
// Always return the nil slice if no fd is present.
return nil, nil
}

var mountFds []int
if err := json.Unmarshal([]byte(fdsJSON), &mountFds); err != nil {
return nil, fmt.Errorf("Error unmarshalling _LIBCONTAINER_MOUNT_FDS: %w", err)
var fds []int
if err := json.Unmarshal([]byte(fdsJSON), &fds); err != nil {
return nil, fmt.Errorf("Error unmarshalling %v: %w", envVar, err)
}

return mountFds, nil
return fds, nil
}
19 changes: 15 additions & 4 deletions libcontainer/init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,17 @@ type network struct {
TempVethPeerName string `json:"temp_veth_peer_name"`
}

// mountFds groups the per-mount file descriptors that are passed to the
// container init code. Each slice is meant to be paired, by position, with the
// container mounts, and for any single mount at most one of the slices holds
// a valid fd.
type mountFds struct {
// sourceFds holds the fds to use as source when mounting.
// Its size should be the same as the container mounts, as entries are paired
// with them. The value -1 is used when no fd is needed for the mount.
// A position that holds a valid fd here must not hold a valid fd in any
// other slice of this struct: only one of these fds is used on any single
// mount.
sourceFds []int
// idmapFds follows the same rules as sourceFds, but holds fds of already
// created idmap mounts, to use with unix.MoveMount().
idmapFds []int
}

// initConfig is used for transferring parameters from Exec() to Init()
type initConfig struct {
Args []string `json:"args"`
Expand Down Expand Up @@ -75,7 +86,7 @@ type initer interface {
Init() error
}

func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd, logFd int, mountFds []int) (initer, error) {
func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd, logFd int, mountFds mountFds) (initer, error) {
var config *initConfig
if err := json.NewDecoder(pipe).Decode(&config); err != nil {
return nil, err
Expand All @@ -85,9 +96,9 @@ func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd,
}
switch t {
case initSetns:
// mountFds must be nil in this case. We don't mount while doing runc exec.
if mountFds != nil {
return nil, errors.New("mountFds must be nil; can't mount from exec")
// mount and idmap fds must be nil in this case. We don't mount while doing runc exec.
if mountFds.sourceFds != nil || mountFds.idmapFds != nil {
return nil, errors.New("mount and idmap fds must be nil; can't mount from exec")
}

return &linuxSetnsInit{
Expand Down
1 change: 1 addition & 0 deletions libcontainer/message_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ const (
UidmapPathAttr uint16 = 27288
GidmapPathAttr uint16 = 27289
MountSourcesAttr uint16 = 27290
IdmapSourcesAttr uint16 = 27291
)

type Int32msg struct {
Expand Down
Loading

0 comments on commit d92ec12

Please sign in to comment.