From 810c5f0cc36fc85a5703c5db105762796ba6e67c Mon Sep 17 00:00:00 2001 From: lifubang Date: Mon, 2 Oct 2023 18:01:55 +0800 Subject: [PATCH 1/5] kill all processes in container without private pid ns Signed-off-by: lifubang --- libcontainer/container_linux.go | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index ae5d4fb46b4..62710c1ab01 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -368,12 +368,6 @@ func (c *Container) Signal(s os.Signal) error { if err != nil { return err } - // To avoid a PID reuse attack, don't kill non-running container. - switch status { - case Running, Created, Paused: - default: - return ErrNotRunning - } // When a container has its own PID namespace, inside it the init PID // is 1, and thus it is handled specially by the kernel. In particular, @@ -383,8 +377,19 @@ func (c *Container) Signal(s os.Signal) error { // OTOH, if PID namespace is shared, we should kill all pids to avoid // leftover processes. if s == unix.SIGKILL && !c.config.Namespaces.IsPrivate(configs.NEWPID) { + if pids, err := c.cgroupManager.GetAllPids(); c.ignoreCgroupError(err) != nil { + return err + } else if len(pids) == 0 { + return ErrNotRunning + } err = signalAllProcesses(c.cgroupManager, unix.SIGKILL) } else { + // To avoid a PID reuse attack, don't kill non-running container. + switch status { + case Running, Created, Paused: + default: + return ErrNotRunning + } err = c.initProcess.signal(s) } if err != nil { From 120dad41376af412c6f6c6dace0fbd7f2b39b42e Mon Sep 17 00:00:00 2001 From: lifubang Date: Mon, 2 Oct 2023 18:02:14 +0800 Subject: [PATCH 2/5] kill all processes in container without private pid ns before destory Signed-off-by: lifubang --- libcontainer/container_linux.go | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 62710c1ab01..11645b648c6 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -873,6 +873,26 @@ func (c *Container) newInitConfig(process *Process) *initConfig { func (c *Container) Destroy() error { c.m.Lock() defer c.m.Unlock() + if !c.config.Namespaces.IsPrivate(configs.NEWPID) { + const retries = 10 + for i := 0; i < retries; i++ { + pids, err := c.cgroupManager.GetAllPids() + if c.ignoreCgroupError(err) != nil { + return err + } + if len(pids) > 0 { + if err := signalAllProcesses(c.cgroupManager, unix.SIGKILL); c.ignoreCgroupError(err) != nil { + return err + } + if i == retries-1 { + return errors.New("some processes are still running in the container") + } + time.Sleep(100 * time.Millisecond) + } else { + break + } + } + } return c.state.destroy() } From 04fc3fe069550365d1a006bca8afc33eda8ebb6b Mon Sep 17 00:00:00 2001 From: lifubang Date: Tue, 10 Oct 2023 11:10:05 +0800 Subject: [PATCH 3/5] increase the retry times from 5 to 10 when removing cgroup paths Signed-off-by: lifubang --- libcontainer/cgroups/utils.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcontainer/cgroups/utils.go b/libcontainer/cgroups/utils.go index 965c607c682..b59d55c129a 100644 --- a/libcontainer/cgroups/utils.go +++ b/libcontainer/cgroups/utils.go @@ -259,7 +259,7 @@ func RemovePath(path string) error { // If after all there are not removed cgroups - appropriate error will be // returned. func RemovePaths(paths map[string]string) (err error) { - const retries = 5 + const retries = 10 delay := 10 * time.Millisecond for i := 0; i < retries; i++ { if i != 0 { From 48ff07c2ee1c375116ec34c5a648d53d60f83b79 Mon Sep 17 00:00:00 2001 From: lifubang Date: Tue, 10 Oct 2023 11:34:09 +0800 Subject: [PATCH 4/5] never ignore cgroup destroy error for runc-delete Signed-off-by: lifubang --- checkpoint.go | 4 +++- delete.go | 7 ++----- libcontainer/state_linux.go | 3 +++ utils_linux.go | 10 +++++----- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/checkpoint.go b/checkpoint.go index a8a27f248bc..f49b346dbf6 100644 --- a/checkpoint.go +++ b/checkpoint.go @@ -70,7 +70,9 @@ checkpointed.`, err = container.Checkpoint(options) if err == nil && !(options.LeaveRunning || options.PreDump) { // Destroy the container unless we tell CRIU to keep it. - destroy(container) + if err := destroy(container); err != nil { + logrus.Warn(err) + } } return err }, diff --git a/delete.go b/delete.go index 682101ccad8..38c6f88200e 100644 --- a/delete.go +++ b/delete.go @@ -18,8 +18,7 @@ func killContainer(container *libcontainer.Container) error { for i := 0; i < 100; i++ { time.Sleep(100 * time.Millisecond) if err := container.Signal(unix.Signal(0)); err != nil { - destroy(container) - return nil + return destroy(container) } } return errors.New("container init still running") @@ -72,7 +71,7 @@ status of "ubuntu01" as "stopped" the following will delete resources held for } switch s { case libcontainer.Stopped: - destroy(container) + return destroy(container) case libcontainer.Created: return killContainer(container) default: @@ -81,7 +80,5 @@ status of "ubuntu01" as "stopped" the following will delete resources held for } return fmt.Errorf("cannot delete container %s that is not stopped: %s", id, s) } - - return nil }, } diff --git a/libcontainer/state_linux.go b/libcontainer/state_linux.go index 8d8f31d3678..aa406767126 100644 --- a/libcontainer/state_linux.go +++ b/libcontainer/state_linux.go @@ -36,6 +36,9 @@ type containerState interface { func destroy(c *Container) error { err := c.cgroupManager.Destroy() + if err != nil { + return err + } if c.intelRdtManager != nil { if ierr := c.intelRdtManager.Destroy(); err == nil { err = ierr diff --git a/utils_linux.go b/utils_linux.go index 0f787cb3387..14ab69b955b 100644 --- a/utils_linux.go +++ b/utils_linux.go @@ -82,10 +82,8 @@ func newProcess(p specs.Process) (*libcontainer.Process, error) { return lp, nil } -func destroy(container *libcontainer.Container) { - if err := container.Destroy(); err != nil { - logrus.Error(err) - } +func destroy(container *libcontainer.Container) error { + return container.Destroy() } // setupIO modifies the given process config according to the options. @@ -289,7 +287,9 @@ func (r *runner) run(config *specs.Process) (int, error) { func (r *runner) destroy() { if r.shouldDestroy { - destroy(r.container) + if err := destroy(r.container); err != nil { + logrus.Warn(err) + } } } From 9225eaa6b195fc25b72d712ad4997761743ff6bf Mon Sep 17 00:00:00 2001 From: lifubang Date: Mon, 6 Nov 2023 19:33:10 +0800 Subject: [PATCH 5/5] add a testcase for runc delete the container with host pid ns Signed-off-by: lifubang --- tests/integration/delete.bats | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/integration/delete.bats b/tests/integration/delete.bats index e9b80b64d06..887ef8b94ab 100644 --- a/tests/integration/delete.bats +++ b/tests/integration/delete.bats @@ -195,3 +195,29 @@ EOF # Expect "no such unit" exit code. run -4 systemctl status $user "$SD_UNIT_NAME" } + +@test "runc delete [with host pid namespace]" { + requires root + + update_config ' .linux.namespaces -= [{"type": "pid"}] + | .process.terminal = false + | .process.args |= ["/bin/sleep", "1d"]' + + __runc run -d test_host_pidns + [ "$status" -eq 0 ] + testcontainer test_host_pidns running + + runc state test_host_pidns + pid=$(echo $output | jq .pid) + + __runc exec -d test_host_pidns sleep 2d + [ "$status" -eq 0 ] + testcontainer test_host_pidns running + + kill -9 $pid + [ "$status" -eq 0 ] + testcontainer test_host_pidns stopped + + runc delete test_host_pidns + [ "$status" -eq 0 ] +}