diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index e6894f48357..d4e11896a86 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -364,10 +364,6 @@ func (c *Container) start(process *Process) (retErr error) { func (c *Container) Signal(s os.Signal) error { c.m.Lock() defer c.m.Unlock() - // To avoid a PID reuse attack, don't kill non-running container. - if !c.hasInit() { - return ErrNotRunning - } // When a container has its own PID namespace, inside it the init PID // is 1, and thus it is handled specially by the kernel. In particular, @@ -375,14 +371,19 @@ func (c *Container) Signal(s os.Signal) error { // all other processes in that PID namespace (see pid_namespaces(7)). // // OTOH, if PID namespace is shared, we should kill all pids to avoid - // leftover processes. - var err error + // leftover processes. Handle this special case here. if s == unix.SIGKILL && !c.config.Namespaces.IsPrivate(configs.NEWPID) { - err = signalAllProcesses(c.cgroupManager, unix.SIGKILL) - } else { - err = c.initProcess.signal(s) + if err := signalAllProcesses(c.cgroupManager, unix.SIGKILL); err != nil { + return fmt.Errorf("unable to kill all processes: %w", err) + } + return nil } - if err != nil { + + // To avoid a PID reuse attack, don't kill non-running container. + if !c.hasInit() { + return ErrNotRunning + } + if err := c.initProcess.signal(s); err != nil { return fmt.Errorf("unable to signal init: %w", err) } if s == unix.SIGKILL { diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go index 624e0199e9a..293f7e64115 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go @@ -661,6 +661,9 @@ func setupPersonality(config *configs.Config) error { // signalAllProcesses freezes then iterates over all the processes inside the // manager's cgroups sending the signal s to them. func signalAllProcesses(m cgroups.Manager, s unix.Signal) error { + if !m.Exists() { + return ErrNotRunning + } // Use cgroup.kill, if available. if s == unix.SIGKILL { if p := m.Path(""); p != "" { // Either cgroup v2 or hybrid. diff --git a/tests/integration/kill.bats b/tests/integration/kill.bats index 8f2f4a7b4e8..5d82107d2a5 100644 --- a/tests/integration/kill.bats +++ b/tests/integration/kill.bats @@ -10,6 +10,7 @@ function teardown() { teardown_bundle } +# shellcheck disable=SC2030 @test "kill detached busybox" { # run busybox detached runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox @@ -35,35 +36,36 @@ function teardown() { [ "$status" -eq 0 ] } -# This is roughly the same as TestPIDHostInitProcessWait in libcontainer/integration. -@test "kill KILL [host pidns]" { - # kill -a currently requires cgroup freezer. +test_host_pidns_kill() { requires cgroups_freezer + [ $EUID -ne 0 ] && requires rootless_cgroup update_config ' .linux.namespaces -= [{"type": "pid"}]' set_cgroups_path - if [ $EUID -ne 0 ]; then - # kill --all requires working cgroups. - requires rootless_cgroup - update_config ' .mounts |= map((select(.type == "proc") - | .type = "none" - | .source = "/proc" - | .options = ["rbind", "nosuid", "nodev", "noexec"] - ) // .)' - fi runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox + # shellcheck disable=SC2031 [ "$status" -eq 0 ] + cgpath=$(get_cgroup_path "pids") + init_pid=$(cat "$cgpath"/cgroup.procs) # Start a few more processes. for _ in 1 2 3 4 5; do __runc exec -d test_busybox sleep 1h - [ "$status" -eq 0 ] done + if [ -v KILL_INIT ]; then + # Now kill the container's init process. Since the container do + # not have own PID ns, its init is no special and the container + # will still be up and running (except for rootless container + # AND systemd cgroup driver AND systemd > v245, when systemd + # kills the container; see "kill KILL [host pidns + init gone]" + # below). + kill -9 "$init_pid" + fi + # Get the list of all container processes. - path=$(get_cgroup_path "pids") - pids=$(cat "$path"/cgroup.procs) + pids=$(cat "$cgpath"/cgroup.procs) echo "pids: $pids" # Sanity check -- make sure all processes exist. for p in $pids; do @@ -71,14 +73,42 @@ function teardown() { done runc kill test_busybox KILL + # shellcheck disable=SC2031 [ "$status" -eq 0 ] wait_for_container 10 1 test_busybox stopped # Make sure all processes are gone. - pids=$(cat "$path"/cgroup.procs) || true # OK if cgroup is gone + pids=$(cat "$cgpath"/cgroup.procs) || true # OK if cgroup is gone echo "pids: $pids" - for p in $pids; do - run ! kill -0 "$p" - [[ "$output" = *"No such process" ]] - done + [ -z "$pids" ] +} + +# This is roughly the same as TestPIDHostInitProcessWait in libcontainer/integration. +# The differences are: +# +# 1. Here we use separate processes to create and to kill a container, so the +# processes inside a container are not children of "runc kill". +# +# 2. We hit different codepaths (nonChildProcess.signal rather than initProcess.signal). +@test "kill KILL [host pidns]" { + unset KILL_INIT + test_host_pidns_kill +} + +# Same as above plus: +# 3. Test runc kill on a container whose init process is gone (issue 4047). +@test "kill KILL [host pidns + init gone]" { + # Apparently, for rootless test, when using systemd cgroup manager, + # newer versions of systemd clean up the container as soon as its init + # process is gone. This is all fine and dandy, except it prevents us to + # test this case, thus we skip the test. + # + # It is not entirely clear which systemd version got this feature: + # v245 works fine, and v249 does not. + if [ $EUID -ne 0 ] && [ -v RUNC_USE_SYSTEMD ] && [ "$(systemd_version)" -gt 245 ]; then + skip "rootless+systemd conflicts with systemd > 245" + fi + KILL_INIT=1 + test_host_pidns_kill + unset KILL_INIT }