From 5da8851d7af51fa5fee545e68f07b9f5ffe80eb5 Mon Sep 17 00:00:00 2001
From: Kir Kolyshkin
Date: Wed, 17 Jul 2024 14:57:04 -0700
Subject: [PATCH] runc exec: implement CPU affinity

As per
- https://github.com/opencontainers/runtime-spec/pull/1253
- https://github.com/opencontainers/runtime-spec/pull/1261

Add some tests (alas it's impossible to test initial CPU affinity
without adding debug logging).

Signed-off-by: Kir Kolyshkin
---
Review changes:
 - affToUnix: reject CPU numbers that unix.CPUSet can't hold instead of
   silently ignoring them (this also bounds the range loop), and parse
   numbers as 31-bit values so that the conversion to int can't overflow
   on 32-bit platforms;
 - add doc comments to the new helpers;
 - tests: accept any whitespace after "Cpus_allowed_list:".

 libcontainer/configs/config.go      |   3 +
 libcontainer/process_linux.go       | 114 +++++++++++++++++++++++++++-
 libcontainer/specconv/spec_linux.go |   5 ++
 tests/integration/cpu_affinity.bats |  77 ++++++++++++++++++++
 4 files changed, 196 insertions(+), 3 deletions(-)
 create mode 100644 tests/integration/cpu_affinity.bats

diff --git a/libcontainer/configs/config.go b/libcontainer/configs/config.go
index 2ce42c474da..0ce0bf03547 100644
--- a/libcontainer/configs/config.go
+++ b/libcontainer/configs/config.go
@@ -225,6 +225,9 @@ type Config struct {
 
 	// IOPriority is the container's I/O priority.
 	IOPriority *IOPriority `json:"io_priority,omitempty"`
+
+	// ExecCPUAffinity is CPU affinity for a non-init process to be run in the container.
+	ExecCPUAffinity *CPUAffinity `json:"exec_cpu_affinity,omitempty"`
 }
 
 // Scheduler is based on the Linux sched_setattr(2) syscall.
diff --git a/libcontainer/process_linux.go b/libcontainer/process_linux.go
index 1f247baffbd..7a1ab488f73 100644
--- a/libcontainer/process_linux.go
+++ b/libcontainer/process_linux.go
@@ -12,6 +12,7 @@ import (
 	"path/filepath"
 	"runtime"
 	"strconv"
+	"strings"
 	"sync"
 	"time"
 
@@ -122,13 +123,117 @@ func (p *setnsProcess) signal(sig os.Signal) error {
 	return unix.Kill(p.pid(), s)
 }
 
+// affToUnix converts a CPU list, i.e. comma-separated CPU numbers and
+// inclusive ranges such as "0-3,7", into a unix.CPUSet. Spaces around
+// elements and empty elements are allowed. An error is returned for
+// malformed input and for CPU numbers which do not fit into unix.CPUSet.
+func affToUnix(str string) (*unix.CPUSet, error) {
+	s := new(unix.CPUSet)
+	// Since (*unix.CPUSet).Set silently ignores CPU numbers it can't hold,
+	// check that the bit is actually set. This also stops the range loop
+	// below early when the range end is too high.
+	set := func(cpu int) error {
+		s.Set(cpu)
+		if !s.IsSet(cpu) {
+			return fmt.Errorf("CPU %d is not supported", cpu)
+		}
+		return nil
+	}
+	for _, r := range strings.Split(str, ",") {
+		// Allow extra spaces around.
+		r = strings.TrimSpace(r)
+		// Allow empty elements (extra commas).
+		if r == "" {
+			continue
+		}
+		if r0, r1, found := strings.Cut(r, "-"); found {
+			// Parse as 31-bit values so that they fit into int everywhere.
+			start, err := strconv.ParseUint(r0, 10, 31)
+			if err != nil {
+				return nil, err
+			}
+			end, err := strconv.ParseUint(r1, 10, 31)
+			if err != nil {
+				return nil, err
+			}
+			if start > end {
+				return nil, errors.New("invalid range: " + r)
+			}
+			for i := int(start); i <= int(end); i++ {
+				if err := set(i); err != nil {
+					return nil, err
+				}
+			}
+		} else {
+			val, err := strconv.ParseUint(r, 10, 31)
+			if err != nil {
+				return nil, err
+			}
+			if err := set(int(val)); err != nil {
+				return nil, err
+			}
+		}
+	}
+
+	return s, nil
+}
+
+// startWithCPUAffinity starts the setns process with the initial CPU affinity, if specified.
+func (p *setnsProcess) startWithCPUAffinity() error {
+	aff := p.config.Config.ExecCPUAffinity
+	if aff == nil || aff.Initial == "" {
+		return p.cmd.Start()
+	}
+	cpus, err := affToUnix(aff.Initial)
+	if err != nil {
+		return fmt.Errorf("invalid execCPUAffinity.initial: %w", err)
+	}
+
+	errCh := make(chan error)
+	defer close(errCh)
+
+	// Use a goroutine to dedicate an OS thread.
+	go func() {
+		// Don't call runtime.UnlockOSThread to terminate the OS thread
+		// when goroutine exits.
+		runtime.LockOSThread()
+
+		// Command inherits the CPU affinity.
+		if err := unix.SchedSetaffinity(unix.Gettid(), cpus); err != nil {
+			errCh <- fmt.Errorf("setting initial CPU affinity: %w", err)
+			return
+		}
+
+		errCh <- p.cmd.Start()
+	}()
+
+	return <-errCh
+}
+
+// setFinalCPUAffinity sets the final CPU affinity, if specified, of the
+// already started setns process.
+func (p *setnsProcess) setFinalCPUAffinity() error {
+	aff := p.config.Config.ExecCPUAffinity
+	if aff == nil || aff.Final == "" {
+		return nil
+	}
+	cpus, err := affToUnix(aff.Final)
+	if err != nil {
+		return fmt.Errorf("invalid execCPUAffinity.final: %w", err)
+	}
+	if err := unix.SchedSetaffinity(p.pid(), cpus); err != nil {
+		return fmt.Errorf("setting final CPU affinity: %w", err)
+	}
+	return nil
+}
+
 func (p *setnsProcess) start() (retErr error) {
 	defer p.comm.closeParent()
 
-	// get the "before" value of oom kill count
+	// Get the "before" value of oom kill count.
 	oom, _ := p.manager.OOMKillCount()
-	err := p.cmd.Start() // https://github.com/opencontainers/runc/pull/3923/commits/afc23e33971b657c4a09c54b16c6139651171aad
-	// close the child-side of the pipes (controlled by child)
+	err := p.startWithCPUAffinity()
+	// Close the child-side of the pipes (controlled by child).
 	p.comm.closeChild()
 	if err != nil {
 		return fmt.Errorf("error starting setns process: %w", err)
@@ -196,6 +301,9 @@ func (p *setnsProcess) start() (retErr error) {
 			}
 		}
 	}
+	if err := p.setFinalCPUAffinity(); err != nil {
+		return err
+	}
 
 	if err := utils.WriteJSON(p.comm.initSockParent, p.config); err != nil {
 		return fmt.Errorf("error writing config to pipe: %w", err)
diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go
index 5a09f74b1e3..8d7c6079f40 100644
--- a/libcontainer/specconv/spec_linux.go
+++ b/libcontainer/specconv/spec_linux.go
@@ -539,6 +539,11 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
 			ioPriority := *spec.Process.IOPriority
 			config.IOPriority = &ioPriority
 		}
+		if spec.Process.ExecCPUAffinity != nil {
+			a := *spec.Process.ExecCPUAffinity
+			config.ExecCPUAffinity = &a
+		}
+
 	}
 	createHooks(spec, config)
 	config.Version = specs.Version
diff --git a/tests/integration/cpu_affinity.bats b/tests/integration/cpu_affinity.bats
new file mode 100644
index 00000000000..a27c16452ae
--- /dev/null
+++ b/tests/integration/cpu_affinity.bats
@@ -0,0 +1,77 @@
+#!/usr/bin/env bats
+# Exec CPU affinity tests. For more details, see:
+#  - https://github.com/opencontainers/runtime-spec/pull/1253
+
+load helpers
+
+function setup() {
+	requires smp cgroups_cpuset
+	setup_busybox
+}
+
+function teardown() {
+	teardown_bundle
+}
+
+function all_cpus() {
+	cat /sys/devices/system/cpu/online
+}
+
+function first_cpu() {
+	all_cpus | sed 's/[-,].*//g'
+}
+
+@test "runc exec [CPU affinity inherited from runc]" {
+	requires root # For taskset.
+
+	first="$(first_cpu)"
+
+	# Container's process CPU affinity is inherited from that of runc.
+	taskset -p -c "$first" $$
+
+	runc run -d --console-socket "$CONSOLE_SOCKET" ct1
+	[ "$status" -eq 0 ]
+
+	# Check init.
+	runc exec ct1 grep "Cpus_allowed_list:" /proc/1/status
+	[ "$status" -eq 0 ]
+	[[ "${lines[0]}" =~ ^Cpus_allowed_list:[[:space:]]+"$first"$ ]]
+
+	# Check exec.
+	runc exec ct1 grep "Cpus_allowed_list:" /proc/self/status
+	[ "$status" -eq 0 ]
+	[[ "${lines[0]}" =~ ^Cpus_allowed_list:[[:space:]]+"$first"$ ]]
+}
+
+@test "runc exec [CPU affinity, only initial is set]" {
+	requires root # For taskset.
+
+	first="$(first_cpu)"
+
+	update_config ".process.execCPUAffinity.initial = \"$first\""
+
+	runc run -d --console-socket "$CONSOLE_SOCKET" ct1
+	[ "$status" -eq 0 ]
+
+	runc exec ct1 grep "Cpus_allowed_list:" /proc/self/status
+	[ "$status" -eq 0 ]
+	[[ "${lines[0]}" =~ ^Cpus_allowed_list:[[:space:]]+"$first"$ ]]
+}
+
+@test "runc exec [CPU affinity, initial and final are set]" {
+	requires root # For taskset.
+
+	first="$(first_cpu)"
+	second=$((first+1)) # Hacky; might not work in all environments.
+
+	update_config "	  .process.execCPUAffinity.initial = \"$first\"
+			| .process.execCPUAffinity.final = \"$second\""
+
+	taskset -p -c "$first" $$
+	runc run -d --console-socket "$CONSOLE_SOCKET" ct1
+	[ "$status" -eq 0 ]
+
+	runc exec ct1 grep "Cpus_allowed_list:" /proc/self/status
+	[ "$status" -eq 0 ]
+	[[ "${lines[0]}" =~ ^Cpus_allowed_list:[[:space:]]+"$second"$ ]]
+}