From 9e17d5d422fbe0373738374c2cf9b063c2407484 Mon Sep 17 00:00:00 2001 From: Nimrod Shneor Date: Sat, 29 May 2021 13:31:42 +0300 Subject: [PATCH] Add support for cgroups managed by systemd --- src/capabilities.rs | 4 +- src/cgroups/blkio.rs | 4 +- src/cgroups/cgroupsfs_manager.rs | 107 ++++++++++++++++++++++++++++++ src/cgroups/devices.rs | 5 +- src/cgroups/hugetlb.rs | 4 +- src/cgroups/manager.rs | 101 +--------------------------- src/cgroups/memory.rs | 4 +- src/cgroups/mod.rs | 6 +- src/cgroups/network_classifier.rs | 4 +- src/cgroups/pids.rs | 4 +- src/cgroups/systemd_manager.rs | 16 +++++ src/create.rs | 11 ++- src/main.rs | 19 ++++-- src/namespaces.rs | 4 +- src/process/fork.rs | 4 +- src/rootfs.rs | 2 +- 16 files changed, 164 insertions(+), 135 deletions(-) create mode 100644 src/cgroups/cgroupsfs_manager.rs create mode 100644 src/cgroups/systemd_manager.rs diff --git a/src/capabilities.rs b/src/capabilities.rs index 9e95c49f39..cc35aacfe4 100644 --- a/src/capabilities.rs +++ b/src/capabilities.rs @@ -1,6 +1,4 @@ -use crate::{ - command::Command, -}; +use crate::command::Command; use caps::*; use anyhow::Result; diff --git a/src/cgroups/blkio.rs b/src/cgroups/blkio.rs index feb319ab32..f6ad8b29cf 100644 --- a/src/cgroups/blkio.rs +++ b/src/cgroups/blkio.rs @@ -4,9 +4,7 @@ use std::{ path::Path, }; -use crate::{ - cgroups::Controller, -}; +use crate::cgroups::Controller; use oci_spec::{LinuxBlockIo, LinuxResources}; const CGROUP_BLKIO_THROTTLE_READ_BPS: &str = "blkio.throttle.read_bps_device"; diff --git a/src/cgroups/cgroupsfs_manager.rs b/src/cgroups/cgroupsfs_manager.rs new file mode 100644 index 0000000000..4844ebe1fa --- /dev/null +++ b/src/cgroups/cgroupsfs_manager.rs @@ -0,0 +1,107 @@ +use crate::cgroups::Manager; +use std::{collections::HashMap, path::PathBuf}; +use std::{fs::remove_dir, path::Path}; + +use anyhow::Result; +use nix::unistd::Pid; +use procfs::process::Process; + +use super::{ + blkio::Blkio, devices::Devices, hugetlb::Hugetlb, 
memory::Memory, + network_classifier::NetworkClassifier, network_priority::NetworkPriority, pids::Pids, + Controller, +}; +use crate::{cgroups::ControllerType, utils::PathBufExt}; +use oci_spec::LinuxResources; + +const CONTROLLERS: &[ControllerType] = &[ + ControllerType::Devices, + ControllerType::HugeTlb, + ControllerType::Memory, + ControllerType::Pids, + ControllerType::Blkio, + ControllerType::NetworkPriority, + ControllerType::NetworkClassifier, +]; + +pub struct CGroupsFSManager { + subsystems: HashMap, +} + +impl CGroupsFSManager { + pub fn new(cgroup_path: PathBuf) -> Result { + let mut subsystems = HashMap::::new(); + for subsystem in CONTROLLERS.iter().map(|c| c.to_string()) { + subsystems.insert( + subsystem.to_owned(), + Self::get_subsystem_path(&cgroup_path, &subsystem)?, + ); + } + + Ok(CGroupsFSManager { subsystems }) + } + + fn get_subsystem_path(cgroup_path: &Path, subsystem: &str) -> anyhow::Result { + log::debug!("Get path for subsystem: {}", subsystem); + let mount = Process::myself()? + .mountinfo()? + .into_iter() + .find(|m| { + if m.fs_type == "cgroup" { + // Some systems mount net_prio and net_cls in the same directory + // other systems mount them in their own directories. This + // should handle both cases. + if subsystem == "net_cls" || subsystem == "net_prio" { + return m.mount_point.ends_with("net_cls,net_prio") + || m.mount_point.ends_with("net_prio,net_cls"); + } + } + m.mount_point.ends_with(subsystem) + }) + .unwrap(); + + let cgroup = Process::myself()? + .cgroups()? + .into_iter() + .find(|c| c.controllers.contains(&subsystem.to_owned())) + .unwrap(); + + let p = if cgroup_path.to_string_lossy().into_owned().is_empty() { + mount + .mount_point + .join_absolute_path(Path::new(&cgroup.pathname))? + } else { + mount.mount_point.join_absolute_path(&cgroup_path)? 
+ }; + + Ok(p) + } +} + +impl Manager for CGroupsFSManager { + fn apply(&self, linux_resources: &LinuxResources, pid: Pid) -> Result<()> { + for subsys in &self.subsystems { + match subsys.0.as_str() { + "devices" => Devices::apply(linux_resources, &subsys.1, pid)?, + "hugetlb" => Hugetlb::apply(linux_resources, &subsys.1, pid)?, + "memory" => Memory::apply(linux_resources, &subsys.1, pid)?, + "pids" => Pids::apply(linux_resources, &subsys.1, pid)?, + "blkio" => Blkio::apply(linux_resources, &subsys.1, pid)?, + "net_prio" => NetworkPriority::apply(linux_resources, &subsys.1, pid)?, + "net_cls" => NetworkClassifier::apply(linux_resources, &subsys.1, pid)?, + _ => continue, + } + } + Ok(()) + } + + fn remove(&self) -> Result<()> { + for cgroup_path in &self.subsystems { + if cgroup_path.1.exists() { + log::debug!("remove cgroup {:?}", cgroup_path.1); + remove_dir(&cgroup_path.1)?; + } + } + Ok(()) + } +} diff --git a/src/cgroups/devices.rs b/src/cgroups/devices.rs index ff64afeda1..b2be6a1b8b 100644 --- a/src/cgroups/devices.rs +++ b/src/cgroups/devices.rs @@ -7,10 +7,7 @@ use std::{ use anyhow::Result; use nix::unistd::Pid; -use crate::{ - cgroups::Controller, - rootfs::default_devices, -}; +use crate::{cgroups::Controller, rootfs::default_devices}; use oci_spec::{LinuxDeviceCgroup, LinuxDeviceType, LinuxResources}; pub struct Devices {} diff --git a/src/cgroups/hugetlb.rs b/src/cgroups/hugetlb.rs index 3e74cd216e..7184b613e9 100644 --- a/src/cgroups/hugetlb.rs +++ b/src/cgroups/hugetlb.rs @@ -7,9 +7,7 @@ use std::{ use anyhow::anyhow; use regex::Regex; -use crate::{ - cgroups::Controller, -}; +use crate::cgroups::Controller; use oci_spec::{LinuxHugepageLimit, LinuxResources}; pub struct Hugetlb {} diff --git a/src/cgroups/manager.rs b/src/cgroups/manager.rs index 8a74ef117e..1bf0e71333 100644 --- a/src/cgroups/manager.rs +++ b/src/cgroups/manager.rs @@ -1,106 +1,11 @@ -use std::{collections::HashMap, path::PathBuf}; -use std::{fs::remove_dir, path::Path}; - use 
anyhow::Result; use nix::unistd::Pid; use procfs::process::Process; use crate::{cgroups::ControllerType, utils::PathBufExt}; use oci_spec::LinuxResources; -use super::{ - blkio::Blkio, devices::Devices, hugetlb::Hugetlb, memory::Memory, - network_classifier::NetworkClassifier, network_priority::NetworkPriority, pids::Pids, - Controller, -}; - -const CONTROLLERS: &[ControllerType] = &[ - ControllerType::Devices, - ControllerType::HugeTlb, - ControllerType::Memory, - ControllerType::Pids, - ControllerType::Blkio, - ControllerType::NetworkPriority, - ControllerType::NetworkClassifier, -]; - -pub struct Manager { - subsystems: HashMap, -} - -impl Manager { - pub fn new(cgroup_path: PathBuf) -> Result { - let mut subsystems = HashMap::::new(); - for subsystem in CONTROLLERS.iter().map(|c| c.to_string()) { - subsystems.insert( - subsystem.to_owned(), - Self::get_subsystem_path(&cgroup_path, &subsystem)?, - ); - } - - Ok(Manager { subsystems }) - } - - pub fn apply(&self, linux_resources: &LinuxResources, pid: Pid) -> Result<()> { - for subsys in &self.subsystems { - match subsys.0.as_str() { - "devices" => Devices::apply(linux_resources, &subsys.1, pid)?, - "hugetlb" => Hugetlb::apply(linux_resources, &subsys.1, pid)?, - "memory" => Memory::apply(linux_resources, &subsys.1, pid)?, - "pids" => Pids::apply(linux_resources, &subsys.1, pid)?, - "blkio" => Blkio::apply(linux_resources, &subsys.1, pid)?, - "net_prio" => NetworkPriority::apply(linux_resources, &subsys.1, pid)?, - "net_cls" => NetworkClassifier::apply(linux_resources, &subsys.1, pid)?, - _ => continue, - } - } - - Ok(()) - } - - pub fn remove(&self) -> Result<()> { - for cgroup_path in &self.subsystems { - if cgroup_path.1.exists() { - log::debug!("remove cgroup {:?}", cgroup_path.1); - remove_dir(&cgroup_path.1)?; - } - } - - Ok(()) - } - - fn get_subsystem_path(cgroup_path: &Path, subsystem: &str) -> anyhow::Result { - log::debug!("Get path for subsystem: {}", subsystem); - let mount = Process::myself()? 
- .mountinfo()? - .into_iter() - .find(|m| { - if m.fs_type == "cgroup" { - // Some systems mount net_prio and net_cls in the same directory - // other systems mount them in their own diretories. This - // should handle both cases. - if subsystem == "net_cls" || subsystem == "net_prio" { - return m.mount_point.ends_with("net_cls,net_prio") - || m.mount_point.ends_with("net_prio,net_cls"); - } - } - m.mount_point.ends_with(subsystem) - }) - .unwrap(); - - let cgroup = Process::myself()? - .cgroups()? - .into_iter() - .find(|c| c.controllers.contains(&subsystem.to_owned())) - .unwrap(); - - let p = if cgroup_path.to_string_lossy().into_owned().is_empty() { - mount - .mount_point - .join_absolute_path(Path::new(&cgroup.pathname))? - } else { - mount.mount_point.join_absolute_path(&cgroup_path)? - }; - Ok(p) - } +pub trait Manager { + fn apply(&self, linux_resources: &LinuxResources, pid: Pid) -> Result<()>; + fn remove(&self) -> Result<()>; } diff --git a/src/cgroups/memory.rs b/src/cgroups/memory.rs index 696f7455d7..461b0f530e 100644 --- a/src/cgroups/memory.rs +++ b/src/cgroups/memory.rs @@ -7,9 +7,7 @@ use std::{ use anyhow::{Result, *}; use nix::{errno::Errno, unistd::Pid}; -use crate::{ - cgroups::Controller, -}; +use crate::cgroups::Controller; use oci_spec::{LinuxMemory, LinuxResources}; const CGROUP_MEMORY_SWAP_LIMIT: &str = "memory.memsw.limit_in_bytes"; diff --git a/src/cgroups/mod.rs b/src/cgroups/mod.rs index 9eb044b8a4..f17e33ca66 100644 --- a/src/cgroups/mod.rs +++ b/src/cgroups/mod.rs @@ -1,13 +1,17 @@ +mod blkio; +mod cgroupsfs_manager; mod controller; mod controller_type; mod devices; mod hugetlb; -mod blkio; mod manager; mod memory; mod network_classifier; mod network_priority; mod pids; +mod systemd_manager; +pub use cgroupsfs_manager::CGroupsFSManager; pub use controller::Controller; pub use controller_type::ControllerType; pub use manager::Manager; +pub use systemd_manager::SystemDCGroupManager; diff --git a/src/cgroups/network_classifier.rs 
b/src/cgroups/network_classifier.rs index c6df7448d6..10563a9f2f 100644 --- a/src/cgroups/network_classifier.rs +++ b/src/cgroups/network_classifier.rs @@ -7,9 +7,7 @@ use std::{ use anyhow::Result; use nix::unistd::Pid; -use crate::{ - cgroups::Controller, -}; +use crate::cgroups::Controller; use oci_spec::{LinuxNetwork, LinuxResources}; pub struct NetworkClassifier {} diff --git a/src/cgroups/pids.rs b/src/cgroups/pids.rs index df6d5b9c9f..61c5fd2719 100644 --- a/src/cgroups/pids.rs +++ b/src/cgroups/pids.rs @@ -6,9 +6,7 @@ use std::{ use anyhow::Result; -use crate::{ - cgroups::Controller, -}; +use crate::cgroups::Controller; use oci_spec::{LinuxPids, LinuxResources}; pub struct Pids {} diff --git a/src/cgroups/systemd_manager.rs b/src/cgroups/systemd_manager.rs new file mode 100644 index 0000000000..b50b7e0951 --- /dev/null +++ b/src/cgroups/systemd_manager.rs @@ -0,0 +1,16 @@ +use crate::cgroups::Manager; +use anyhow::Result; +use nix::unistd::Pid; +use oci_spec::LinuxResources; + +pub struct SystemDCGroupManager; + +impl Manager for SystemDCGroupManager { + fn apply(&self, linux_resources: &LinuxResources, pid: Pid) -> Result<()> { + Ok(()) + } + + fn remove(&self) -> Result<()> { + Ok(()) + } +} diff --git a/src/create.rs b/src/create.rs index 6098cb624e..bd37d9268a 100644 --- a/src/create.rs +++ b/src/create.rs @@ -32,7 +32,7 @@ pub struct Create { } impl Create { - pub fn exec(&self, root_path: PathBuf, command: impl Command) -> Result<()> { + pub fn exec(&self, root_path: PathBuf, systemd_cgroup: bool, command: impl Command) -> Result<()> { let container_dir = root_path.join(&self.container_id); if !container_dir.exists() { fs::create_dir(&container_dir).unwrap(); @@ -79,6 +79,7 @@ impl Create { rootfs, spec, csocketfd, + systemd_cgroup, container, command, )?; @@ -95,6 +96,7 @@ fn run_container>( rootfs: PathBuf, spec: oci_spec::Spec, csocketfd: Option, + systemd_cgroup: bool, container: Container, command: impl Command, ) -> Result { @@ -102,7 +104,10 
@@ fn run_container>( let linux = spec.linux.as_ref().unwrap(); let namespaces: Namespaces = linux.namespaces.clone().into(); - let cmanager = cgroups::Manager::new(linux.cgroups_path.clone())?; + let cmanager: Box = match systemd_cgroup { + true => Box::new(cgroups::SystemDCGroupManager), + false => Box::new(cgroups::CGroupsFSManager::new(linux.cgroups_path.clone())?), + }; match fork::fork_first( pid_file, @@ -111,7 +116,7 @@ fn run_container>( .contains(sched::CloneFlags::CLONE_NEWUSER), linux, &container, - &cmanager, + cmanager, )? { Process::Parent(parent) => Ok(Process::Parent(parent)), Process::Child(child) => { diff --git a/src/main.rs b/src/main.rs index a1ddff8bfc..7502ad55d6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,11 +9,12 @@ use anyhow::{bail, Result}; use clap::Clap; use nix::sys::signal as nix_signal; +use youki::cgroups::{CGroupsFSManager, Manager, SystemDCGroupManager}; +use youki::command::linux::LinuxCommand; use youki::container::{Container, ContainerStatus}; use youki::create; use youki::signal; use youki::start; -use youki::{cgroups::Manager, command::linux::LinuxCommand}; /// High-level commandline option definition /// This takes global options as well as individual commands as specified in [OCI runtime-spec](https://github.com/opencontainers/runtime-spec/blob/master/runtime.md) @@ -28,6 +29,9 @@ struct Opts { log: Option, #[clap(long)] log_format: Option, + /// Enable systemd cgroup manager, rather than using the cgroupfs directly. 
+ #[clap(short, long)] + systemd_cgroup: bool, /// command to actually manage container #[clap(subcommand)] subcmd: SubCommand, @@ -97,8 +101,10 @@ fn main() -> Result<()> { let root_path = PathBuf::from(&opts.root); fs::create_dir_all(&root_path)?; + let systemd_cgroup = opts.systemd_cgroup; + match opts.subcmd { - SubCommand::Create(create) => create.exec(root_path, LinuxCommand), + SubCommand::Create(create) => create.exec(root_path, systemd_cgroup, LinuxCommand), SubCommand::Start(start) => start.exec(root_path), SubCommand::Kill(kill) => { // resolves relative paths, symbolic links etc. and get complete path @@ -141,13 +147,16 @@ fn main() -> Result<()> { if container.root.exists() { // remove the directory storing container state fs::remove_dir_all(&container.root)?; - + let spec = oci_spec::Spec::load("config.json")?; // remove the cgroup created for the container // check https://man7.org/linux/man-pages/man7/cgroups.7.html // creating and removing cgroups section for more information on cgroups - - let cmanager = Manager::new(spec.linux.unwrap().cgroups_path)?; + + let cmanager: Box = match systemd_cgroup { + true => Box::new(SystemDCGroupManager), + false => Box::new(CGroupsFSManager::new(spec.linux.unwrap().cgroups_path)?), + }; cmanager.remove()?; } std::process::exit(0) diff --git a/src/namespaces.rs b/src/namespaces.rs index 6266a52be9..521fa7ecff 100644 --- a/src/namespaces.rs +++ b/src/namespaces.rs @@ -6,9 +6,7 @@ use nix::{ unistd::{self, Gid, Uid}, }; -use crate::{ - command::{linux::LinuxCommand, test::TestHelperCommand, Command}, -}; +use crate::command::{linux::LinuxCommand, test::TestHelperCommand, Command}; use oci_spec::{LinuxNamespace, LinuxNamespaceType}; pub struct Namespaces { diff --git a/src/process/fork.rs b/src/process/fork.rs index dbe0802cbc..a25f91f11e 100644 --- a/src/process/fork.rs +++ b/src/process/fork.rs @@ -15,16 +15,16 @@ use nix::unistd; use crate::cgroups::Manager; use crate::container::ContainerStatus; use 
crate::process::{child, init, parent, Process}; -use oci_spec; use crate::utils; use crate::{cond::Cond, container::Container}; +use oci_spec; pub fn fork_first<P: AsRef<Path>>( pid_file: Option<P>, is_userns: bool, linux: &oci_spec::Linux, container: &Container, - cmanager: &Manager, + cmanager: Box<dyn Manager>, ) -> Result<Process> { let ccond = Cond::new()?; diff --git a/src/rootfs.rs b/src/rootfs.rs index 83705f9105..d93d45629c 100644 --- a/src/rootfs.rs +++ b/src/rootfs.rs @@ -13,8 +13,8 @@ use nix::sys::stat::{mknod, umask}; use nix::unistd::{chdir, chown, close, getcwd}; use nix::unistd::{Gid, Uid}; -use oci_spec::{LinuxDevice, LinuxDeviceType, Mount, Spec}; use crate::utils::PathBufExt; +use oci_spec::{LinuxDevice, LinuxDeviceType, Mount, Spec}; pub fn prepare_rootfs(spec: &Spec, rootfs: &Path, bind_devices: bool) -> Result<()> { let mut flags = MsFlags::MS_REC;