From 32e5fd92268d41c0eca10a23163aac0c3f6b00a7 Mon Sep 17 00:00:00 2001 From: Kailun Qin Date: Thu, 5 Aug 2021 13:16:33 -0400 Subject: [PATCH] specs-go/config: add systemd cgroup support The `--systemd-cgroup` flag and the systemd cgroup path convention currently implemented in `runc/crun` should be added to the spec. This patch adds in the spec: * the option to enable systemd cgroup; * the configs of systemd units and the containing slice to map to the cgroup tree. Fixes https://github.com/opencontainers/runtime-spec/issues/1021 Signed-off-by: Kailun Qin --- config-linux.md | 48 ++++++++++++++++++++++++++++++++++++++++++++++ specs-go/config.go | 37 +++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+) diff --git a/config-linux.md b/config-linux.md index 37ea951f7..30e5cc43c 100644 --- a/config-linux.md +++ b/config-linux.md @@ -214,6 +214,54 @@ Runtimes MAY attach the container process to additional cgroup controllers beyon } ``` +### Systemd Cgroup + +* **`systemdCgroup`** *(bool, OPTIONAL)* - enables or disables systemd cgroup support. + If enabled (`true`), the container runtime switches to the systemd cgroup driver for creating + cgroups and setting cgroup limits. + +* **`systemdCgroupsPath`** *(object, OPTIONAL)* - sets the systemd cgroups path configuration. + It specifies the transient systemd unit to create for the container and the containing + slice which hosts the unit; these systemd units map directly to objects in the cgroup tree. + When these units are activated, they map directly to cgroup paths built from the unit names. + +The `systemdCgroupsPath` object describes one systemd unit and its parent slice. +The following parameters can be specified to set it up: + +* **`type`** *(string, REQUIRED)* - type of the systemd unit: `scope` or `slice`. +* **`parentSlice`** *(string, OPTIONAL)* - name of the parent slice with type suffix, under which the container is placed. + Note that `parentSlice` can contain dashes to denote a sub-slice (e.g. 
`user-1000.slice` is a correct + notation, meaning a subslice of `user.slice`), but it must not contain slashes (e.g. + `user.slice/user-1000.slice` is invalid). + There might be some slices already created by default, for example: + `-.slice` - the root slice; + `system.slice` - the default place for all system services; + `user.slice` - the default place for all user sessions. +* **`name`** *(string, OPTIONAL)* - systemd unit name (without type suffix). + +### Example + +```json +"systemdCgroup": true, +"systemdCgroupsPath": { + "type": "scope", + "parentSlice": "user.slice", + "name": "runtime-foo" +}, +"resources": { + "memory": { + "limit": 100000, + "reservation": 200000 + }, + "devices": [ + { + "allow": false, + "access": "rwm" + } + ] +} +``` + ### Allowed Device list **`devices`** (array of objects, OPTIONAL) configures the [allowed device list][cgroup-v1-devices]. diff --git a/specs-go/config.go b/specs-go/config.go index 6a7a91e55..60002a080 100644 --- a/specs-go/config.go +++ b/specs-go/config.go @@ -163,6 +163,13 @@ type Linux struct { // The path is expected to be relative to the cgroups mountpoint. // If resources are specified, the cgroups at CgroupsPath will be updated based on resources. CgroupsPath string `json:"cgroupsPath,omitempty"` + // SystemdCgroup controls whether to enable systemd cgroup support. + SystemdCgroup bool `json:"systemdCgroup,omitempty"` + // SystemdCgroupsPath specifies the transient systemd unit to create for the container and the + // containing slice which hosts the unit. The systemd units directly map to objects in the + // cgroup tree. When these units are activated, they map directly to cgroup paths built from the + // unit names. 
+ SystemdCgroupsPath LinuxSystemdCgroupsPath `json:"systemdCgroupsPath,omitempty"` // Namespaces contains the namespaces that are created and/or joined by the container Namespaces []LinuxNamespace `json:"namespaces,omitempty"` // Devices are a list of device nodes that are created for the container @@ -184,6 +191,36 @@ type Linux struct { Personality *LinuxPersonality `json:"personality,omitempty"` } +// LinuxSystemdCgroupsPath specifies the transient systemd unit to create for the container and the +// containing slice which hosts the unit. +type LinuxSystemdCgroupsPath struct { + // Type is the type of the systemd unit (Scope or Slice). + Type SystemdUnitType `json:"type"` + // ParentSlice specifies the name of the parent slice with type suffix, under which the + // container is placed. Some examples below: + // `-.slice` - the root slice; + // `system.slice` - the default place for all system services; + // `user.slice` - the default place for all user sessions. + ParentSlice string `json:"parentSlice,omitempty"` + // Name is the systemd unit name (without type suffix). + Name string `json:"name,omitempty"` +} + +// SystemdUnitType defines the type of the systemd unit. +type SystemdUnitType string + +// Supported values of SystemdUnitType. +const ( + // Scope is a group of externally created processes. + // Scopes encapsulate processes that are started and stopped by arbitrary processes through the + // fork() function and then registered by systemd at runtime. + Scope SystemdUnitType = "scope" + // Slice is a group of hierarchically organized units. + // Slices do not contain processes; they organize a hierarchy in which scopes and services are + // placed. + Slice SystemdUnitType = "slice" +) + // LinuxNamespace is the configuration for a Linux namespace type LinuxNamespace struct { // Type is the type of namespace