diff --git a/config.md b/config.md index e2a9dda66..5af29d418 100644 --- a/config.md +++ b/config.md @@ -291,6 +291,33 @@ For Linux-based systems, the `process` object supports the following process-spe This is a per-process setting, whereas [`disableOOMKiller`](config-linux.md#memory) is scoped for a memory cgroup. For more information on how these two settings work together, see [the memory cgroup documentation section 10. OOM Control][cgroup-v1-memory_2]. +* **`scheduler`** (object, OPTIONAL) is an object describing the scheduler properties for the process. The `scheduler` contains the following properties: + + * **`policy`** (string, REQUIRED) represents the scheduling policy. A valid list of values is: + + * `SCHED_OTHER` + * `SCHED_FIFO` + * `SCHED_RR` + * `SCHED_BATCH` + * `SCHED_ISO` + * `SCHED_IDLE` + * `SCHED_DEADLINE` + + * **`nice`** (int32, OPTIONAL) is the nice value for the process, affecting its priority. A lower nice value corresponds to a higher priority. If not set, the runtime must use the value 0. + * **`priority`** (int32, OPTIONAL) represents the static priority of the process, used by real-time policies like SCHED_FIFO and SCHED_RR. If not set, the runtime must use the value 0. + * **`flags`** (array of strings, OPTIONAL) is an array of strings representing scheduling flags. A valid list of values is: + + * `SCHED_FLAG_RESET_ON_FORK` + * `SCHED_FLAG_RECLAIM` + * `SCHED_FLAG_DL_OVERRUN` + * `SCHED_FLAG_KEEP_POLICY` + * `SCHED_FLAG_KEEP_PARAMS` + * `SCHED_FLAG_UTIL_CLAMP_MIN` + * `SCHED_FLAG_UTIL_CLAMP_MAX` + + * **`runtime`** (uint64, OPTIONAL) represents the amount of time in nanoseconds during which the process is allowed to run in a given period, used by the deadline scheduler. If not set, the runtime must use the value 0. + * **`deadline`** (uint64, OPTIONAL) represents the absolute deadline for the process to complete its execution, used by the deadline scheduler. If not set, the runtime must use the value 0.
+ * **`period`** (uint64, OPTIONAL) represents the length of the period in nanoseconds used for determining the process runtime, used by the deadline scheduler. If not set, the runtime must use the value 0. * **`selinuxLabel`** (string, OPTIONAL) specifies the SELinux label for the process. For more information about SELinux, see [SELinux documentation][selinux]. diff --git a/schema/config-schema.json b/schema/config-schema.json index cf66c6524..b8393d8c4 100644 --- a/schema/config-schema.json +++ b/schema/config-schema.json @@ -147,6 +147,38 @@ "noNewPrivileges": { "type": "boolean" }, + "scheduler": { + "type": "object", + "required": [ + "policy" + ], + "properties": { + "policy": { + "$ref": "defs-linux.json#/definitions/SchedulerPolicy" + }, + "nice": { + "$ref": "defs.json#/definitions/int32" + }, + "priority": { + "$ref": "defs.json#/definitions/int32" + }, + "flags": { + "type": "array", + "items": { + "$ref": "defs-linux.json#/definitions/SchedulerFlag" + } + }, + "runtime": { + "$ref": "defs.json#/definitions/uint64" + }, + "deadline": { + "$ref": "defs.json#/definitions/uint64" + }, + "period": { + "$ref": "defs.json#/definitions/uint64" + } + } + }, "rlimits": { "type": "array", "items": { diff --git a/schema/defs-linux.json b/schema/defs-linux.json index 31f971a7f..ce43ecf96 100644 --- a/schema/defs-linux.json +++ b/schema/defs-linux.json @@ -323,6 +323,30 @@ "$ref": "defs.json#/definitions/uint32" } } + }, + "SchedulerPolicy": { + "type": "string", + "enum": [ + "SCHED_OTHER", + "SCHED_FIFO", + "SCHED_RR", + "SCHED_BATCH", + "SCHED_ISO", + "SCHED_IDLE", + "SCHED_DEADLINE" + ] + }, + "SchedulerFlag": { + "type": "string", + "enum": [ + "SCHED_FLAG_RESET_ON_FORK", + "SCHED_FLAG_RECLAIM", + "SCHED_FLAG_DL_OVERRUN", + "SCHED_FLAG_KEEP_POLICY", + "SCHED_FLAG_KEEP_PARAMS", + "SCHED_FLAG_UTIL_CLAMP_MIN", + "SCHED_FLAG_UTIL_CLAMP_MAX" + ] } } } diff --git a/specs-go/config.go b/specs-go/config.go index 25f4e6e82..f070bdf39 100644 --- a/specs-go/config.go +++ 
b/specs-go/config.go @@ -33,6 +33,34 @@ type Spec struct { ZOS *ZOS `json:"zos,omitempty" platform:"zos"` } +// Scheduler represents the scheduling attributes for a process. It is based on +// the Linux sched_setattr(2) syscall. +type Scheduler struct { + // Policy represents the scheduling policy (e.g., SCHED_FIFO, SCHED_RR, SCHED_OTHER). + Policy LinuxSchedulerPolicy `json:"policy"` + + // Nice is the nice value for the process, which affects its priority. A lower nice value corresponds to a higher priority. + Nice int32 `json:"nice,omitempty"` + + // Priority represents the static priority of the process, used by real-time policies like SCHED_FIFO and SCHED_RR. + Priority int32 `json:"priority,omitempty"` + + // Flags is an array of scheduling flags. + Flags []LinuxSchedulerFlag `json:"flags,omitempty"` + + // The following fields are used by the SCHED_DEADLINE policy. + + // Runtime is the amount of time in nanoseconds during which the process + // is allowed to run in a given period. + Runtime uint64 `json:"runtime,omitempty"` + + // Deadline is the absolute deadline for the process to complete its execution. + Deadline uint64 `json:"deadline,omitempty"` + + // Period is the length of the period in nanoseconds used for determining the process runtime. + Period uint64 `json:"period,omitempty"` +} + // Process contains information to start a specific application inside the container. type Process struct { // Terminal creates an interactive terminal for the container. @@ -60,6 +88,8 @@ type Process struct { ApparmorProfile string `json:"apparmorProfile,omitempty" platform:"linux"` // Specify an oom_score_adj for the container. OOMScoreAdj *int `json:"oomScoreAdj,omitempty" platform:"linux"` + // Scheduler specifies the scheduling attributes for a process. + Scheduler *Scheduler `json:"scheduler,omitempty" platform:"linux"` // SelinuxLabel specifies the selinux context that the container process is run as. SelinuxLabel string `json:"selinuxLabel,omitempty" platform:"linux"` } @@ -789,3 +819,43 @@ type ZOSDevice struct { // Gid of the device.
GID *uint32 `json:"gid,omitempty"` } + +// LinuxSchedulerPolicy represents different scheduling policies used with the Linux Scheduler +type LinuxSchedulerPolicy string + +const ( + // SchedOther is the default scheduling policy + SchedOther LinuxSchedulerPolicy = "SCHED_OTHER" + // SchedFIFO is the First-In-First-Out scheduling policy + SchedFIFO LinuxSchedulerPolicy = "SCHED_FIFO" + // SchedRR is the Round-Robin scheduling policy + SchedRR LinuxSchedulerPolicy = "SCHED_RR" + // SchedBatch is the Batch scheduling policy + SchedBatch LinuxSchedulerPolicy = "SCHED_BATCH" + // SchedISO is the Isolation scheduling policy + SchedISO LinuxSchedulerPolicy = "SCHED_ISO" + // SchedIdle is the Idle scheduling policy + SchedIdle LinuxSchedulerPolicy = "SCHED_IDLE" + // SchedDeadline is the Deadline scheduling policy + SchedDeadline LinuxSchedulerPolicy = "SCHED_DEADLINE" +) + +// LinuxSchedulerFlag represents the flags used by the Linux Scheduler. +type LinuxSchedulerFlag string + +const ( + // SchedFlagResetOnFork represents the reset on fork scheduling flag + SchedFlagResetOnFork LinuxSchedulerFlag = "SCHED_FLAG_RESET_ON_FORK" + // SchedFlagReclaim represents the reclaim scheduling flag + SchedFlagReclaim LinuxSchedulerFlag = "SCHED_FLAG_RECLAIM" + // SchedFlagDLOverrun represents the deadline overrun scheduling flag + SchedFlagDLOverrun LinuxSchedulerFlag = "SCHED_FLAG_DL_OVERRUN" + // SchedFlagKeepPolicy represents the keep policy scheduling flag + SchedFlagKeepPolicy LinuxSchedulerFlag = "SCHED_FLAG_KEEP_POLICY" + // SchedFlagKeepParams represents the keep parameters scheduling flag + SchedFlagKeepParams LinuxSchedulerFlag = "SCHED_FLAG_KEEP_PARAMS" + // SchedFlagUtilClampMin represents the utilization clamp minimum scheduling flag + SchedFlagUtilClampMin LinuxSchedulerFlag = "SCHED_FLAG_UTIL_CLAMP_MIN" + // SchedFlagUtilClampMax represents the utilization clamp maximum scheduling flag + SchedFlagUtilClampMax LinuxSchedulerFlag = "SCHED_FLAG_UTIL_CLAMP_MAX" +)