Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

core: do not imply PrivateTmp with DynamicUser, create a private tmpfs instead #32724

Merged
merged 2 commits into from
Jun 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
13 changes: 7 additions & 6 deletions man/systemd.exec.xml
Original file line number Diff line number Diff line change
Expand Up @@ -671,12 +671,13 @@
part of a unit for which dynamic users/groups are enabled do not leave files or directories owned by
these users/groups around, as a different unit might get the same UID/GID assigned later on, and thus
gain access to these files or directories. If <varname>DynamicUser=</varname> is enabled,
<varname>RemoveIPC=</varname> and <varname>PrivateTmp=</varname> are implied (and cannot be turned
off). This ensures that the lifetime of IPC objects and temporary files created by the executed
processes is bound to the runtime of the service, and hence the lifetime of the dynamic
user/group. Since <filename>/tmp/</filename> and <filename>/var/tmp/</filename> are usually the only
world-writable directories on a system this ensures that a unit making use of dynamic user/group
allocation cannot leave files around after unit termination. Furthermore
<varname>RemoveIPC=</varname> is implied (and cannot be turned off). This ensures that the lifetime
of IPC objects and temporary files created by the executed processes is bound to the runtime of the
service, and hence the lifetime of the dynamic user/group. Since <filename>/tmp/</filename> and
<filename>/var/tmp/</filename> are usually the only world-writable directories on a system, unless
<varname>PrivateTmp=</varname> is manually enabled, those directories will be placed on a private
bluca marked this conversation as resolved.
Show resolved Hide resolved
tmpfs filesystem, as this ensures that a unit making use of dynamic user/group allocation cannot
leave files around after unit termination. Furthermore
<varname>NoNewPrivileges=</varname> and <varname>RestrictSUIDSGID=</varname> are implicitly enabled
(and cannot be disabled), to ensure that processes invoked cannot take benefit or create SUID/SGID
files or directories. Moreover <varname>ProtectSystem=strict</varname> and
Expand Down
4 changes: 2 additions & 2 deletions src/core/dbus-execute.c
Original file line number Diff line number Diff line change
Expand Up @@ -1055,7 +1055,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_PROPERTY("NoExecPaths", "as", NULL, offsetof(ExecContext, no_exec_paths), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ExecSearchPath", "as", NULL, offsetof(ExecContext, exec_search_path), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("MountFlags", "t", bus_property_get_ulong, offsetof(ExecContext, mount_propagation_flag), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PrivateTmp", "b", bus_property_get_bool, offsetof(ExecContext, private_tmp), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PrivateTmp", "b", bus_property_get_private_tmp, offsetof(ExecContext, private_tmp), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PrivateDevices", "b", bus_property_get_bool, offsetof(ExecContext, private_devices), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ProtectClock", "b", bus_property_get_bool, offsetof(ExecContext, protect_clock), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ProtectKernelTunables", "b", bus_property_get_bool, offsetof(ExecContext, protect_kernel_tunables), SD_BUS_VTABLE_PROPERTY_CONST),
Expand Down Expand Up @@ -1736,7 +1736,7 @@ int bus_exec_context_set_transient_property(
return bus_set_transient_unsigned(u, name, &c->tty_cols, message, flags, error);

if (streq(name, "PrivateTmp"))
return bus_set_transient_bool(u, name, &c->private_tmp, message, flags, error);
return bus_set_transient_private_tmp(u, name, &c->private_tmp, message, flags, error);
bluca marked this conversation as resolved.
Show resolved Hide resolved

if (streq(name, "PrivateDevices"))
return bus_set_transient_bool(u, name, &c->private_devices, message, flags, error);
Expand Down
39 changes: 39 additions & 0 deletions src/core/dbus-util.c
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,45 @@ int bus_set_transient_usec_internal(
return 1;
}

int bus_set_transient_private_tmp(
Unit *u,
const char *name,
PrivateTmp *p,
sd_bus_message *message,
UnitWriteFlags flags,
sd_bus_error *error) {

int v, r;

assert(p);

r = sd_bus_message_read(message, "b", &v);
if (r < 0)
return r;

if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
*p = v ? PRIVATE_TMP_CONNECTED : PRIVATE_TMP_OFF;
unit_write_settingf(u, flags, name, "%s=%s", name, yes_no(v));
}

return 1;
}

int bus_property_get_private_tmp(
sd_bus *bus,
const char *path,
const char *interface,
const char *property,
sd_bus_message *reply,
void *userdata,
sd_bus_error *error) {

PrivateTmp *p = ASSERT_PTR(userdata);
int b = *p != PRIVATE_TMP_OFF;

return sd_bus_message_append_basic(reply, 'b', &b);
}

int bus_verify_manage_units_async_full(
Unit *u,
const char *verb,
Expand Down
3 changes: 3 additions & 0 deletions src/core/dbus-util.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "sd-bus.h"

#include "dissect-image.h"
#include "execute.h"
#include "unit.h"

int bus_property_get_triggered_unit(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
Expand Down Expand Up @@ -244,6 +245,7 @@ int bus_set_transient_string(Unit *u, const char *name, char **p, sd_bus_message
int bus_set_transient_bool(Unit *u, const char *name, bool *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
int bus_set_transient_tristate(Unit *u, const char *name, int *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
int bus_set_transient_usec_internal(Unit *u, const char *name, usec_t *p, bool fix_0, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
int bus_set_transient_private_tmp(Unit *u, const char *name, PrivateTmp *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
static inline int bus_set_transient_usec(Unit *u, const char *name, usec_t *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error) {
return bus_set_transient_usec_internal(u, name, p, false, message, flags, error);
}
Expand All @@ -255,3 +257,4 @@ int bus_verify_manage_units_async_full(Unit *u, const char *verb, const char *po
int bus_read_mount_options(sd_bus_message *message, sd_bus_error *error, MountOptions **ret_options, char **ret_format_str, const char *separator);

int bus_property_get_activation_details(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
int bus_property_get_private_tmp(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
16 changes: 9 additions & 7 deletions src/core/exec-invoke.c
Original file line number Diff line number Diff line change
Expand Up @@ -3054,7 +3054,7 @@ static int apply_mount_namespace(
_cleanup_strv_free_ char **empty_directories = NULL, **symlinks = NULL,
**read_write_paths_cleanup = NULL;
_cleanup_free_ char *creds_path = NULL, *incoming_dir = NULL, *propagate_dir = NULL,
*extension_dir = NULL, *host_os_release_stage = NULL, *root_image = NULL, *root_dir = NULL;
*private_namespace_dir = NULL, *host_os_release_stage = NULL, *root_image = NULL, *root_dir = NULL;
const char *tmp_dir = NULL, *var_tmp_dir = NULL;
char **read_write_paths;
bool setup_os_release_symlink;
Expand Down Expand Up @@ -3108,7 +3108,7 @@ static int apply_mount_namespace(
* to world users. Inside of it there's a /tmp that is sticky, and that's the one we want to
* use here. This does not apply when we are using /run/systemd/empty as fallback. */

if (context->private_tmp && runtime && runtime->shared) {
if (context->private_tmp == PRIVATE_TMP_CONNECTED && runtime && runtime->shared) {
if (streq_ptr(runtime->shared->tmp_dir, RUN_SYSTEMD_EMPTY))
tmp_dir = runtime->shared->tmp_dir;
else if (runtime->shared->tmp_dir)
Expand Down Expand Up @@ -3145,8 +3145,8 @@ static int apply_mount_namespace(
if (!incoming_dir)
return -ENOMEM;

extension_dir = strdup("/run/systemd/unit-extensions");
if (!extension_dir)
private_namespace_dir = strdup("/run/systemd");
if (!private_namespace_dir)
return -ENOMEM;

/* If running under a different root filesystem, propagate the host's os-release. We make a
Expand All @@ -3159,7 +3159,7 @@ static int apply_mount_namespace(
} else {
assert(params->runtime_scope == RUNTIME_SCOPE_USER);

if (asprintf(&extension_dir, "/run/user/" UID_FMT "/systemd/unit-extensions", geteuid()) < 0)
if (asprintf(&private_namespace_dir, "/run/user/" UID_FMT "/systemd", geteuid()) < 0)
return -ENOMEM;
bluca marked this conversation as resolved.
Show resolved Hide resolved

if (setup_os_release_symlink) {
Expand Down Expand Up @@ -3205,6 +3205,8 @@ static int apply_mount_namespace(
.temporary_filesystems = context->temporary_filesystems,
.n_temporary_filesystems = context->n_temporary_filesystems,

.private_tmp = context->private_tmp,

.mount_images = context->mount_images,
.n_mount_images = context->n_mount_images,
.mount_image_policy = context->mount_image_policy ?: &image_policy_service,
Expand All @@ -3225,7 +3227,7 @@ static int apply_mount_namespace(

.propagate_dir = propagate_dir,
.incoming_dir = incoming_dir,
.extension_dir = extension_dir,
.private_namespace_dir = private_namespace_dir,
.notify_socket = root_dir || root_image ? params->notify_socket : NULL,
.host_os_release_stage = host_os_release_stage,

Expand Down Expand Up @@ -3857,7 +3859,7 @@ static bool exec_context_need_unprivileged_private_users(
return false;

return context->private_users ||
context->private_tmp ||
context->private_tmp != PRIVATE_TMP_OFF ||
context->private_devices ||
context->private_network ||
context->network_namespace_path ||
Expand Down
9 changes: 4 additions & 5 deletions src/core/execute-serialize.c
Original file line number Diff line number Diff line change
Expand Up @@ -1840,7 +1840,7 @@ static int exec_context_serialize(const ExecContext *c, FILE *f) {
if (r < 0)
return r;

r = serialize_bool_elide(f, "exec-context-private-tmp", c->private_tmp);
r = serialize_item(f, "exec-context-private-tmp", private_tmp_to_string(c->private_tmp));
if (r < 0)
return r;

Expand Down Expand Up @@ -2720,10 +2720,9 @@ static int exec_context_deserialize(ExecContext *c, FILE *f) {
if (r < 0)
return r;
} else if ((val = startswith(l, "exec-context-private-tmp="))) {
r = parse_boolean(val);
if (r < 0)
return r;
c->private_tmp = r;
c->private_tmp = private_tmp_from_string(val);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously, the field was serialized with serialize_bool_elide(). Hence, "yes" needs to be handled here for backward compatibility.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For execute-serialize we don't maintain serialization format, but instead pin the executor to make sure compatible version is used, no?

Copy link
Member

@yuwata yuwata Jun 19, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Uh? I did not know that. I have not followed well how we serialize/deserialize after we introduced executor...

So, is this not called when PID1 is reexecuted?? At least with old systemd, deserialization is done after reexec, hence, the deserialization logic needed to support old format.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please check out the comments in execute-serialization.h.

if (c->private_tmp < 0)
return c->private_tmp;
} else if ((val = startswith(l, "exec-context-private-devices="))) {
r = parse_boolean(val);
if (r < 0)
Expand Down
11 changes: 7 additions & 4 deletions src/core/execute.c
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,10 @@ bool exec_needs_mount_namespace(
if (!IN_SET(context->mount_propagation_flag, 0, MS_SHARED))
return true;

if (context->private_tmp && runtime && runtime->shared && (runtime->shared->tmp_dir || runtime->shared->var_tmp_dir))
if (context->private_tmp == PRIVATE_TMP_DISCONNECTED)
return true;

if (context->private_tmp == PRIVATE_TMP_CONNECTED && runtime && runtime->shared && (runtime->shared->tmp_dir || runtime->shared->var_tmp_dir))
return true;

if (context->private_devices ||
Expand Down Expand Up @@ -964,7 +967,7 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
prefix, empty_to_root(c->root_directory),
prefix, yes_no(c->root_ephemeral),
prefix, yes_no(c->non_blocking),
prefix, yes_no(c->private_tmp),
prefix, private_tmp_to_string(c->private_tmp),
prefix, yes_no(c->private_devices),
prefix, yes_no(c->protect_kernel_tunables),
prefix, yes_no(c->protect_kernel_modules),
Expand Down Expand Up @@ -2155,12 +2158,12 @@ static int exec_shared_runtime_make(
assert(id);

/* It is not necessary to create ExecSharedRuntime object. */
if (!exec_needs_network_namespace(c) && !exec_needs_ipc_namespace(c) && !c->private_tmp) {
if (!exec_needs_network_namespace(c) && !exec_needs_ipc_namespace(c) && c->private_tmp != PRIVATE_TMP_CONNECTED) {
*ret = NULL;
return 0;
}

if (c->private_tmp &&
if (c->private_tmp == PRIVATE_TMP_CONNECTED &&
!(prefixed_path_strv_contains(c->inaccessible_paths, "/tmp") &&
(prefixed_path_strv_contains(c->inaccessible_paths, "/var/tmp") ||
prefixed_path_strv_contains(c->inaccessible_paths, "/var")))) {
Expand Down
2 changes: 1 addition & 1 deletion src/core/execute.h
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ struct ExecContext {
int private_mounts;
int mount_apivfs;
int memory_ksm;
bool private_tmp;
PrivateTmp private_tmp;
bool private_network;
bool private_devices;
bool private_users;
Expand Down
2 changes: 1 addition & 1 deletion src/core/load-fragment-gperf.gperf.in
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@
{{type}}.BindPaths, config_parse_bind_paths, 0, offsetof({{type}}, exec_context)
{{type}}.BindReadOnlyPaths, config_parse_bind_paths, 0, offsetof({{type}}, exec_context)
{{type}}.TemporaryFileSystem, config_parse_temporary_filesystems, 0, offsetof({{type}}, exec_context)
{{type}}.PrivateTmp, config_parse_bool, 0, offsetof({{type}}, exec_context.private_tmp)
{{type}}.PrivateTmp, config_parse_private_tmp, 0, offsetof({{type}}, exec_context)
{{type}}.PrivateDevices, config_parse_bool, 0, offsetof({{type}}, exec_context.private_devices)
{{type}}.ProtectKernelTunables, config_parse_bool, 0, offsetof({{type}}, exec_context.protect_kernel_tunables)
{{type}}.ProtectKernelModules, config_parse_bool, 0, offsetof({{type}}, exec_context.protect_kernel_modules)
Expand Down
28 changes: 28 additions & 0 deletions src/core/load-fragment.c
Original file line number Diff line number Diff line change
Expand Up @@ -5199,6 +5199,34 @@ int config_parse_temporary_filesystems(
}
}

int config_parse_private_tmp(
const char* unit,
const char *filename,
unsigned line,
const char *section,
unsigned section_line,
const char *lvalue,
int ltype,
const char *rvalue,
void *data,
void *userdata) {

ExecContext *c = ASSERT_PTR(data);
int r;

assert(filename);
assert(rvalue);

r = parse_boolean(rvalue);
if (r < 0) {
log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse boolean value: %s ignoring", rvalue);
return 0;
}

c->private_tmp = r ? PRIVATE_TMP_CONNECTED : PRIVATE_TMP_OFF;
return 0;
}

int config_parse_bind_paths(
const char *unit,
const char *filename,
Expand Down
1 change: 1 addition & 0 deletions src/core/load-fragment.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_import_credential);
CONFIG_PARSER_PROTOTYPE(config_parse_set_status);
CONFIG_PARSER_PROTOTYPE(config_parse_namespace_path_strv);
CONFIG_PARSER_PROTOTYPE(config_parse_temporary_filesystems);
CONFIG_PARSER_PROTOTYPE(config_parse_private_tmp);
CONFIG_PARSER_PROTOTYPE(config_parse_cpu_quota);
CONFIG_PARSER_PROTOTYPE(config_parse_allowed_cpuset);
CONFIG_PARSER_PROTOTYPE(config_parse_protect_home);
Expand Down