From c70d78811fab1262aa744e8c6dc58c1a248db31a Mon Sep 17 00:00:00 2001 From: Dapeng Gao Date: Tue, 29 Oct 2024 13:53:26 +0000 Subject: [PATCH 1/2] c18n: Rename and generalise c18n_return_address Turn all use sites of __builtin_return_address(0) to use the newly defined rtld_get_return_address macro, which is overridden to take into account the existence of trampolines when c18n is enabled. --- libexec/rtld-elf/rtld.c | 38 +++++++----------------------------- libexec/rtld-elf/rtld.h | 14 +++++++++++++ libexec/rtld-elf/rtld_c18n.h | 6 ------ 3 files changed, 21 insertions(+), 37 deletions(-) diff --git a/libexec/rtld-elf/rtld.c b/libexec/rtld-elf/rtld.c index 918dcb785113..61611becd86d 100644 --- a/libexec/rtld-elf/rtld.c +++ b/libexec/rtld-elf/rtld.c @@ -4482,15 +4482,8 @@ do_dlsym(void *handle, const char *name, void *retaddr, const Ver_Entry *ve, void * dlsym(void *handle, const char *name) { - void *retaddr; - -#ifdef CHERI_LIB_C18N - retaddr = c18n_return_address(); -#else - retaddr = __builtin_return_address(0); -#endif - - return (do_dlsym(handle, name, retaddr, NULL, SYMLOOK_DLSYM)); + return (do_dlsym(handle, name, rtld_get_return_address(), NULL, + SYMLOOK_DLSYM)); } dlfunc_t @@ -4500,15 +4493,9 @@ dlfunc(void *handle, const char *name) void *d; dlfunc_t f; } rv; - void *retaddr; - -#ifdef CHERI_LIB_C18N - retaddr = c18n_return_address(); -#else - retaddr = __builtin_return_address(0); -#endif - rv.d = do_dlsym(handle, name, retaddr, NULL, SYMLOOK_DLSYM); + rv.d = do_dlsym(handle, name, rtld_get_return_address(), NULL, + SYMLOOK_DLSYM); return (rv.f); } @@ -4516,20 +4503,13 @@ void * dlvsym(void *handle, const char *name, const char *version) { Ver_Entry ventry; - void *retaddr; ventry.name = version; ventry.file = NULL; ventry.hash = elf_hash(version); ventry.flags= 0; - -#ifdef CHERI_LIB_C18N - retaddr = c18n_return_address(); -#else - retaddr = __builtin_return_address(0); -#endif - - return (do_dlsym(handle, name, retaddr, &ventry, SYMLOOK_DLSYM)); + 
return (do_dlsym(handle, name, rtld_get_return_address(), &ventry, + SYMLOOK_DLSYM)); } int @@ -4635,11 +4615,7 @@ dlinfo(void *handle, int request, void *p) if (handle == NULL || handle == RTLD_SELF) { void *retaddr; -#ifdef CHERI_LIB_C18N - retaddr = c18n_return_address(); -#else - retaddr = __builtin_return_address(0); /* __GNUC__ only */ -#endif + retaddr = rtld_get_return_address(); if ((obj = obj_from_addr(retaddr)) == NULL) _rtld_error("Cannot determine caller's shared object"); } else diff --git a/libexec/rtld-elf/rtld.h b/libexec/rtld-elf/rtld.h index 0b49324ee898..0fac8e8144bc 100644 --- a/libexec/rtld-elf/rtld.h +++ b/libexec/rtld-elf/rtld.h @@ -83,6 +83,20 @@ __BEGIN_DECLS typeof (Y) y_ = (Y); \ (x_ > y_) ? x_ : y_; }) +#ifndef CHERI_LIB_C18N +#define rtld_get_return_address() __builtin_return_address(0) +#else +#define rtld_get_return_address() ({ \ + void *__retaddr = __builtin_return_address(0); \ + if (C18N_ENABLED) { \ + struct trusted_frame *__tf = get_trusted_stk(); \ + if (c18n_is_tramp((uintptr_t)__retaddr, __tf)) \ + __retaddr = __tf->state.pc; \ + } \ + __retaddr; \ +}) +#endif + #define NEW(type) ((type *) xmalloc(sizeof(type))) #define CNEW(type) ((type *) xcalloc(1, sizeof(type))) diff --git a/libexec/rtld-elf/rtld_c18n.h b/libexec/rtld-elf/rtld_c18n.h index 6a4c5bf3829a..423696de96c8 100644 --- a/libexec/rtld-elf/rtld_c18n.h +++ b/libexec/rtld-elf/rtld_c18n.h @@ -263,12 +263,6 @@ func_sig_legal(struct func_sig sig) /* * APIs */ -/* - * This macro can only be used in a function directly invoked by a trampoline. - */ -#define c18n_return_address() (C18N_ENABLED ? 
\ - get_trusted_stk()->state.pc : __builtin_return_address(0)) - void *_rtld_sandbox_code(void *, struct func_sig); void *_rtld_safebox_code(void *, struct func_sig); From f8ade6d053a567bda0d7877537bf63c779130266 Mon Sep 17 00:00:00 2001 From: Dapeng Gao Date: Wed, 10 Jul 2024 10:39:45 +0100 Subject: [PATCH 2/2] c18n: Clear correct number of return values during tail-call In the following example, bar makes a tail-call to foo, which returns a value that is observable to the caller of bar, even though bar returns nothing. void *foo(); void bar() { foo(); } When bar is called, previous versions of the trampoline clear return value registers as if foo were being called directly, leaking a capability. Instead, clear the maximum number of return value registers as required for both foo and bar. --- libexec/rtld-elf/aarch64/rtld_c18n_asm.S | 140 ++++++++++--------- libexec/rtld-elf/aarch64/rtld_c18n_machdep.c | 27 +++- libexec/rtld-elf/rtld_c18n.h | 5 +- 3 files changed, 100 insertions(+), 72 deletions(-) diff --git a/libexec/rtld-elf/aarch64/rtld_c18n_asm.S b/libexec/rtld-elf/aarch64/rtld_c18n_asm.S index eb25aa0ef4c8..4161c2b81916 100644 --- a/libexec/rtld-elf/aarch64/rtld_c18n_asm.S +++ b/libexec/rtld-elf/aarch64/rtld_c18n_asm.S @@ -148,8 +148,8 @@ ENTRY(create_untrusted_stk) /* * NON-STANDARD CALLING CONVENTION * - * c19: Callee to be tail-called - * w20: Callee's compartment ID + * w19: Callee's compartment ID + * c26: Callee to be tail-called * * The function resolves the callee's stack, installs it, and tail-calls * the callee. 
@@ -167,7 +167,7 @@ ENTRY(create_untrusted_stk) save_arguments - mov w0, w20 + mov w0, w19 bl resolve_untrusted_stk_impl mov c10, c0 @@ -189,14 +189,14 @@ ENTRY(create_untrusted_stk) mov x18, xzr /* - * All callee-saved registers are safe except c23 + * All callee-saved registers are safe except c28 */ - mov x23, xzr + mov x28, xzr #ifdef __ARM_MORELLO_PURECAP_BENCHMARK_ABI - br x19 + br x26 #else - brr c19 + brr c26 #endif END(create_untrusted_stk) @@ -280,85 +280,95 @@ TRAMP(tramp_push_frame) * Store the caller's current stack top in the stack lookup table. */ str c15, [STACK_TABLE_C, w12, uxtw #0] - - stp c19, c20, [TRUSTED_STACK_C, #(-CAP_WIDTH * TRUSTED_FRAME_SIZE + CAP_WIDTH * 2)] - /* - * Load the target capability. - */ -1: ldr c19, #0 /* To be patched at runtime */ - /* - * Get the callee's compartment ID. - */ -2: movz w20, #0 /* To be patched at runtime */ /* * Get the length of the stack lookup table. */ gclen x13, STACK_TABLE_C + + stp c19, c20, [TRUSTED_STACK_C, #(-CAP_WIDTH * TRUSTED_FRAME_SIZE + CAP_WIDTH * 2)] /* - * Use subs instead of cmp to clear a register tag. + * Get the callee's compartment ID. */ - subs x14, x13, x20 - +1: movz w19, #0 /* To be patched at runtime */ /* - * Save the callee's current stack top and old stack top. + * Use subs instead of cmp to clear a capability tag. */ - stp c15, c16, [TRUSTED_STACK_C, #(-CAP_WIDTH * TRUSTED_FRAME_SIZE + TRUSTED_FRAME_SP_OSP)] + subs x14, x13, x19 /* * If the stack lookup table index is out-of-bounds, set it to zero. */ - csel w16, w20, wzr, hi + csel w20, w19, wzr, hi /* * Load the callee's stack if the stack lookup table index is within * bounds. Otherwise the resolver will be loaded. */ - ldr c17, [STACK_TABLE_C, w16, uxtw #0] + ldr c17, [STACK_TABLE_C, w20, uxtw #0] + /* + * The tag of the return capability is set iff the condition flag is cs. + */ + chktgd c30 + /* + * Compare the return address to the landing address. The call is a + * tail-call iff the condition flag is eq. 
+ */ + ccmp x30, x11, #0, cs stp c21, c22, [TRUSTED_STACK_C, #(-CAP_WIDTH * TRUSTED_FRAME_SIZE + CAP_WIDTH * 4)] /* - * The resolver is loaded iff the condition flag is ne. + * Get the offset to the next trusted frame. */ - gcperm x21, c17 - ands x22, x21, #(1 << 15) + mov x21, #-(CAP_WIDTH * TRUSTED_FRAME_SIZE) /* - * If the resolver is loaded, keep the stack unchanged. Otherwise, - * install the callee's stack. + * If the call is a tail-call, do not bump the trusted stack pointer. */ - csel c15, c15, c17, ne + csel x22, xzr, x21, eq stp c23, c24, [TRUSTED_STACK_C, #(-CAP_WIDTH * TRUSTED_FRAME_SIZE + CAP_WIDTH * 6)] /* - * If the resolver is loaded, set the branch target to it. Otherwise, - * install the callee. + * If the call is a tail-call, get the number of return value registers + * of the caller. */ - csel c23, c17, c19, ne + csinv x23, x10, xzr, eq /* - * Compare the return address to the landing address. + * Get the landing address. */ - subs x24, x30, x11 +2: adr c24, #0 /* To be patched at runtime */ stp c25, c26, [TRUSTED_STACK_C, #(-CAP_WIDTH * TRUSTED_FRAME_SIZE + CAP_WIDTH * 8)] /* - * Get the tag of the return capability. + * Compute the number of return value registers. If the call is a tail- + * call, it is the minimum of that of the caller and the callee. */ - gctag x25, c30 +3: ubfm x25, x23, #48, #0 /* To be patched at runtime */ /* - * Get the offset to the next trusted frame. + * Load the target capability. */ - mov x26, #-(CAP_WIDTH * TRUSTED_FRAME_SIZE) +4: ldr c26, #0 /* To be patched at runtime */ + + /* + * Save the caller's current stack top and old stack top. + */ + stp c15, c16, [TRUSTED_STACK_C, #(-CAP_WIDTH * TRUSTED_FRAME_SIZE + TRUSTED_FRAME_SP_OSP)] /* - * The call is a tail-call iff the condition flag is eq. + * Get the permissions of the loaded value. 
*/ - ccmp x25, #1, #0, eq + gcperm x16, c17 stp c27, c28, [TRUSTED_STACK_C, #(-CAP_WIDTH * TRUSTED_FRAME_SIZE + CAP_WIDTH * 10)] /* - * If the call is a tail call, do not bump the trusted stack pointer. + * The resolver is loaded iff the condition flag is ne. */ - csel x27, xzr, x26, eq + ands x27, x16, #(1 << 15) /* - * Get the landing address. + * If the resolver is loaded, keep the stack unchanged. Otherwise, + * install the callee's stack. + */ + csel c15, c15, c17, ne + /* + * If the resolver is loaded, set the branch target to it. Otherwise, + * install the callee. */ -3: adr c28, #0 /* To be patched at runtime */ + csel c28, c17, c26, ne /* * Save the address of the previous trusted frame and the compartment ID @@ -374,21 +384,22 @@ TRAMP(tramp_push_frame) * information about the callee regardless of whether the call is a * tail-call. */ - add TRUSTED_STACK_C, TRUSTED_STACK_C, x27 + add TRUSTED_STACK_C, TRUSTED_STACK_C, x22 /* * Save the landing address. */ - str x28, [TRUSTED_STACK_C, #TRUSTED_FRAME_LANDING] + str x24, [TRUSTED_STACK_C, #TRUSTED_FRAME_LANDING] /* - * Get the number of return value registers. + * Combine the callee's compartment ID and the number of return value + * registers. */ -4: add w28, w20, #0, lsl #12 /* To be patched at runtime */ + orr w24, w19, w25, lsl #16 /* * Save the callee's compartment ID and the number of return value * registers. 
*/ - str w28, [TRUSTED_STACK_C, #TRUSTED_FRAME_CALLEE] + str w24, [TRUSTED_STACK_C, #TRUSTED_FRAME_CALLEE] msr TRUSTED_STACK, TRUSTED_STACK_C @@ -402,10 +413,10 @@ TRAMP(tramp_push_frame) set_untrusted_stk c15 TRAMPEND(tramp_push_frame) -PATCH_POINT(tramp_push_frame, target, 1b) -PATCH_POINT(tramp_push_frame, cid, 2b) -PATCH_POINT(tramp_push_frame, landing, 3b) -PATCH_POINT(tramp_push_frame, ret_args, 4b) +PATCH_POINT(tramp_push_frame, cid, 1b) +PATCH_POINT(tramp_push_frame, landing, 2b) +PATCH_POINT(tramp_push_frame, n_rets, 3b) +PATCH_POINT(tramp_push_frame, target, 4b) /* * Save the address of the current frame to c29 so that unwinders can locate it. @@ -421,6 +432,7 @@ TRAMPEND(tramp_update_fp_untagged) TRAMP(tramp_count_entry) 1: ldr c24, #0 /* To be patched at runtime */ + movz w25, #1 stadd w25, [c24] TRAMPEND(tramp_count_entry) @@ -444,9 +456,9 @@ PATCH_POINT(tramp_call_hook, header, 3b) TRAMP(tramp_invoke_exe) #ifdef __ARM_MORELLO_PURECAP_BENCHMARK_ABI - blr x23 + blr x28 #else - blr c23 + blr c28 #endif TRAMPEND(tramp_invoke_exe) @@ -473,9 +485,9 @@ TRAMP(tramp_invoke_res) clrtag TRUSTED_STACK_C, TRUSTED_STACK_C #ifdef __ARM_MORELLO_PURECAP_BENCHMARK_ABI - blr x23 + blr x28 #else - blrr c23 + blrr c28 #endif TRAMPEND(tramp_invoke_res) @@ -532,20 +544,20 @@ TRAMP(tramp_pop_frame) /* * Extract the number of return value registers. */ - ubfx x13, x10, #48, #2 + ubfx x13, x10, #50, #2 /* * Clear unused return value registers. The registers to clear are * encoded as follows: - * - None: 0b00 + * - None: 0b11 * - c1 only: 0b01 - * - c0 and c1: 0b1x + * - c0 and c1: 0b00 * Use comparison and csel to avoid branching. * - * Use subs instead of cmp to clear a register tag. + * Use subs instead of cmp to clear a capability tag. */ subs w14, w13, #0b01 - csel c0, czr, c0, hi - csel c1, czr, c1, hs + csel c0, czr, c0, lo + csel c1, czr, c1, ls /* * Clear temporary registers. 
diff --git a/libexec/rtld-elf/aarch64/rtld_c18n_machdep.c b/libexec/rtld-elf/aarch64/rtld_c18n_machdep.c index 729b1b5facb8..79297e2cae5b 100644 --- a/libexec/rtld-elf/aarch64/rtld_c18n_machdep.c +++ b/libexec/rtld-elf/aarch64/rtld_c18n_machdep.c @@ -100,10 +100,10 @@ tramp_compile(char **entry, const struct tramp_data *data) *PATCH_INS(PATCH_OFF(tramp, name)) |= _value; \ } while (0) -#define PATCH_ADD(tramp, name, value) \ +#define PATCH_UBFM(tramp, name, value) \ do { \ uint32_t _value = (value); \ - _value = ((_value & 0xfff) << 10); \ + _value = ((_value & 0x3f) << 10); \ *PATCH_INS(PATCH_OFF(tramp, name)) |= _value; \ } while (0) @@ -145,11 +145,28 @@ tramp_compile(char **entry, const struct tramp_data *data) size += offsetof(struct tramp_header, entry); COPY(push_frame); - PATCH_LDR_IMM(push_frame, target, target_off); PATCH_MOV(push_frame, cid, cid_to_index(data->defobj->compart_id).val); - PATCH_ADD(push_frame, ret_args, - data->sig.valid ? data->sig.ret_args << (16 - 12) : 0); landing_off = PATCH_OFF(push_frame, landing); + /* + * The trampoline computes the number of return value registers and + * stores it in the trusted frame, encoded as follows: + * - TWO: 0b1111 + * - ONE: 0b0111 + * - NONE: 0b0011 + * - INDIRECT: 0b0001 + * + * The computation starts with bits [51:48] of a mask, which encodes the + * maximum number of return value registers that can be used. This is + * usually 0b1111 but can be different if the call is a tail-call. + * + * We then compute the minimum of this number and the number of return + * value registers actually used by the callee. In the trampoline, this + * is done by a ubfm instruction that extracts a suffix from bits + * [51:48] of the mask. + */ + PATCH_UBFM(push_frame, n_rets, + 51 - (data->sig.valid ? 
data->sig.ret_args : 0)); + PATCH_LDR_IMM(push_frame, target, target_off); if (executive || ld_compartment_unwind != NULL) COPY(update_fp); diff --git a/libexec/rtld-elf/rtld_c18n.h b/libexec/rtld-elf/rtld_c18n.h index 423696de96c8..e8665fd57685 100644 --- a/libexec/rtld-elf/rtld_c18n.h +++ b/libexec/rtld-elf/rtld_c18n.h @@ -152,10 +152,9 @@ struct trusted_frame { */ stk_table_index callee; /* - * Number of return value registers, encoded in enum tramp_ret_args + * Number of return value registers with architecture-specific encoding */ - uint8_t ret_args : 2; - uint16_t reserved : 14; + uint16_t n_rets; /* * This field contains the code address in the trampoline that the * callee should return to. This is used by trampolines to detect cross-