Message ID | 20240829-arm64-gcs-v12-21-42fec947436a@kernel.org (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | arm64/gcs: Provide support for GCS in userspace | expand |
On Thu, Aug 29, 2024 at 12:27:37AM +0100, Mark Brown wrote: > When a new thread is created by a thread with GCS enabled the GCS needs > to be specified along with the regular stack. > > Unfortunately plain clone() is not extensible and existing clone3() > users will not specify a stack so all existing code would be broken if > we mandated specifying the stack explicitly. For compatibility with > these cases and also x86 (which did not initially implement clone3() > support for shadow stacks) if no GCS is specified we will allocate one > so when a thread is created which has GCS enabled allocate one for it. > We follow the extensively discussed x86 implementation and allocate > min(RLIMIT_STACK, 2G). Since the GCS only stores the call stack and not > any variables this should be more than sufficient for most applications. > > GCSs allocated via this mechanism will be freed when the thread exits. > > Reviewed-by: Catalin Marinas <catalin.marinas@arm.com> > Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org> > Signed-off-by: Mark Brown <broonie@kernel.org> Acked-by: Yury Khrustalev <yury.khrustalev@arm.com>
The 08/29/2024 00:27, Mark Brown wrote: > Unfortunately plain clone() is not extensible and existing clone3() > users will not specify a stack so all existing code would be broken if > we mandated specifying the stack explicitly. For compatibility with > these cases and also x86 (which did not initially implement clone3() > support for shadow stacks) if no GCS is specified we will allocate one > so when a thread is created which has GCS enabled allocate one for it. > We follow the extensively discussed x86 implementation and allocate > min(RLIMIT_STACK, 2G). Since the GCS only stores the call stack and not > any variables this should be more than sufficient for most applications. the code has RLIMIT_STACK/2 (which is what i expect on arm64, since gcs entry size is min stack frame / 2 if the stack is correctly aligned) > > GCSs allocated via this mechanism will be freed when the thread exits. i see gcs still mapped after thread exit when testing. > +static unsigned long gcs_size(unsigned long size) > +{ > + if (size) > + return PAGE_ALIGN(size); no /2 > + > + /* Allocate RLIMIT_STACK/2 with limits of PAGE_SIZE..2G */ > + size = PAGE_ALIGN(min_t(unsigned long long, > + rlimit(RLIMIT_STACK) / 2, SZ_2G)); has /2 > + return max(PAGE_SIZE, size); > +} > + > +unsigned long gcs_alloc_thread_stack(struct task_struct *tsk, > + const struct kernel_clone_args *args) > +{ > + unsigned long addr, size; > + > + if (!system_supports_gcs()) > + return 0; > + > + if (!task_gcs_el0_enabled(tsk)) > + return 0; > + > + if ((args->flags & (CLONE_VFORK | CLONE_VM)) != CLONE_VM) { > + tsk->thread.gcspr_el0 = read_sysreg_s(SYS_GCSPR_EL0); > + return 0; > + } > + > + size = args->stack_size; no /2 (i think this should be divided) > + > + size = gcs_size(size); > + addr = alloc_gcs(0, size); > + if (IS_ERR_VALUE(addr)) > + return addr; > + > + tsk->thread.gcs_base = addr; > + tsk->thread.gcs_size = size; > + tsk->thread.gcspr_el0 = addr + size - sizeof(u64); > + > + return addr; > +} ... 
> void gcs_free(struct task_struct *task) > { > + > + /* > + * When fork() with CLONE_VM fails, the child (tsk) already > + * has a GCS allocated, and exit_thread() calls this function > + * to free it. In this case the parent (current) and the > + * child share the same mm struct. > + */ > + if (!task->mm || task->mm != current->mm) > + return; > + > if (task->thread.gcs_base) > vm_munmap(task->thread.gcs_base, task->thread.gcs_size); not sure why this logic fails to free thread gcs (created with clone3 in glibc) other than the gcs leak, my tests pass.
diff --git a/arch/arm64/include/asm/gcs.h b/arch/arm64/include/asm/gcs.h index 04594ef59dad..c1f274fdb9c0 100644 --- a/arch/arm64/include/asm/gcs.h +++ b/arch/arm64/include/asm/gcs.h @@ -8,6 +8,8 @@ #include <asm/types.h> #include <asm/uaccess.h> +struct kernel_clone_args; + static inline void gcsb_dsync(void) { asm volatile(".inst 0xd503227f" : : : "memory"); @@ -58,6 +60,8 @@ static inline bool task_gcs_el0_enabled(struct task_struct *task) void gcs_set_el0_mode(struct task_struct *task); void gcs_free(struct task_struct *task); void gcs_preserve_current_state(void); +unsigned long gcs_alloc_thread_stack(struct task_struct *tsk, + const struct kernel_clone_args *args); #else @@ -69,6 +73,11 @@ static inline bool task_gcs_el0_enabled(struct task_struct *task) static inline void gcs_set_el0_mode(struct task_struct *task) { } static inline void gcs_free(struct task_struct *task) { } static inline void gcs_preserve_current_state(void) { } +static inline unsigned long gcs_alloc_thread_stack(struct task_struct *tsk, + const struct kernel_clone_args *args) +{ + return -ENOTSUPP; +} #endif diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 3622956b6515..de59aa16919c 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -285,9 +285,29 @@ static void flush_gcs(void) write_sysreg_s(0, SYS_GCSPR_EL0); } +static int copy_thread_gcs(struct task_struct *p, + const struct kernel_clone_args *args) +{ + unsigned long gcs; + + gcs = gcs_alloc_thread_stack(p, args); + if (IS_ERR_VALUE(gcs)) + return PTR_ERR((void *)gcs); + + p->thread.gcs_el0_mode = current->thread.gcs_el0_mode; + p->thread.gcs_el0_locked = current->thread.gcs_el0_locked; + + return 0; +} + #else static void flush_gcs(void) { } +static int copy_thread_gcs(struct task_struct *p, + const struct kernel_clone_args *args) +{ + return 0; +} #endif @@ -303,6 +323,7 @@ void flush_thread(void) void arch_release_task_struct(struct task_struct *tsk) { fpsimd_release_task(tsk); 
+ gcs_free(tsk); } int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) @@ -366,6 +387,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) unsigned long stack_start = args->stack; unsigned long tls = args->tls; struct pt_regs *childregs = task_pt_regs(p); + int ret; memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context)); @@ -407,6 +429,10 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) p->thread.uw.tp_value = tls; p->thread.tpidr2_el0 = 0; } + + ret = copy_thread_gcs(p, args); + if (ret != 0) + return ret; } else { /* * A kthread has no context to ERET to, so ensure any buggy diff --git a/arch/arm64/mm/gcs.c b/arch/arm64/mm/gcs.c index b0a67efc522b..6e8a5e14fff1 100644 --- a/arch/arm64/mm/gcs.c +++ b/arch/arm64/mm/gcs.c @@ -5,9 +5,69 @@ #include <linux/syscalls.h> #include <linux/types.h> +#include <asm/cmpxchg.h> #include <asm/cpufeature.h> +#include <asm/gcs.h> #include <asm/page.h> +static unsigned long alloc_gcs(unsigned long addr, unsigned long size) +{ + int flags = MAP_ANONYMOUS | MAP_PRIVATE; + struct mm_struct *mm = current->mm; + unsigned long mapped_addr, unused; + + if (addr) + flags |= MAP_FIXED_NOREPLACE; + + mmap_write_lock(mm); + mapped_addr = do_mmap(NULL, addr, size, PROT_READ, flags, + VM_SHADOW_STACK | VM_WRITE, 0, &unused, NULL); + mmap_write_unlock(mm); + + return mapped_addr; +} + +static unsigned long gcs_size(unsigned long size) +{ + if (size) + return PAGE_ALIGN(size); + + /* Allocate RLIMIT_STACK/2 with limits of PAGE_SIZE..2G */ + size = PAGE_ALIGN(min_t(unsigned long long, + rlimit(RLIMIT_STACK) / 2, SZ_2G)); + return max(PAGE_SIZE, size); +} + +unsigned long gcs_alloc_thread_stack(struct task_struct *tsk, + const struct kernel_clone_args *args) +{ + unsigned long addr, size; + + if (!system_supports_gcs()) + return 0; + + if (!task_gcs_el0_enabled(tsk)) + return 0; + + if ((args->flags & (CLONE_VFORK | CLONE_VM)) != CLONE_VM) { + 
tsk->thread.gcspr_el0 = read_sysreg_s(SYS_GCSPR_EL0); + return 0; + } + + size = args->stack_size; + + size = gcs_size(size); + addr = alloc_gcs(0, size); + if (IS_ERR_VALUE(addr)) + return addr; + + tsk->thread.gcs_base = addr; + tsk->thread.gcs_size = size; + tsk->thread.gcspr_el0 = addr + size - sizeof(u64); + + return addr; +} + /* * Apply the GCS mode configured for the specified task to the * hardware. @@ -30,6 +90,16 @@ void gcs_set_el0_mode(struct task_struct *task) void gcs_free(struct task_struct *task) { + + /* + * When fork() with CLONE_VM fails, the child (tsk) already + * has a GCS allocated, and exit_thread() calls this function + * to free it. In this case the parent (current) and the + * child share the same mm struct. + */ + if (!task->mm || task->mm != current->mm) + return; + if (task->thread.gcs_base) vm_munmap(task->thread.gcs_base, task->thread.gcs_size);