@@ -373,6 +373,7 @@
449 common futex_waitv sys_futex_waitv
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
451 common map_shadow_stack sys_map_shadow_stack
+452 common sigaltshstk sys_sigaltshstk
#
# Due to a historical design error, certain syscalls are numbered differently
@@ -26,6 +26,7 @@ void reset_thread_shstk(void);
int setup_signal_shadow_stack(struct ksignal *ksig);
int restore_signal_shadow_stack(void);
int wrss_control(bool enable);
+void reset_alt_shstk(void);
#else
static inline long cet_prctl(struct task_struct *task, int option,
unsigned long features) { return -EINVAL; }
@@ -40,6 +41,7 @@ static inline void reset_thread_shstk(void) {}
static inline int setup_signal_shadow_stack(struct ksignal *ksig) { return 0; }
static inline int restore_signal_shadow_stack(void) { return 0; }
static inline int wrss_control(bool enable) { return -EOPNOTSUPP; }
+static inline void reset_alt_shstk(void) {}
#endif /* CONFIG_X86_SHADOW_STACK */
#endif /* __ASSEMBLY__ */
@@ -536,6 +536,9 @@ struct thread_struct {
#ifdef CONFIG_X86_SHADOW_STACK
struct thread_shstk shstk;
+ unsigned long sas_shstk_sp;
+ size_t sas_shstk_size;
+ unsigned int sas_shstk_flags;
#endif
/* Floating point and extended processor state */
@@ -176,6 +176,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
frame->flags = X86_EFLAGS_FIXED;
#endif
+ if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)
+ reset_alt_shstk();
+
/* Allocate a new shadow stack for pthread if needed */
ret = shstk_alloc_thread_stack(p, clone_flags, args->flags, &shstk_addr);
if (ret)
@@ -25,6 +25,7 @@
#include <asm/special_insns.h>
#include <asm/fpu/api.h>
#include <asm/prctl.h>
+#include <asm/signal.h>
#define SS_FRAME_SIZE 8
@@ -149,11 +150,18 @@ int shstk_setup(void)
return 0;
}
+/*
+ * Drop the current task's alt shadow stack registration by zeroing the
+ * recorded base and size.
+ */
+void reset_alt_shstk(void)
+{
+	struct thread_struct *thread = &current->thread;
+
+	thread->sas_shstk_size = 0;
+	thread->sas_shstk_sp = 0;
+}
+
void reset_thread_shstk(void)
{
memset(¤t->thread.shstk, 0, sizeof(struct thread_shstk));
current->thread.features = 0;
current->thread.features_locked = 0;
+ reset_alt_shstk();
}
int shstk_alloc_thread_stack(struct task_struct *tsk, unsigned long clone_flags,
@@ -238,39 +246,67 @@ static int get_shstk_data(unsigned long *data, unsigned long __user *addr)
return 0;
}
+/*
+ * Return true if @ssp lies inside the alt shadow stack recorded for the
+ * current task, i.e. in [sas_shstk_sp, sas_shstk_sp + sas_shstk_size).
+ */
+static bool on_alt_shstk(unsigned long ssp)
+{
+	unsigned long base = current->thread.sas_shstk_sp;
+	unsigned long limit = base + current->thread.sas_shstk_size;
+
+	if (ssp < base)
+		return false;
+
+	return ssp < limit;
+}
+
+/* Return true if the current task has a non-zero alt shadow stack base. */
+static bool alt_shstk_active(void)
+{
+	return current->thread.sas_shstk_sp != 0;
+}
+
+/*
+ * Sanity check an alt shadow stack base/size pair.
+ *
+ * A non-zero @ssp must come with a @size of at least PAGE_SIZE and below
+ * TASK_SIZE_MAX, and @ssp itself must be below TASK_SIZE_MAX.
+ *
+ * Returns 0 if the pair is acceptable and -EINVAL otherwise. The return
+ * type is int so the error code is not collapsed to "true" the way it
+ * would be through a bool; callers only test the result for non-zero.
+ */
+static int alt_shstk_valid(unsigned long ssp, size_t size)
+{
+	if (ssp && (size < PAGE_SIZE || size >= TASK_SIZE_MAX))
+		return -EINVAL;
+
+	if (ssp >= TASK_SIZE_MAX)
+		return -EINVAL;
+
+	return 0;
+}
+
/*
- * Create a restore token on shadow stack, and then push the user-mode
- * function return address.
+ * Verify the user shadow stack has a valid token on it, and then set
+ * *new_ssp according to the token.
+ *
+ * The token is the word read from user memory at token_addr. It is
+ * accepted only if bit 0 is set, bit 1 is clear, and the value with the
+ * low two bits masked off is 8-byte aligned, equals token_addr + 8 and
+ * is below TASK_SIZE_MAX.
+ *
+ * Returns 0 and stores the masked token value in *new_ssp on success,
+ * -EFAULT if the token cannot be read, -EINVAL if any check fails.
*/
-static int shstk_setup_rstor_token(unsigned long ret_addr, unsigned long *new_ssp)
+static int shstk_check_rstor_token(unsigned long token_addr, unsigned long *new_ssp)
{
- unsigned long ssp, token_addr;
- int err;
+ unsigned long token;
- if (!ret_addr)
+ if (get_user(token, (unsigned long __user *)token_addr))
+ return -EFAULT;
+
+ /* Is mode flag correct? */
+ if (!(token & BIT(0)))
return -EINVAL;
- ssp = get_user_shstk_addr();
- if (!ssp)
+ /* Is busy flag set? */
+ if (token & BIT(1))
return -EINVAL;
- err = create_rstor_token(ssp, &token_addr);
- if (err)
- return err;
+ /* Mask out flags */
+ token &= ~3UL;
+
+ /* Restore address aligned? */
+ if (!IS_ALIGNED(token, 8))
+ return -EINVAL;
- ssp = token_addr - sizeof(u64);
- err = write_user_shstk_64((u64 __user *)ssp, (u64)ret_addr);
+ /* Token placed properly? */
+ if (((ALIGN_DOWN(token, 8) - 8) != token_addr) || token >= TASK_SIZE_MAX)
+ return -EINVAL;
- if (!err)
- *new_ssp = ssp;
+ *new_ssp = token;
- return err;
+ return 0;
}
-static int shstk_push_sigframe(unsigned long *ssp)
+static int shstk_push_sigframe(unsigned long *ssp, unsigned long target_ssp)
{
- unsigned long target_ssp = *ssp;
-
/* Token must be aligned */
if (!IS_ALIGNED(*ssp, 8))
return -EINVAL;
@@ -278,17 +314,32 @@ static int shstk_push_sigframe(unsigned long *ssp)
if (!IS_ALIGNED(target_ssp, 8))
return -EINVAL;
+ *ssp -= SS_FRAME_SIZE; /* slot for the zero guard word */
+ if (write_user_shstk_64((u64 __user *)*ssp, 0))
+ return -EFAULT;
+
+ *ssp -= SS_FRAME_SIZE; /* slot for the alt shadow stack base */
+ if (put_shstk_data((u64 __user *)*ssp, current->thread.sas_shstk_sp))
+ return -EFAULT;
+
+ *ssp -= SS_FRAME_SIZE; /* slot for the alt shadow stack size */
+ if (put_shstk_data((u64 __user *)*ssp, current->thread.sas_shstk_size))
+ return -EFAULT;
+
*ssp -= SS_FRAME_SIZE;
if (put_shstk_data((void *__user)*ssp, target_ssp))
return -EFAULT;
+ current->thread.sas_shstk_sp = 0; /* base and size were saved above; clear the live copy */
+ current->thread.sas_shstk_size = 0;
+
return 0;
}
static int shstk_pop_sigframe(unsigned long *ssp)
{
- unsigned long token_addr;
+ unsigned long token_addr, shstk_sp, shstk_size;
int err;
err = get_shstk_data(&token_addr, (unsigned long __user *)*ssp);
@@ -303,7 +354,38 @@ static int shstk_pop_sigframe(unsigned long *ssp)
if (unlikely(token_addr >= TASK_SIZE_MAX))
return -EINVAL;
+ *ssp += SS_FRAME_SIZE; /* step to the saved alt shadow stack size */
+ err = get_shstk_data(&shstk_size, (void __user *)*ssp);
+ if (unlikely(err))
+ return err;
+
+ *ssp += SS_FRAME_SIZE; /* step to the saved alt shadow stack base */
+ err = get_shstk_data(&shstk_sp, (void __user *)*ssp);
+ if (unlikely(err))
+ return err;
+
+ if (unlikely(alt_shstk_valid((unsigned long)shstk_sp, shstk_size))) /* non-zero result: pair rejected */
+ return -EINVAL;
+
*ssp = token_addr;
+ current->thread.sas_shstk_sp = shstk_sp; /* reinstate the settings read from the frame */
+ current->thread.sas_shstk_size = shstk_size;
+
+ return 0;
+}
+
+/*
+ * Pick the shadow stack pointer from which the signal frame is pushed.
+ *
+ * @orig_ssp: current user SSP, as read by the caller.
+ * @ssp: output; set to the SSP to start pushing from.
+ *
+ * If no alt shadow stack is registered, or @orig_ssp already lies on it,
+ * the frame goes at @orig_ssp. Otherwise the last SS_FRAME_SIZE-byte slot
+ * of the alt shadow stack must hold a valid restore token, and *@ssp is
+ * taken from that token.
+ *
+ * Returns 0 on success or -EINVAL if the token check fails.
+ */
+static int get_sig_start_ssp(unsigned long orig_ssp, unsigned long *ssp)
+{
+	unsigned long sp_end;
+
+	/*
+	 * Test @orig_ssp, not *@ssp: the latter is a pure output that the
+	 * caller has not initialized yet.
+	 */
+	if (!alt_shstk_active() || on_alt_shstk(orig_ssp)) {
+		*ssp = orig_ssp;
+		return 0;
+	}
+
+	sp_end = current->thread.sas_shstk_sp +
+		 current->thread.sas_shstk_size - SS_FRAME_SIZE;
+
+	if (shstk_check_rstor_token(sp_end, ssp))
+		return -EINVAL;
	return 0;
}
@@ -311,7 +393,7 @@ static int shstk_pop_sigframe(unsigned long *ssp)
int setup_signal_shadow_stack(struct ksignal *ksig)
{
void __user *restorer = ksig->ka.sa.sa_restorer;
- unsigned long ssp;
+ unsigned long ssp, orig_ssp;
int err;
if (!cpu_feature_enabled(X86_FEATURE_SHSTK) ||
@@ -321,11 +403,15 @@ int setup_signal_shadow_stack(struct ksignal *ksig)
if (!restorer)
return -EINVAL;
- ssp = get_user_shstk_addr();
- if (unlikely(!ssp))
+ orig_ssp = get_user_shstk_addr();
+ if (unlikely(!orig_ssp))
return -EINVAL;
- err = shstk_push_sigframe(&ssp);
+ err = get_sig_start_ssp(orig_ssp, &ssp);
+ if (unlikely(err))
+ return err;
+
+ err = shstk_push_sigframe(&ssp, orig_ssp);
if (unlikely(err))
return err;
@@ -496,3 +582,47 @@ long cet_prctl(struct task_struct *task, int option, unsigned long features)
return wrss_control(true);
return -EINVAL;
}
+
+/*
+ * sigaltshstk - set and/or get the alt shadow stack of the calling task
+ * @uss: new settings, or NULL to leave the settings unchanged
+ * @uoss: if non-NULL, receives the settings in effect before this call
+ *
+ * Only ss_sp, ss_size and the SS_DISABLE flag of @uss are used. With
+ * SS_DISABLE the registration is cleared, otherwise ss_sp/ss_size are
+ * recorded as the new alt shadow stack.
+ *
+ * Returns 0 on success, -ENOSYS without X86_FEATURE_SHSTK, -EPERM if no
+ * user shadow stack pointer is available or the caller is running on the
+ * alt shadow stack, -EFAULT on a failed user copy and -EINVAL for a
+ * rejected base/size pair.
+ */
+SYSCALL_DEFINE2(sigaltshstk, const stack_t __user *, uss, stack_t __user *, uoss)
+{
+	unsigned long ssp;
+	stack_t new, old;
+
+	if (!cpu_feature_enabled(X86_FEATURE_SHSTK))
+		return -ENOSYS;
+
+	ssp = get_user_shstk_addr();
+
+	if (unlikely(!ssp || on_alt_shstk(ssp)))
+		return -EPERM;
+
+	/*
+	 * Snapshot the settings before @uss can overwrite them, so @uoss
+	 * reports the previous state rather than the one just installed.
+	 * The memset keeps padding and unused fields from leaking stack data.
+	 */
+	memset(&old, 0, sizeof(stack_t));
+	old.ss_sp = (void __user *)current->thread.sas_shstk_sp;
+	old.ss_size = current->thread.sas_shstk_size;
+
+	if (uss) {
+		if (unlikely(copy_from_user(&new, uss, sizeof(stack_t))))
+			return -EFAULT;
+
+		if (unlikely(alt_shstk_valid((unsigned long)new.ss_sp,
+					     new.ss_size)))
+			return -EINVAL;
+
+		if (new.ss_flags & SS_DISABLE) {
+			reset_alt_shstk();
+		} else {
+			current->thread.sas_shstk_sp = (unsigned long) new.ss_sp;
+			current->thread.sas_shstk_size = new.ss_size;
+			/* No saved flags for now */
+		}
+	}
+
+	/* Report the old settings on every successful path, incl. SS_DISABLE */
+	if (uoss && copy_to_user(uoss, &old, sizeof(stack_t)))
+		return -EFAULT;
+
+	return 0;
+}
@@ -1057,6 +1057,7 @@ asmlinkage long sys_set_mempolicy_home_node(unsigned long start, unsigned long l
unsigned long home_node,
unsigned long flags);
asmlinkage long sys_map_shadow_stack(unsigned long addr, unsigned long size, unsigned int flags);
+asmlinkage long sys_sigaltshstk(const struct sigaltstack *uss, struct sigaltstack *uoss);
/*
* Architecture-specific system calls
@@ -382,6 +382,7 @@ COND_SYSCALL(modify_ldt);
COND_SYSCALL(vm86);
COND_SYSCALL(kexec_file_load);
COND_SYSCALL(map_shadow_stack);
+COND_SYSCALL(sigaltshstk);
/* s390 */
COND_SYSCALL(s390_pci_mmio_read);
@@ -492,6 +492,76 @@ int test_userfaultfd(void)
return 1;
}
+volatile bool segv_pass;
+
+/* Thin wrapper that issues the sigaltshstk system call through syscall(). */
+long sigaltshstk(stack_t *uss, stack_t *ouss)
+{
+	long rc = syscall(__NR_sigaltshstk, uss, ouss);
+
+	return rc;
+}
+
+/*
+ * Signal handler for the alt shadow stack test. Sets segv_pass to true only
+ * if sigaltshstk() succeeds and reports a zero ss_sp and ss_size, and the
+ * handler's SSP lies within [shstk_ptr, shstk_ptr + SS_SIZE - 8]; otherwise
+ * segv_pass is set to false.
+ */
+void segv_alt_handler(int signum, siginfo_t *si, void *uc)
+{
+	unsigned long lo = (unsigned long)shstk_ptr;
+	unsigned long hi = lo + SS_SIZE;
+	unsigned long ssp = get_ssp();
+	stack_t cur;
+	bool ok = false;
+
+	if (!sigaltshstk(NULL, &cur) && !cur.ss_sp && !cur.ss_size)
+		ok = ssp >= lo && ssp <= hi - 8;
+
+	segv_pass = ok;
+}
+
+/*
+ * Register a newly created shadow stack as the alt shadow stack, raise
+ * SIGUSR1 and let segv_alt_handler() record the outcome in segv_pass.
+ *
+ * Returns 0 if the handler reported success and 1 otherwise.
+ */
+int test_shstk_alt_stack(void)
+{
+	/* Zero both so ss_flags and sa_mask do not carry stale stack bytes */
+	stack_t alt_shstk_stackt = { 0 };
+	struct sigaction sa = { 0 };
+	int ret = 1;
+
+	/* SA_SIGINFO must be set before the handler is installed */
+	sa.sa_sigaction = segv_alt_handler;
+	sa.sa_flags = SA_SIGINFO;
+	if (sigaction(SIGUSR1, &sa, NULL))
+		return 1;
+
+	shstk_ptr = create_shstk(0);
+	if (shstk_ptr == MAP_FAILED)
+		goto err_sig;
+
+	alt_shstk_stackt.ss_sp = shstk_ptr;
+	alt_shstk_stackt.ss_size = SS_SIZE;
+	if (sigaltshstk(&alt_shstk_stackt, NULL) == -1)
+		goto err_shstk;
+
+	segv_pass = false;
+
+	/* Make sure segv_pass is set before signal */
+	asm volatile("" : : : "memory");
+
+	raise(SIGUSR1);
+
+	if (segv_pass) {
+		printf("[OK]\tAlt shadow stack test.\n");
+		ret = 0;
+	}
+
+err_shstk:
+	alt_shstk_stackt.ss_flags = SS_DISABLE;
+	sigaltshstk(&alt_shstk_stackt, NULL);
+	free_shstk(shstk_ptr);
+err_sig:
+	signal(SIGUSR1, SIG_DFL);
+	return ret;
+}
+
int main(int argc, char *argv[])
{
int ret = 0;
@@ -556,6 +626,11 @@ int main(int argc, char *argv[])
printf("[FAIL]\tUserfaultfd test\n");
}
+ if (test_shstk_alt_stack()) {
+ ret = 1;
+ printf("[FAIL]\tAlt shadow stack test\n");
+ }
+
out:
/*
* Disable shadow stack before the function returns, or there will be a
To handle stack overflows, applications can register a separate signal alt stack to use for the stack to handle signals. To handle shadow stack overflows the kernel can similarly provide the ability to have an alt shadow stack. Signals push information about the execution context to the stack that will handle the signal. The data pushed is used to restore registers and other state after the signal. In the case of handling the signal on a normal stack, the stack just needs to be unwound over the stack frame, but in the case of alt stacks, the saved stack pointer is important for the sigreturn to find its way back to the thread. With shadow stack there is a new type of stack pointer, the shadow stack pointer (SSP), that needs to be restored. Just like the regular stack pointer, it needs to be saved somewhere in order to implement shadow alt stacks. This is already done as part of the token placed to prevent SROP attacks, so on sigreturn from an alt shadow stack, the kernel can easily know which SSP to restore. But to enable SS_AUTODISARM-like functionality, the kernel also needs to push the shadow alt stack and size somewhere, as happens with regular alt stacks. So push this data using the same format. In the end the shadow stack sigframe looks like this: |1...old SSP|1...alt stack size|1...alt stack base| 0| In the future, any other data could come between the alt stack base and the guard zero. The guard zero is to prevent tricking the kernel into processing half of one frame and half of the adjacent frame. In past designs for userspace shadow stacks, shadow alt stacks were not supported. Since there was only one shadow stack, longjmp() could jump out of a signal by using incssp to unwind the SSP to the place where the setjmp() was called. Since alt shadow stacks are a new thing, simply don't support longjmp()ing from an alt shadow stack. Introduce a new syscall "sigaltshstk" that behaves similarly to sigaltstack. 
Have it take new and old stack_t's to specify the base and length of the alt shadow stack. Don't have it adopt the same flag semantics though, because not all alt stack flags will necessarily apply to alt shadow stacks. As long as the syscall is getting new flag meanings, make SS_AUTODISARM the default behavior for sigaltshstk(), and do not require a flag. Today the only flag supported is SS_DISABLE, and a !SS_AUTODISARM mode is not supported. So when a signal hits it will jump to the location specified in sigaltshstk(). Currently (without WRSS), userspace doesn’t have the ability to arbitrarily set the SSP. But telling the kernel to set the SSP to an arbitrary point on signal is kind of like that. So there would be a weakening of the shadow stack protections unless additional checks are made. With the SS_AUTODISARM-style behavior, the SSP will only jump to the alt shadow stack if the SSP is not already on the alt shadow stack, otherwise it will just push the SSP. So have the kernel check for a token whenever transitioning to the alt stack from a place other than the alt stack. This token can be written by the kernel during shadow stack allocation, using the map_shadow_stack syscall. Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com> --- v2: - New patch arch/x86/entry/syscalls/syscall_64.tbl | 1 + arch/x86/include/asm/cet.h | 2 + arch/x86/include/asm/processor.h | 3 + arch/x86/kernel/process.c | 3 + arch/x86/kernel/shstk.c | 178 +++++++++++++++--- include/linux/syscalls.h | 1 + kernel/sys_ni.c | 1 + .../testing/selftests/x86/test_shadow_stack.c | 75 ++++++++ 8 files changed, 240 insertions(+), 24 deletions(-)