[v9,28/42] x86/shstk: Add user-mode shadow stack support

Message ID 20230613001108.3040476-29-rick.p.edgecombe@intel.com (mailing list archive)
State New
Series Shadow stacks for userspace

Commit Message

Edgecombe, Rick P June 13, 2023, 12:10 a.m. UTC
Introduce basic shadow stack enabling/disabling/allocation routines.
A task's shadow stack is allocated from memory with the VM_SHADOW_STACK
flag and has a fixed size of min(RLIMIT_STACK, 4GB).

Keep the task's shadow stack address and size in thread_struct. This will
be copied when cloning new threads, but needs to be cleared during exec,
so add a function to do this.

A 32-bit shadow stack is not expected to have many users, and it would
complicate the signal implementation, so do not support IA32 emulation
or x32.
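
For reference, enabling is done via the new arch_prctl() interface. A
minimal, illustrative userspace sketch follows (patterned on the
selftest added later in this series; the ARCH_SHSTK_* values are from
the series' uapi prctl.h changes). Note the enable has to happen in an
inlined raw syscall: returning through a frame that was entered before
the shadow stack existed, e.g. a libc syscall() wrapper, would pop a
shadow stack entry that was never pushed.

#include <stdio.h>
#include <sys/syscall.h>

#define ARCH_SHSTK_ENABLE	0x5001
#define ARCH_SHSTK_DISABLE	0x5002
#define ARCH_SHSTK_SHSTK	(1ULL << 0)

/* Inlined raw arch_prctl(), so no callee frame has to return across
 * the enable. */
#define ARCH_PRCTL(option, arg2)				\
({								\
	long _ret;						\
	register long _num  asm("eax") = SYS_arch_prctl;	\
	register long _arg1 asm("rdi") = (long)(option);	\
	register long _arg2 asm("rsi") = (long)(arg2);		\
								\
	asm volatile ("syscall"					\
		      : "=a"(_ret)				\
		      : "r"(_arg1), "r"(_arg2), "0"(_num)	\
		      : "rcx", "r11", "memory");		\
	_ret;							\
})

int main(void)
{
	if (ARCH_PRCTL(ARCH_SHSTK_ENABLE, ARCH_SHSTK_SHSTK)) {
		printf("could not enable shadow stack\n");
		return 1;
	}

	/* Calls made from here on are tracked on the shadow stack */

	/* Disable before returning past the pre-enable frames */
	ARCH_PRCTL(ARCH_SHSTK_DISABLE, ARCH_SHSTK_SHSTK);
	return 0;
}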

Co-developed-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Reviewed-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Kees Cook <keescook@chromium.org>
Acked-by: Mike Rapoport (IBM) <rppt@kernel.org>
Tested-by: Pengfei Xu <pengfei.xu@intel.com>
Tested-by: John Allen <john.allen@amd.com>
Tested-by: Kees Cook <keescook@chromium.org>
---
 arch/x86/include/asm/processor.h  |   2 +
 arch/x86/include/asm/shstk.h      |   7 ++
 arch/x86/include/uapi/asm/prctl.h |   3 +
 arch/x86/kernel/shstk.c           | 145 ++++++++++++++++++++++++++++++
 4 files changed, 157 insertions(+)

Comments

Mark Brown June 27, 2023, 5:20 p.m. UTC | #1
On Mon, Jun 12, 2023 at 05:10:54PM -0700, Rick Edgecombe wrote:

> +static void unmap_shadow_stack(u64 base, u64 size)
> +{
> +	while (1) {
> +		int r;
> +
> +		r = vm_munmap(base, size);
> +
> +		/*
> +		 * vm_munmap() returns -EINTR when mmap_lock is held by
> +		 * something else, and that lock should not be held for a
> +		 * long time.  Retry in that case.
> +		 */
> +		if (r == -EINTR) {
> +			cond_resched();
> +			continue;
> +		}

This looks generic, not even shadow stack specific - was there any
discussion of making it a vm_munmap_retry() (that's not a great name...)
or similar?  I didn't see any in old versions of the thread but I
might've missed something.
Dave Hansen June 27, 2023, 11:46 p.m. UTC | #2
On 6/27/23 10:20, Mark Brown wrote:
> On Mon, Jun 12, 2023 at 05:10:54PM -0700, Rick Edgecombe wrote:
> 
>> +static void unmap_shadow_stack(u64 base, u64 size)
>> +{
>> +	while (1) {
>> +		int r;
>> +
>> +		r = vm_munmap(base, size);
>> +
>> +		/*
>> +		 * vm_munmap() returns -EINTR when mmap_lock is held by
>> +		 * something else, and that lock should not be held for a
>> +		 * long time.  Retry in that case.
>> +		 */
>> +		if (r == -EINTR) {
>> +			cond_resched();
>> +			continue;
>> +		}
> This looks generic, not even shadow stack specific - was there any
> discussion of making it a vm_munmap_retry() (that's not a great name...)
> or similar?  I didn't see any in old versions of the thread but I
> might've missed something.

Yeah, that looks odd.  Also odd is that none of the other users of
vm_munmap() bother to check the return value (except for one passthrough
in the nommu code).

I don't think the EINTR happens during contention, though.  It's really
there to be able to break out in the face of SIGKILL.  I think that's why
nobody handles it: the task is dying anyway and nobody cares.

Rick, was this hunk here for a specific reason or were you just trying
to be diligent in handling errors?
Edgecombe, Rick P June 28, 2023, 12:37 a.m. UTC | #3
On Tue, 2023-06-27 at 16:46 -0700, Dave Hansen wrote:
> > This looks generic, not even shadow stack specific - was there any
> > discussion of making it a vm_munmap_retry() (that's not a great
> > name...)
> > or similar?  I didn't see any in old versions of the thread but I
> > might've missed something.
> 
> Yeah, that looks odd.  Also odd is that none of the other users of
> vm_munmap() bother to check the return value (except for one
> passthrough
> in the nommu code).
> 
> I don't think the EINTR happens during contention, though.  It's really
> there to be able to break out in the face of SIGKILL.  I think that's why
> nobody handles it: the task is dying anyway and nobody cares.
> 
> Rick, was this hunk here for a specific reason or were you just trying
> to be diligent in handling errors?

I'm not aware of any specific required cases. I think it is just pure
diligence, originating from this comment:
https://lore.kernel.org/lkml/9847845a-749d-47a3-2a1d-bcc7c35f1bdd@intel.com/#t

I didn't see a need to remove it. 

The SIGKILL explanation certainly sounds more accurate than the
comment, but I can't do much of a dive until next week. I would think
we need to handle EINTR differently so that it at least doesn't WARN.

Yeah, some of it does seem like the kind of thing that could live
outside of x86 shstk.c. But I'm not sure about the WARN part; that
should probably live in the caller. I guess someday we might even find
some shadow-stack-specific logic that could be cross-arch.
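
To make that concrete, such a helper might look something like the
sketch below. vm_munmap_retry() is hypothetical (nothing by that name
exists in the tree), and per the above the WARN stays with the caller;
whether a fatal-signal -EINTR should be retried at all is exactly the
open question.

/*
 * Hypothetical generic helper: retry vm_munmap() when the wait for
 * mmap_lock was interrupted, and report any other failure back to
 * the caller instead of WARNing here.
 */
int vm_munmap_retry(unsigned long start, size_t len)
{
	int r;

	do {
		r = vm_munmap(start, len);
		if (r == -EINTR)
			cond_resched();
	} while (r == -EINTR);

	return r;
}

unmap_shadow_stack() would then reduce to
WARN_ON_ONCE(vm_munmap_retry(base, size)).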

Patch

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 407d5551b6a7..2a5ec5750ba7 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -479,6 +479,8 @@ struct thread_struct {
 #ifdef CONFIG_X86_USER_SHADOW_STACK
 	unsigned long		features;
 	unsigned long		features_locked;
+
+	struct thread_shstk	shstk;
 #endif
 
 	/* Floating point and extended processor state */
diff --git a/arch/x86/include/asm/shstk.h b/arch/x86/include/asm/shstk.h
index ec753809f074..2b1f7c9b9995 100644
--- a/arch/x86/include/asm/shstk.h
+++ b/arch/x86/include/asm/shstk.h
@@ -8,12 +8,19 @@ 
 struct task_struct;
 
 #ifdef CONFIG_X86_USER_SHADOW_STACK
+struct thread_shstk {
+	u64	base;
+	u64	size;
+};
+
 long shstk_prctl(struct task_struct *task, int option, unsigned long features);
 void reset_thread_features(void);
+void shstk_free(struct task_struct *p);
 #else
 static inline long shstk_prctl(struct task_struct *task, int option,
 			       unsigned long arg2) { return -EINVAL; }
 static inline void reset_thread_features(void) {}
+static inline void shstk_free(struct task_struct *p) {}
 #endif /* CONFIG_X86_USER_SHADOW_STACK */
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h
index 1cd44ecc9ce0..6a8e0e1bff4a 100644
--- a/arch/x86/include/uapi/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
@@ -34,4 +34,7 @@ 
 #define ARCH_SHSTK_DISABLE		0x5002
 #define ARCH_SHSTK_LOCK			0x5003
 
+/* ARCH_SHSTK_ features bits */
+#define ARCH_SHSTK_SHSTK		(1ULL <<  0)
+
 #endif /* _ASM_X86_PRCTL_H */
diff --git a/arch/x86/kernel/shstk.c b/arch/x86/kernel/shstk.c
index 41ed6552e0a5..3cb85224d856 100644
--- a/arch/x86/kernel/shstk.c
+++ b/arch/x86/kernel/shstk.c
@@ -8,14 +8,159 @@ 
 
 #include <linux/sched.h>
 #include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/sched/signal.h>
+#include <linux/compat.h>
+#include <linux/sizes.h>
+#include <linux/user.h>
+#include <asm/msr.h>
+#include <asm/fpu/xstate.h>
+#include <asm/fpu/types.h>
+#include <asm/shstk.h>
+#include <asm/special_insns.h>
+#include <asm/fpu/api.h>
 #include <asm/prctl.h>
 
+static bool features_enabled(unsigned long features)
+{
+	return current->thread.features & features;
+}
+
+static void features_set(unsigned long features)
+{
+	current->thread.features |= features;
+}
+
+static void features_clr(unsigned long features)
+{
+	current->thread.features &= ~features;
+}
+
+static unsigned long alloc_shstk(unsigned long size)
+{
+	int flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_ABOVE4G;
+	struct mm_struct *mm = current->mm;
+	unsigned long addr, unused;
+
+	mmap_write_lock(mm);
+	addr = do_mmap(NULL, 0, size, PROT_READ, flags,
+		       VM_SHADOW_STACK | VM_WRITE, 0, &unused, NULL);
+
+	mmap_write_unlock(mm);
+
+	return addr;
+}
+
+static unsigned long adjust_shstk_size(unsigned long size)
+{
+	if (size)
+		return PAGE_ALIGN(size);
+
+	return PAGE_ALIGN(min_t(unsigned long long, rlimit(RLIMIT_STACK), SZ_4G));
+}
+
+static void unmap_shadow_stack(u64 base, u64 size)
+{
+	while (1) {
+		int r;
+
+		r = vm_munmap(base, size);
+
+		/*
+		 * vm_munmap() returns -EINTR when mmap_lock is held by
+		 * something else, and that lock should not be held for a
+		 * long time.  Retry in that case.
+		 */
+		if (r == -EINTR) {
+			cond_resched();
+			continue;
+		}
+
+		/*
+		 * For all other types of vm_munmap() failure, either the
+		 * system is out of memory or there is a bug.
+		 */
+		WARN_ON_ONCE(r);
+		break;
+	}
+}
+
+static int shstk_setup(void)
+{
+	struct thread_shstk *shstk = &current->thread.shstk;
+	unsigned long addr, size;
+
+	/* Already enabled */
+	if (features_enabled(ARCH_SHSTK_SHSTK))
+		return 0;
+
+	/* Also not supported for 32 bit and x32 */
+	if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) || in_32bit_syscall())
+		return -EOPNOTSUPP;
+
+	size = adjust_shstk_size(0);
+	addr = alloc_shstk(size);
+	if (IS_ERR_VALUE(addr))
+		return PTR_ERR((void *)addr);
+
+	fpregs_lock_and_load();
+	wrmsrl(MSR_IA32_PL3_SSP, addr + size);
+	wrmsrl(MSR_IA32_U_CET, CET_SHSTK_EN);
+	fpregs_unlock();
+
+	shstk->base = addr;
+	shstk->size = size;
+	features_set(ARCH_SHSTK_SHSTK);
+
+	return 0;
+}
+
 void reset_thread_features(void)
 {
+	memset(&current->thread.shstk, 0, sizeof(struct thread_shstk));
 	current->thread.features = 0;
 	current->thread.features_locked = 0;
 }
 
+void shstk_free(struct task_struct *tsk)
+{
+	struct thread_shstk *shstk = &tsk->thread.shstk;
+
+	if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) ||
+	    !features_enabled(ARCH_SHSTK_SHSTK))
+		return;
+
+	if (!tsk->mm)
+		return;
+
+	unmap_shadow_stack(shstk->base, shstk->size);
+}
+
+static int shstk_disable(void)
+{
+	if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
+		return -EOPNOTSUPP;
+
+	/* Already disabled? */
+	if (!features_enabled(ARCH_SHSTK_SHSTK))
+		return 0;
+
+	fpregs_lock_and_load();
+	/* Disable WRSS too when disabling shadow stack */
+	wrmsrl(MSR_IA32_U_CET, 0);
+	wrmsrl(MSR_IA32_PL3_SSP, 0);
+	fpregs_unlock();
+
+	shstk_free(current);
+	features_clr(ARCH_SHSTK_SHSTK);
+
+	return 0;
+}
+
 long shstk_prctl(struct task_struct *task, int option, unsigned long features)
 {
 	if (option == ARCH_SHSTK_LOCK) {