diff mbox series

[v26,22/30] x86/cet/shstk: Add user-mode shadow stack support

Message ID 20210427204315.24153-23-yu-cheng.yu@intel.com (mailing list archive)
State New, archived
Headers show
Series Control-flow Enforcement: Shadow Stack | expand

Commit Message

Yu-cheng Yu April 27, 2021, 8:43 p.m. UTC
Introduce basic shadow stack enabling/disabling/allocation routines.
A task's shadow stack is allocated from memory with VM_SHADOW_STACK flag
and has a fixed size of min(RLIMIT_STACK, 4GB).

Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
Cc: Kees Cook <keescook@chromium.org>
---
v25:
- Change CONFIG_X86_CET to CONFIG_X86_SHADOW_STACK.
- Update alloc_shstk(), remove unused input flags.

v24:
- Rename cet.c to shstk.c, update related areas accordingly.

 arch/x86/include/asm/cet.h       |  29 ++++++++
 arch/x86/include/asm/processor.h |   5 ++
 arch/x86/kernel/Makefile         |   2 +
 arch/x86/kernel/shstk.c          | 123 +++++++++++++++++++++++++++++++
 4 files changed, 159 insertions(+)
 create mode 100644 arch/x86/include/asm/cet.h
 create mode 100644 arch/x86/kernel/shstk.c

Comments

Borislav Petkov April 28, 2021, 5:52 p.m. UTC | #1
On Tue, Apr 27, 2021 at 01:43:07PM -0700, Yu-cheng Yu wrote:
> @@ -535,6 +536,10 @@ struct thread_struct {
>  
>  	unsigned int		sig_on_uaccess_err:1;
>  
> +#ifdef CONFIG_X86_SHADOW_STACK
> +	struct cet_status	cet;

A couple of versions ago I said:

"	struct shstk_desc       shstk;

or so"

but no movement here. That thing is still called cet_status even though
there's nothing status-related with it.

So what's up?

> +static unsigned long alloc_shstk(unsigned long size)
> +{
> +	struct mm_struct *mm = current->mm;
> +	unsigned long addr, populate;
> +	int flags = MAP_ANONYMOUS | MAP_PRIVATE;

The tip-tree preferred ordering of variable declarations at the
beginning of a function is reverse fir tree order::

	struct long_struct_name *descriptive_name;
	unsigned long foo, bar;
	unsigned int tmp;
	int ret;

The above is faster to parse than the reverse ordering::

	int ret;
	unsigned int tmp;
	unsigned long foo, bar;
	struct long_struct_name *descriptive_name;

And even more so than random ordering::

	unsigned long foo, bar;
	int ret;
	struct long_struct_name *descriptive_name;
	unsigned int tmp;

Please fix it up everywhere.

> +	mmap_write_lock(mm);
> +	addr = do_mmap(NULL, 0, size, PROT_READ, flags, VM_SHADOW_STACK, 0,
> +		       &populate, NULL);
> +	mmap_write_unlock(mm);
> +
> +	return addr;
> +}
> +
> +int shstk_setup(void)
> +{
> +	unsigned long addr, size;
> +	struct cet_status *cet = &current->thread.cet;
> +
> +	if (!cpu_feature_enabled(X86_FEATURE_SHSTK))
> +		return -EOPNOTSUPP;
> +
> +	size = round_up(min_t(unsigned long long, rlimit(RLIMIT_STACK), SZ_4G), PAGE_SIZE);
> +	addr = alloc_shstk(size);
> +	if (IS_ERR_VALUE(addr))
> +		return PTR_ERR((void *)addr);
> +
> +	cet->shstk_base = addr;
> +	cet->shstk_size = size;
> +
> +	start_update_msrs();
> +	wrmsrl(MSR_IA32_PL3_SSP, addr + size);
> +	wrmsrl(MSR_IA32_U_CET, CET_SHSTK_EN);
> +	end_update_msrs();

<---- newline here.

> +	return 0;
> +}
> +
> +void shstk_free(struct task_struct *tsk)
> +{
> +	struct cet_status *cet = &tsk->thread.cet;
> +
> +	if (!cpu_feature_enabled(X86_FEATURE_SHSTK) ||
> +	    !cet->shstk_size ||
> +	    !cet->shstk_base)
> +		return;
> +
> +	if (!tsk->mm)
> +		return;

Where are the comments you said you wanna add:

https://lkml.kernel.org/r/b05ee7eb-1b5d-f84f-c8f3-bfe9426e8a7d@intel.com

?

> +
> +	while (1) {
> +		int r;
> +
> +		r = vm_munmap(cet->shstk_base, cet->shstk_size);

		int r = vm_munmap...

> +
> +		/*
> +		 * vm_munmap() returns -EINTR when mmap_lock is held by
> +		 * something else, and that lock should not be held for a
> +		 * long time.  Retry it for the case.
> +		 */
> +		if (r == -EINTR) {
> +			cond_resched();
> +			continue;
> +		}
> +		break;
> +	}

vm_munmap() can return other negative error values, where are you
handling those?

> +
> +	cet->shstk_base = 0;
> +	cet->shstk_size = 0;
> +}
> +
> +void shstk_disable(void)
> +{
> +	struct cet_status *cet = &current->thread.cet;

Same question as before: what guarantees that current doesn't change
from under you here?

One of the worst thing to do is to ignore review comments. I'd strongly
suggest you pay more attention and avoid that in the future.

Thx.
Yu-cheng Yu April 28, 2021, 6:39 p.m. UTC | #2
On 4/28/2021 10:52 AM, Borislav Petkov wrote:
> On Tue, Apr 27, 2021 at 01:43:07PM -0700, Yu-cheng Yu wrote:
>> @@ -535,6 +536,10 @@ struct thread_struct {
>>   
>>   	unsigned int		sig_on_uaccess_err:1;
>>   
>> +#ifdef CONFIG_X86_SHADOW_STACK
>> +	struct cet_status	cet;
> 
> A couple of versions ago I said:
> 
> "	struct shstk_desc       shstk;
> 
> or so"
> 
> but no movement here. That thing is still called cet_status even though
> there's nothing status-related with it.
> 
> So what's up?
> 

Sorry about that.  After that email thread, we went ahead to separate 
shadow stack and ibt into different files.  I thought about the struct, 
the file names cet.h, etc.  The struct still needs to include ibt 
status, and if it is shstk_desc, the name is not entirely true.  One 
possible approach is, we don't make it a struct here, and put every item 
directly in thread_struct.  However, the benefit of putting all in a 
struct is understandable (you might argue the opposite :-)).  Please 
make the call, and I will do the change.

>> +static unsigned long alloc_shstk(unsigned long size)
>> +{
>> +	struct mm_struct *mm = current->mm;
>> +	unsigned long addr, populate;
>> +	int flags = MAP_ANONYMOUS | MAP_PRIVATE;
> 
> The tip-tree preferred ordering of variable declarations at the
> beginning of a function is reverse fir tree order::
> 
> 	struct long_struct_name *descriptive_name;
> 	unsigned long foo, bar;
> 	unsigned int tmp;
> 	int ret;
> 
> The above is faster to parse than the reverse ordering::
> 
> 	int ret;
> 	unsigned int tmp;
> 	unsigned long foo, bar;
> 	struct long_struct_name *descriptive_name;
> 
> And even more so than random ordering::
> 
> 	unsigned long foo, bar;
> 	int ret;
> 	struct long_struct_name *descriptive_name;
> 	unsigned int tmp;
> 
> Please fix it up everywhere.
> 

Ok!

>> +	mmap_write_lock(mm);
>> +	addr = do_mmap(NULL, 0, size, PROT_READ, flags, VM_SHADOW_STACK, 0,
>> +		       &populate, NULL);
>> +	mmap_write_unlock(mm);
>> +
>> +	return addr;
>> +}
>> +
>> +int shstk_setup(void)
>> +{
>> +	unsigned long addr, size;
>> +	struct cet_status *cet = &current->thread.cet;
>> +
>> +	if (!cpu_feature_enabled(X86_FEATURE_SHSTK))
>> +		return -EOPNOTSUPP;
>> +
>> +	size = round_up(min_t(unsigned long long, rlimit(RLIMIT_STACK), SZ_4G), PAGE_SIZE);
>> +	addr = alloc_shstk(size);
>> +	if (IS_ERR_VALUE(addr))
>> +		return PTR_ERR((void *)addr);
>> +
>> +	cet->shstk_base = addr;
>> +	cet->shstk_size = size;
>> +
>> +	start_update_msrs();
>> +	wrmsrl(MSR_IA32_PL3_SSP, addr + size);
>> +	wrmsrl(MSR_IA32_U_CET, CET_SHSTK_EN);
>> +	end_update_msrs();
> 
> <---- newline here.
> 
>> +	return 0;
>> +}
>> +
>> +void shstk_free(struct task_struct *tsk)
>> +{
>> +	struct cet_status *cet = &tsk->thread.cet;
>> +
>> +	if (!cpu_feature_enabled(X86_FEATURE_SHSTK) ||
>> +	    !cet->shstk_size ||
>> +	    !cet->shstk_base)
>> +		return;
>> +
>> +	if (!tsk->mm)
>> +		return;
> 
> Where are the comments you said you wanna add:
> 
> https://lkml.kernel.org/r/b05ee7eb-1b5d-f84f-c8f3-bfe9426e8a7d@intel.com
> 
> ?
> 

Yes, the comments are in patch #23: Handle thread shadow stack.  I 
wanted to add that in the patch that takes the path.

>> +
>> +	while (1) {
>> +		int r;
>> +
>> +		r = vm_munmap(cet->shstk_base, cet->shstk_size);
> 
> 		int r = vm_munmap...
> 
>> +
>> +		/*
>> +		 * vm_munmap() returns -EINTR when mmap_lock is held by
>> +		 * something else, and that lock should not be held for a
>> +		 * long time.  Retry it for the case.
>> +		 */
>> +		if (r == -EINTR) {
>> +			cond_resched();
>> +			continue;
>> +		}
>> +		break;
>> +	}
> 
> vm_munmap() can return other negative error values, where are you
> handling those?
> 

For other error values, the loop stops.

>> +
>> +	cet->shstk_base = 0;
>> +	cet->shstk_size = 0;
>> +}
>> +
>> +void shstk_disable(void)
>> +{
>> +	struct cet_status *cet = &current->thread.cet;
> 
> Same question as before: what guarantees that current doesn't change
> from under you here?

The actual reading/writing MSRs are protected by fpregs_lock().

> 
> One of the worst thing to do is to ignore review comments. I'd strongly
> suggest you pay more attention and avoid that in the future.
> 
> Thx.
>
Borislav Petkov April 29, 2021, 9:12 a.m. UTC | #3
On Wed, Apr 28, 2021 at 11:39:00AM -0700, Yu, Yu-cheng wrote:
> Sorry about that.  After that email thread, we went ahead to separate shadow
> stack and ibt into different files.  I thought about the struct, the file
> names cet.h, etc.  The struct still needs to include ibt status, and if it
> is shstk_desc, the name is not entirely true.  One possible approach is, we
> don't make it a struct here, and put every item directly in thread_struct.
> However, the benefit of putting all in a struct is understandable (you might
> argue the opposite :-)).  Please make the call, and I will do the change.

/me looks forward into the patchset...

So this looks like the final version of it:

@@ -15,6 +15,7 @@ struct cet_status {
 	unsigned long	shstk_base;
 	unsigned long	shstk_size;
 	unsigned int	locked:1;
+	unsigned int	ibt_enabled:1;
 };

If so, that thing should be simply:

	struct cet {
		unsigned long shstk_base;
		unsigned long shstk_size;
		unsigned int shstk_lock : 1,
			     ibt	: 1;
	}

Is that ibt flag per thread or why is it here? I guess I'll find out.

/me greps...

ah yes, it is.

> Yes, the comments are in patch #23: Handle thread shadow stack.  I wanted to
> add that in the patch that takes the path.

That comes next, I'll look there.

> > vm_munmap() can return other negative error values, where are you
> > handling those?
> > 
> 
> For other error values, the loop stops.

And then what happens?

> > > +	cet->shstk_base = 0;
> > > +	cet->shstk_size = 0;

You clear those here without even checking whether unmap failed somehow.
And then stuff leaks but we don't care, right?

Someone else's problem, I'm sure.
Yu-cheng Yu April 29, 2021, 4:17 p.m. UTC | #4
On 4/29/2021 2:12 AM, Borislav Petkov wrote:
> On Wed, Apr 28, 2021 at 11:39:00AM -0700, Yu, Yu-cheng wrote:
>> Sorry about that.  After that email thread, we went ahead to separate shadow
>> stack and ibt into different files.  I thought about the struct, the file
>> names cet.h, etc.  The struct still needs to include ibt status, and if it
>> is shstk_desc, the name is not entirely true.  One possible approach is, we
>> don't make it a struct here, and put every item directly in thread_struct.
>> However, the benefit of putting all in a struct is understandable (you might
>> argue the opposite :-)).  Please make the call, and I will do the change.
> 
> /me looks forward into the patchset...
> 
> So this looks like the final version of it:
> 
> @@ -15,6 +15,7 @@ struct cet_status {
>   	unsigned long	shstk_base;
>   	unsigned long	shstk_size;
>   	unsigned int	locked:1;
> +	unsigned int	ibt_enabled:1;
>   };
> 
> If so, that thing should be simply:
> 
> 	struct cet {
> 		unsigned long shstk_base;
> 		unsigned long shstk_size;
> 		unsigned int shstk_lock : 1,
> 			     ibt	: 1;
> 	}
> 
> Is that ibt flag per thread or why is it here? I guess I'll find out.
> 
> /me greps...
> 
> ah yes, it is.
> 

The lock applies to both shadow stack and ibt.  So maybe just "locked"?

>> Yes, the comments are in patch #23: Handle thread shadow stack.  I wanted to
>> add that in the patch that takes the path.
> 
> That comes next, I'll look there.
> 
>>> vm_munmap() can return other negative error values, where are you
>>> handling those?
>>>
>>
>> For other error values, the loop stops.
> 
> And then what happens?
> 
>>>> +	cet->shstk_base = 0;
>>>> +	cet->shstk_size = 0;
> 
> You clear those here without even checking whether unmap failed somehow.
> And then stuff leaks but we don't care, right?
> 
> Someone else's problem, I'm sure.
> 

vm_munmap() returns error as the following:

(1) -EINVAL: address/size/alignment is wrong.
	For shadow stack, the kernel keeps track of it, this cannot/should not 
happen.  Should it happen, it is a bug.  The kernel can probably do WARN().

(2) -ENOMEM: when doing __split_vma()/__vma_adjust(), kmem_cache_alloc() 
fails.
	Not much we can do.  Perhaps WARN()?

(3) -EINTR: mmap_write_lock_killable(mm) fails.
	This should only happen to a pthread.  When a thread is existing, its 
siblings are holding mm->mmap_lock.  This is handled here.

Right now, in the kernel, only the munmap() syscall returns 
__vm_munmap() error code, otherwise the error is not checked.  Within 
the kernel and if -EINTR is not expected, this makes sense as explained 
above.

Thanks for questioning.  This piece needs to be correct.

Yu-cheng
Borislav Petkov April 29, 2021, 4:45 p.m. UTC | #5
On Thu, Apr 29, 2021 at 09:17:06AM -0700, Yu, Yu-cheng wrote:
> The lock applies to both shadow stack and ibt.  So maybe just "locked"?

Sure.

> vm_munmap() returns error as the following:
> 
> (1) -EINVAL: address/size/alignment is wrong.
> 	For shadow stack, the kernel keeps track of it, this cannot/should not
> happen.

You mean nothing might corrupt

        cet->shstk_base
        cet->shstk_size

?

I can't count the ways I've heard "should not happen" before and then it
happening anyway.

So probably not but we better catch stuff like that instead of leaking.

> Should it happen, it is a bug.

Ack.

> The kernel can probably do WARN().

Most definitely WARN. You need to catch funsies like that. But WARN_ONCE
should be enough for now.

> (2) -ENOMEM: when doing __split_vma()/__vma_adjust(), kmem_cache_alloc()
> fails.
> 	Not much we can do.  Perhaps WARN()?

You got it.

Bottom line is: if you can check for this and it is cheap, then
definitely. Code changes, gets rewritten, reorganized, the old
assertions change significance, and so on...

Thx.
diff mbox series

Patch

diff --git a/arch/x86/include/asm/cet.h b/arch/x86/include/asm/cet.h
new file mode 100644
index 000000000000..aa85d599b184
--- /dev/null
+++ b/arch/x86/include/asm/cet.h
@@ -0,0 +1,29 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_CET_H
+#define _ASM_X86_CET_H
+
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+
+struct task_struct;
+/*
+ * Per-thread CET status
+ */
+struct cet_status {
+	unsigned long	shstk_base;
+	unsigned long	shstk_size;
+};
+
+#ifdef CONFIG_X86_SHADOW_STACK
+int shstk_setup(void);
+void shstk_free(struct task_struct *p);
+void shstk_disable(void);
+#else
+static inline int shstk_setup(void) { return 0; }
+static inline void shstk_free(struct task_struct *p) {}
+static inline void shstk_disable(void) {}
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_X86_CET_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index f1b9ed5efaa9..3690b78e55bb 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -27,6 +27,7 @@  struct vm86;
 #include <asm/unwind_hints.h>
 #include <asm/vmxfeatures.h>
 #include <asm/vdso/processor.h>
+#include <asm/cet.h>
 
 #include <linux/personality.h>
 #include <linux/cache.h>
@@ -535,6 +536,10 @@  struct thread_struct {
 
 	unsigned int		sig_on_uaccess_err:1;
 
+#ifdef CONFIG_X86_SHADOW_STACK
+	struct cet_status	cet;
+#endif
+
 	/* Floating point and extended processor state */
 	struct fpu		fpu;
 	/*
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 2ddf08351f0b..0f99b093f350 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -150,6 +150,8 @@  obj-$(CONFIG_UNWINDER_FRAME_POINTER)	+= unwind_frame.o
 obj-$(CONFIG_UNWINDER_GUESS)		+= unwind_guess.o
 
 obj-$(CONFIG_AMD_MEM_ENCRYPT)		+= sev-es.o
+obj-$(CONFIG_X86_SHADOW_STACK)		+= shstk.o
+
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/shstk.c b/arch/x86/kernel/shstk.c
new file mode 100644
index 000000000000..c815c7507830
--- /dev/null
+++ b/arch/x86/kernel/shstk.c
@@ -0,0 +1,123 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * shstk.c - Intel shadow stack support
+ *
+ * Copyright (c) 2021, Intel Corporation.
+ * Yu-cheng Yu <yu-cheng.yu@intel.com>
+ */
+
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/sched/signal.h>
+#include <linux/compat.h>
+#include <linux/sizes.h>
+#include <linux/user.h>
+#include <asm/msr.h>
+#include <asm/fpu/internal.h>
+#include <asm/fpu/xstate.h>
+#include <asm/fpu/types.h>
+#include <asm/cet.h>
+
+static void start_update_msrs(void)
+{
+	fpregs_lock();
+	if (test_thread_flag(TIF_NEED_FPU_LOAD))
+		__fpregs_load_activate();
+}
+
+static void end_update_msrs(void)
+{
+	fpregs_unlock();
+}
+
+static unsigned long alloc_shstk(unsigned long size)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long addr, populate;
+	int flags = MAP_ANONYMOUS | MAP_PRIVATE;
+
+	mmap_write_lock(mm);
+	addr = do_mmap(NULL, 0, size, PROT_READ, flags, VM_SHADOW_STACK, 0,
+		       &populate, NULL);
+	mmap_write_unlock(mm);
+
+	return addr;
+}
+
+int shstk_setup(void)
+{
+	unsigned long addr, size;
+	struct cet_status *cet = &current->thread.cet;
+
+	if (!cpu_feature_enabled(X86_FEATURE_SHSTK))
+		return -EOPNOTSUPP;
+
+	size = round_up(min_t(unsigned long long, rlimit(RLIMIT_STACK), SZ_4G), PAGE_SIZE);
+	addr = alloc_shstk(size);
+	if (IS_ERR_VALUE(addr))
+		return PTR_ERR((void *)addr);
+
+	cet->shstk_base = addr;
+	cet->shstk_size = size;
+
+	start_update_msrs();
+	wrmsrl(MSR_IA32_PL3_SSP, addr + size);
+	wrmsrl(MSR_IA32_U_CET, CET_SHSTK_EN);
+	end_update_msrs();
+	return 0;
+}
+
+void shstk_free(struct task_struct *tsk)
+{
+	struct cet_status *cet = &tsk->thread.cet;
+
+	if (!cpu_feature_enabled(X86_FEATURE_SHSTK) ||
+	    !cet->shstk_size ||
+	    !cet->shstk_base)
+		return;
+
+	if (!tsk->mm)
+		return;
+
+	while (1) {
+		int r;
+
+		r = vm_munmap(cet->shstk_base, cet->shstk_size);
+
+		/*
+		 * vm_munmap() returns -EINTR when mmap_lock is held by
+		 * something else, and that lock should not be held for a
+		 * long time.  Retry it for the case.
+		 */
+		if (r == -EINTR) {
+			cond_resched();
+			continue;
+		}
+		break;
+	}
+
+	cet->shstk_base = 0;
+	cet->shstk_size = 0;
+}
+
+void shstk_disable(void)
+{
+	struct cet_status *cet = &current->thread.cet;
+	u64 msr_val;
+
+	if (!cpu_feature_enabled(X86_FEATURE_SHSTK) ||
+	    !cet->shstk_size ||
+	    !cet->shstk_base)
+		return;
+
+	start_update_msrs();
+	rdmsrl(MSR_IA32_U_CET, msr_val);
+	wrmsrl(MSR_IA32_U_CET, msr_val & ~CET_SHSTK_EN);
+	wrmsrl(MSR_IA32_PL3_SSP, 0);
+	end_update_msrs();
+
+	shstk_free(current);
+}