diff mbox series

[RFC,v9,25/27] x86/cet/shstk: Handle thread Shadow Stack

Message ID 20200205181935.3712-26-yu-cheng.yu@intel.com (mailing list archive)
State New, archived
Headers show
Series Control-flow Enforcement: Shadow Stack | expand

Commit Message

Yu-cheng Yu Feb. 5, 2020, 6:19 p.m. UTC
The Shadow Stack (SHSTK) for clone/fork is handled as the following:

(1) If ((clone_flags & (CLONE_VFORK | CLONE_VM)) == CLONE_VM),
    the kernel allocates (and frees on thread exit) a new SHSTK for the
    child.

    It is possible for the kernel to complete the clone syscall and set the
    child's SHSTK pointer to NULL and let the child thread allocate a SHSTK
    for itself.  There are two issues in this approach: It is not
    compatible with existing code that does inline syscall and it cannot
    handle signals before the child can successfully allocate a SHSTK.

(2) For (clone_flags & CLONE_VFORK), the child uses the existing SHSTK.

(3) For all other cases, the SHSTK is copied/reused whenever the parent or
    the child does a call/ret.

This patch handles cases (1) & (2).  Case (3) is handled in the SHSTK page
fault patches.

A 64-bit SHSTK has a fixed size of RLIMIT_STACK. A compat-mode thread SHSTK
has a fixed size of 1/4 RLIMIT_STACK.  This allows more threads to share a
32-bit address space.

Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
---
 arch/x86/include/asm/cet.h         |  2 ++
 arch/x86/include/asm/mmu_context.h |  3 +++
 arch/x86/kernel/cet.c              | 41 ++++++++++++++++++++++++++++++
 arch/x86/kernel/process.c          |  7 +++++
 4 files changed, 53 insertions(+)

Comments

Kees Cook Feb. 25, 2020, 9:29 p.m. UTC | #1
On Wed, Feb 05, 2020 at 10:19:33AM -0800, Yu-cheng Yu wrote:
> The Shadow Stack (SHSTK) for clone/fork is handled as the following:
> 
> (1) If ((clone_flags & (CLONE_VFORK | CLONE_VM)) == CLONE_VM),
>     the kernel allocates (and frees on thread exit) a new SHSTK for the
>     child.
> 
>     It is possible for the kernel to complete the clone syscall and set the
>     child's SHSTK pointer to NULL and let the child thread allocate a SHSTK
>     for itself.  There are two issues in this approach: It is not
>     compatible with existing code that does inline syscall and it cannot
>     handle signals before the child can successfully allocate a SHSTK.
> 
> (2) For (clone_flags & CLONE_VFORK), the child uses the existing SHSTK.
> 
> (3) For all other cases, the SHSTK is copied/reused whenever the parent or
>     the child does a call/ret.
> 
> This patch handles cases (1) & (2).  Case (3) is handled in the SHSTK page
> fault patches.
> 
> A 64-bit SHSTK has a fixed size of RLIMIT_STACK. A compat-mode thread SHSTK
> has a fixed size of 1/4 RLIMIT_STACK.  This allows more threads to share a
> 32-bit address space.

I am not understanding this part. :) Entries are sizeof(unsigned long),
yes? A 1/2 RLIMIT_STACK would cover 32-bit, but 1/4 is less, so why does
that provide for more threads?

> 
> Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
> ---
>  arch/x86/include/asm/cet.h         |  2 ++
>  arch/x86/include/asm/mmu_context.h |  3 +++
>  arch/x86/kernel/cet.c              | 41 ++++++++++++++++++++++++++++++
>  arch/x86/kernel/process.c          |  7 +++++
>  4 files changed, 53 insertions(+)
> 
> diff --git a/arch/x86/include/asm/cet.h b/arch/x86/include/asm/cet.h
> index 409d4f91a0dc..9a3e2da9c1c4 100644
> --- a/arch/x86/include/asm/cet.h
> +++ b/arch/x86/include/asm/cet.h
> @@ -19,10 +19,12 @@ struct cet_status {
>  
>  #ifdef CONFIG_X86_INTEL_CET
>  int cet_setup_shstk(void);
> +int cet_setup_thread_shstk(struct task_struct *p);
>  void cet_disable_free_shstk(struct task_struct *p);
>  int cet_restore_signal(bool ia32, struct sc_ext *sc);
>  int cet_setup_signal(bool ia32, unsigned long rstor, struct sc_ext *sc);
>  #else
> +static inline int cet_setup_thread_shstk(struct task_struct *p) { return 0; }
>  static inline void cet_disable_free_shstk(struct task_struct *p) {}
>  static inline int cet_restore_signal(bool ia32, struct sc_ext *sc) { return -EINVAL; }
>  static inline int cet_setup_signal(bool ia32, unsigned long rstor,
> diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
> index 5f33924e200f..6a8189308823 100644
> --- a/arch/x86/include/asm/mmu_context.h
> +++ b/arch/x86/include/asm/mmu_context.h
> @@ -13,6 +13,7 @@
>  #include <asm/tlbflush.h>
>  #include <asm/paravirt.h>
>  #include <asm/mpx.h>
> +#include <asm/cet.h>
>  #include <asm/debugreg.h>
>  
>  extern atomic64_t last_mm_ctx_id;
> @@ -230,6 +231,8 @@ do {						\
>  #else
>  #define deactivate_mm(tsk, mm)			\
>  do {						\
> +	if (!tsk->vfork_done)			\
> +		cet_disable_free_shstk(tsk);	\
>  	load_gs_index(0);			\
>  	loadsegment(fs, 0);			\
>  } while (0)
> diff --git a/arch/x86/kernel/cet.c b/arch/x86/kernel/cet.c
> index cba5c7656aab..5b45abda80a1 100644
> --- a/arch/x86/kernel/cet.c
> +++ b/arch/x86/kernel/cet.c
> @@ -170,6 +170,47 @@ int cet_setup_shstk(void)
>  	return 0;
>  }
>  
> +int cet_setup_thread_shstk(struct task_struct *tsk)
> +{
> +	unsigned long addr, size;
> +	struct cet_user_state *state;
> +	struct cet_status *cet = &tsk->thread.cet;
> +
> +	if (!cet->shstk_enabled)
> +		return 0;
> +
> +	state = get_xsave_addr(&tsk->thread.fpu.state.xsave,
> +			       XFEATURE_CET_USER);
> +
> +	if (!state)
> +		return -EINVAL;
> +
> +	size = rlimit(RLIMIT_STACK);

Is SHSTK incompatible with RLIM_INFINITY stack rlimits?

> +
> +	/*
> +	 * Compat-mode pthreads share a limited address space.
> +	 * If each function call takes an average of four slots
> +	 * stack space, we need 1/4 of stack size for shadow stack.
> +	 */
> +	if (in_compat_syscall())
> +		size /= 4;
> +
> +	addr = alloc_shstk(size);

I assume it'd fail here, but I worry about Stack Clash style attacks.
I'd like to see test cases that make sure the SHSTK gap is working
correctly.

-Kees

> +
> +	if (IS_ERR((void *)addr)) {
> +		cet->shstk_base = 0;
> +		cet->shstk_size = 0;
> +		cet->shstk_enabled = 0;
> +		return PTR_ERR((void *)addr);
> +	}
> +
> +	fpu__prepare_write(&tsk->thread.fpu);
> +	state->user_ssp = (u64)(addr + size);
> +	cet->shstk_base = addr;
> +	cet->shstk_size = size;
> +	return 0;
> +}
> +
>  void cet_disable_free_shstk(struct task_struct *tsk)
>  {
>  	struct cet_status *cet = &tsk->thread.cet;
> diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
> index e102e63de641..7098618142f2 100644
> --- a/arch/x86/kernel/process.c
> +++ b/arch/x86/kernel/process.c
> @@ -110,6 +110,7 @@ void exit_thread(struct task_struct *tsk)
>  
>  	free_vm86(t);
>  
> +	cet_disable_free_shstk(tsk);
>  	fpu__drop(fpu);
>  }
>  
> @@ -180,6 +181,12 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
>  	if (clone_flags & CLONE_SETTLS)
>  		ret = set_new_tls(p, tls);
>  
> +#ifdef CONFIG_X86_64
> +	/* Allocate a new shadow stack for pthread */
> +	if (!ret && (clone_flags & (CLONE_VFORK | CLONE_VM)) == CLONE_VM)
> +		ret = cet_setup_thread_shstk(p);
> +#endif
> +
>  	if (!ret && unlikely(test_tsk_thread_flag(current, TIF_IO_BITMAP)))
>  		io_bitmap_share(p);
>  
> -- 
> 2.21.0
>
Yu-cheng Yu March 25, 2020, 9:51 p.m. UTC | #2
On Tue, 2020-02-25 at 13:29 -0800, Kees Cook wrote:
> On Wed, Feb 05, 2020 at 10:19:33AM -0800, Yu-cheng Yu wrote:
> > [...]
> > A 64-bit SHSTK has a fixed size of RLIMIT_STACK. A compat-mode thread SHSTK
> > has a fixed size of 1/4 RLIMIT_STACK.  This allows more threads to share a
> > 32-bit address space.
> 
> I am not understanding this part. :) Entries are sizeof(unsigned long),
> yes? A 1/2 RLIMIT_STACK would cover 32-bit, but 1/4 is less, so why does
> that provide for more threads?

Each thread has a separate shadow stack.  If each shadow stack is smaller, the
address space can accommodate more shadow stack allocations.

> >[...]
> > diff --git a/arch/x86/kernel/cet.c b/arch/x86/kernel/cet.c
> > index cba5c7656aab..5b45abda80a1 100644
> > --- a/arch/x86/kernel/cet.c
> > +++ b/arch/x86/kernel/cet.c
> > @@ -170,6 +170,47 @@ int cet_setup_shstk(void)
> >  	return 0;
> >  }
> >  
> > +int cet_setup_thread_shstk(struct task_struct *tsk)
> > +{
> > +	unsigned long addr, size;
> > +	struct cet_user_state *state;
> > +	struct cet_status *cet = &tsk->thread.cet;
> > +
> > +	if (!cet->shstk_enabled)
> > +		return 0;
> > +
> > +	state = get_xsave_addr(&tsk->thread.fpu.state.xsave,
> > +			       XFEATURE_CET_USER);
> > +
> > +	if (!state)
> > +		return -EINVAL;
> > +
> > +	size = rlimit(RLIMIT_STACK);
> 
> Is SHSTK incompatible with RLIM_INFINITY stack rlimits?

I will change it to:

	size = min(rlimit(RLIMIT_STACK), 4 GB);

> 
> > +
> > +	/*
> > +	 * Compat-mode pthreads share a limited address space.
> > +	 * If each function call takes an average of four slots
> > +	 * stack space, we need 1/4 of stack size for shadow stack.
> > +	 */
> > +	if (in_compat_syscall())
> > +		size /= 4;
> > +
> > +	addr = alloc_shstk(size);
> 
> I assume it'd fail here, but I worry about Stack Clash style attacks.
> I'd like to see test cases that make sure the SHSTK gap is working
> correctly.

I will work on some tests.

Thanks,
Yu-cheng
diff mbox series

Patch

diff --git a/arch/x86/include/asm/cet.h b/arch/x86/include/asm/cet.h
index 409d4f91a0dc..9a3e2da9c1c4 100644
--- a/arch/x86/include/asm/cet.h
+++ b/arch/x86/include/asm/cet.h
@@ -19,10 +19,12 @@  struct cet_status {
 
 #ifdef CONFIG_X86_INTEL_CET
 int cet_setup_shstk(void);
+int cet_setup_thread_shstk(struct task_struct *p);
 void cet_disable_free_shstk(struct task_struct *p);
 int cet_restore_signal(bool ia32, struct sc_ext *sc);
 int cet_setup_signal(bool ia32, unsigned long rstor, struct sc_ext *sc);
 #else
+static inline int cet_setup_thread_shstk(struct task_struct *p) { return 0; }
 static inline void cet_disable_free_shstk(struct task_struct *p) {}
 static inline int cet_restore_signal(bool ia32, struct sc_ext *sc) { return -EINVAL; }
 static inline int cet_setup_signal(bool ia32, unsigned long rstor,
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 5f33924e200f..6a8189308823 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -13,6 +13,7 @@ 
 #include <asm/tlbflush.h>
 #include <asm/paravirt.h>
 #include <asm/mpx.h>
+#include <asm/cet.h>
 #include <asm/debugreg.h>
 
 extern atomic64_t last_mm_ctx_id;
@@ -230,6 +231,8 @@  do {						\
 #else
 #define deactivate_mm(tsk, mm)			\
 do {						\
+	if (!tsk->vfork_done)			\
+		cet_disable_free_shstk(tsk);	\
 	load_gs_index(0);			\
 	loadsegment(fs, 0);			\
 } while (0)
diff --git a/arch/x86/kernel/cet.c b/arch/x86/kernel/cet.c
index cba5c7656aab..5b45abda80a1 100644
--- a/arch/x86/kernel/cet.c
+++ b/arch/x86/kernel/cet.c
@@ -170,6 +170,47 @@  int cet_setup_shstk(void)
 	return 0;
 }
 
+int cet_setup_thread_shstk(struct task_struct *tsk)
+{
+	unsigned long addr, size;
+	struct cet_user_state *state;
+	struct cet_status *cet = &tsk->thread.cet;
+
+	if (!cet->shstk_enabled)
+		return 0;
+
+	state = get_xsave_addr(&tsk->thread.fpu.state.xsave,
+			       XFEATURE_CET_USER);
+
+	if (!state)
+		return -EINVAL;
+
+	size = rlimit(RLIMIT_STACK);
+
+	/*
+	 * Compat-mode pthreads share a limited address space.
+	 * If each function call takes an average of four slots
+	 * stack space, we need 1/4 of stack size for shadow stack.
+	 */
+	if (in_compat_syscall())
+		size /= 4;
+
+	addr = alloc_shstk(size);
+
+	if (IS_ERR((void *)addr)) {
+		cet->shstk_base = 0;
+		cet->shstk_size = 0;
+		cet->shstk_enabled = 0;
+		return PTR_ERR((void *)addr);
+	}
+
+	fpu__prepare_write(&tsk->thread.fpu);
+	state->user_ssp = (u64)(addr + size);
+	cet->shstk_base = addr;
+	cet->shstk_size = size;
+	return 0;
+}
+
 void cet_disable_free_shstk(struct task_struct *tsk)
 {
 	struct cet_status *cet = &tsk->thread.cet;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index e102e63de641..7098618142f2 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -110,6 +110,7 @@  void exit_thread(struct task_struct *tsk)
 
 	free_vm86(t);
 
+	cet_disable_free_shstk(tsk);
 	fpu__drop(fpu);
 }
 
@@ -180,6 +181,12 @@  int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
 	if (clone_flags & CLONE_SETTLS)
 		ret = set_new_tls(p, tls);
 
+#ifdef CONFIG_X86_64
+	/* Allocate a new shadow stack for pthread */
+	if (!ret && (clone_flags & (CLONE_VFORK | CLONE_VM)) == CLONE_VM)
+		ret = cet_setup_thread_shstk(p);
+#endif
+
 	if (!ret && unlikely(test_tsk_thread_flag(current, TIF_IO_BITMAP)))
 		io_bitmap_share(p);