diff mbox series

[RFC] x86: entry: flush the cache if syscall error

Message ID 20180910191002.350195-1-kristen@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series [RFC] x86: entry: flush the cache if syscall error | expand

Commit Message

Kristen Carlson Accardi Sept. 10, 2018, 7:10 p.m. UTC
This patch aims to make it harder to perform cache timing attacks on data
left behind by system calls. If we have an error returned from a syscall,
flush the L1 cache.

Signed-off-by: Kristen Carlson Accardi <kristen@linux.intel.com>
---
 arch/x86/Kconfig        |  8 ++++++++
 arch/x86/entry/common.c | 20 ++++++++++++++++++++
 2 files changed, 28 insertions(+)

Comments

Jann Horn Sept. 10, 2018, 8:32 p.m. UTC | #1
On Mon, Sep 10, 2018 at 9:14 PM Kristen Carlson Accardi
<kristen@linux.intel.com> wrote:
> This patch aims to make it harder to perform cache timing attacks on data
> left behind by system calls. If we have an error returned from a syscall,
> flush the L1 cache.

What kind of performance impact does this have on a process that e.g.
attempts to access a large number of paths to which it has no access,
and what is the impact on the hyperthread?

(You may want to also CC the X86 maintainers (especially Andy
Lutomirski) and LKML on this series relatively early - people on the
kernel-hardening list tend to have a different focus compared to the
relevant maintainers, so there's some feedback that you'll probably
only get once you submit this to places other than the
kernel-hardening@ list.)

> Signed-off-by: Kristen Carlson Accardi <kristen@linux.intel.com>
> ---
>  arch/x86/Kconfig        |  8 ++++++++
>  arch/x86/entry/common.c | 20 ++++++++++++++++++++
>  2 files changed, 28 insertions(+)
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index c5ff296bc5d1..8a67642ff9fe 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -445,6 +445,14 @@ config RETPOLINE
>           code are eliminated. Since this includes the syscall entry path,
>           it is not entirely pointless.
>
> +config SYSCALL_FLUSH
> +       bool "Clear L1 Cache on syscall errors"
> +       default y
> +       help
> +         Select to allow the L1 cache to be cleared upon return of
> +         an error code from a syscall. This will reduce the likelihood of
> +         speculative execution style attacks on syscalls.

s/L1/L1D/ ?

>  config INTEL_RDT
>         bool "Intel Resource Director Technology support"
>         default n
> diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
> index 3b2490b81918..77beff541013 100644
> --- a/arch/x86/entry/common.c
> +++ b/arch/x86/entry/common.c
> @@ -268,6 +268,22 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs)
>         prepare_exit_to_usermode(regs);
>  }
>
> +__visible inline void l1_cache_flush(struct pt_regs *regs)
> +{
> +       if (IS_ENABLED(CONFIG_SYSCALL_FLUSH)) {
> +               if (regs->ax == 0 || regs->ax == -EAGAIN ||
> +                   regs->ax == -EEXIST || regs->ax == -ENOENT ||
> +                   regs->ax == -EXDEV || regs->ax == -ETIMEDOUT ||
> +                   regs->ax == -ENOTCONN || regs->ax == -EINPROGRESS)
> +                       return;
> +
> +               if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) {

Style nit: Maybe merge this condition into the first if() ?

> +                       wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
> +                       return;
> +               }
> +       }
> +}
> +
>  #ifdef CONFIG_X86_64
>  __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
>  {
> @@ -290,6 +306,8 @@ __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
>                 regs->ax = sys_call_table[nr](regs);
>         }
>
> +       l1_cache_flush(regs);
> +
>         syscall_return_slowpath(regs);
>  }
>  #endif
> @@ -338,6 +356,8 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
>  #endif /* CONFIG_IA32_EMULATION */
>         }
>
> +       l1_cache_flush(regs);
> +
>         syscall_return_slowpath(regs);
>  }
>
> --
> 2.14.4
>
Greg KH Sept. 11, 2018, 8:41 a.m. UTC | #2
On Mon, Sep 10, 2018 at 12:10:02PM -0700, Kristen Carlson Accardi wrote:
> This patch aims to make it harder to perform cache timing attacks on data
> left behind by system calls. If we have an error returned from a syscall,
> flush the L1 cache.
> 
> Signed-off-by: Kristen Carlson Accardi <kristen@linux.intel.com>
> ---
>  arch/x86/Kconfig        |  8 ++++++++
>  arch/x86/entry/common.c | 20 ++++++++++++++++++++
>  2 files changed, 28 insertions(+)
> 
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index c5ff296bc5d1..8a67642ff9fe 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -445,6 +445,14 @@ config RETPOLINE
>  	  code are eliminated. Since this includes the syscall entry path,
>  	  it is not entirely pointless.
>  
> +config SYSCALL_FLUSH
> +	bool "Clear L1 Cache on syscall errors"
> +	default y

"y" normally is only for "your machine will not boot without this
option", and I don't think that's the case here :)

> +	help
> +	  Select to allow the L1 cache to be cleared upon return of
> +	  an error code from a syscall. This will reduce the likelihood of
> +	  speculative execution style attacks on syscalls.

Shouldn't this help text refer to the fact that this needs CPU support
for this type of functionality?

I like the idea, as a "gadget" normally only is used when an
out-of-bounds check happens, which implies someone could be trying to do
something "bad", nice job.

thanks,

greg k-h
Kristen Carlson Accardi Sept. 11, 2018, 3:58 p.m. UTC | #3
On Mon, 2018-09-10 at 22:32 +0200, Jann Horn wrote:
> On Mon, Sep 10, 2018 at 9:14 PM Kristen Carlson Accardi
> <kristen@linux.intel.com> wrote:
> > This patch aims to make it harder to perform cache timing attacks
> > on data
> > left behind by system calls. If we have an error returned from a
> > syscall,
> > flush the L1 cache.
> 
> What kind of performance impact does this have on a process that e.g.
> attempts to access a large number of paths to which it has no access,
> and what is the impact on the hyperthread?

This will undoubtedly have a performance impact - I can certainly run
some benchmarks to find out just how much in this scenario.

> 
> (You may want to also CC the X86 maintainers (especially Andy
> Lutomirski) and LKML on this series relatively early - people on the
> kernel-hardening list tend to have a different focus compared to the
> relevant maintainers, so there's some feedback that you'll probably
> only get once you submit this to places other than the
> kernel-hardening@ list.)

Thank you yes, I admit I decided to try to float this idea past a
friendlier audience first :).

> 
> > Signed-off-by: Kristen Carlson Accardi <kristen@linux.intel.com>
> > ---
> >  arch/x86/Kconfig        |  8 ++++++++
> >  arch/x86/entry/common.c | 20 ++++++++++++++++++++
> >  2 files changed, 28 insertions(+)
> > 
> > diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> > index c5ff296bc5d1..8a67642ff9fe 100644
> > --- a/arch/x86/Kconfig
> > +++ b/arch/x86/Kconfig
> > @@ -445,6 +445,14 @@ config RETPOLINE
> >           code are eliminated. Since this includes the syscall
> > entry path,
> >           it is not entirely pointless.
> > 
> > +config SYSCALL_FLUSH
> > +       bool "Clear L1 Cache on syscall errors"
> > +       default y
> > +       help
> > +         Select to allow the L1 cache to be cleared upon return of
> > +         an error code from a syscall. This will reduce the
> > likelihood of
> > +         speculative execution style attacks on syscalls.
> 
> s/L1/L1D/ ?

I can change this - the documentation for this msr mentioned that on
some processors the icache might be impacted as well - think that's
worth mentioning?

> 
> >  config INTEL_RDT
> >         bool "Intel Resource Director Technology support"
> >         default n
> > diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
> > index 3b2490b81918..77beff541013 100644
> > --- a/arch/x86/entry/common.c
> > +++ b/arch/x86/entry/common.c
> > @@ -268,6 +268,22 @@ __visible inline void
> > syscall_return_slowpath(struct pt_regs *regs)
> >         prepare_exit_to_usermode(regs);
> >  }
> > 
> > +__visible inline void l1_cache_flush(struct pt_regs *regs)
> > +{
> > +       if (IS_ENABLED(CONFIG_SYSCALL_FLUSH)) {
> > +               if (regs->ax == 0 || regs->ax == -EAGAIN ||
> > +                   regs->ax == -EEXIST || regs->ax == -ENOENT ||
> > +                   regs->ax == -EXDEV || regs->ax == -ETIMEDOUT ||
> > +                   regs->ax == -ENOTCONN || regs->ax ==
> > -EINPROGRESS)
> > +                       return;
> > +
> > +               if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) {
> 
> Style nit: Maybe merge this condition into the first if() ?
> 
> > +                       wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
> > +                       return;
> > +               }
> > +       }
> > +}
> > +
> >  #ifdef CONFIG_X86_64
> >  __visible void do_syscall_64(unsigned long nr, struct pt_regs
> > *regs)
> >  {
> > @@ -290,6 +306,8 @@ __visible void do_syscall_64(unsigned long nr,
> > struct pt_regs *regs)
> >                 regs->ax = sys_call_table[nr](regs);
> >         }
> > 
> > +       l1_cache_flush(regs);
> > +
> >         syscall_return_slowpath(regs);
> >  }
> >  #endif
> > @@ -338,6 +356,8 @@ static __always_inline void
> > do_syscall_32_irqs_on(struct pt_regs *regs)
> >  #endif /* CONFIG_IA32_EMULATION */
> >         }
> > 
> > +       l1_cache_flush(regs);
> > +
> >         syscall_return_slowpath(regs);
> >  }
> > 
> > --
> > 2.14.4
> >
Kristen Carlson Accardi Sept. 11, 2018, 4:01 p.m. UTC | #4
On Tue, 2018-09-11 at 10:41 +0200, Greg KH wrote:
> On Mon, Sep 10, 2018 at 12:10:02PM -0700, Kristen Carlson Accardi
> wrote:
> > This patch aims to make it harder to perform cache timing attacks
> > on data
> > left behind by system calls. If we have an error returned from a
> > syscall,
> > flush the L1 cache.
> > 
> > Signed-off-by: Kristen Carlson Accardi <kristen@linux.intel.com>
> > ---
> >  arch/x86/Kconfig        |  8 ++++++++
> >  arch/x86/entry/common.c | 20 ++++++++++++++++++++
> >  2 files changed, 28 insertions(+)
> > 
> > diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> > index c5ff296bc5d1..8a67642ff9fe 100644
> > --- a/arch/x86/Kconfig
> > +++ b/arch/x86/Kconfig
> > @@ -445,6 +445,14 @@ config RETPOLINE
> >  	  code are eliminated. Since this includes the syscall
> > entry path,
> >  	  it is not entirely pointless.
> >  
> > +config SYSCALL_FLUSH
> > +	bool "Clear L1 Cache on syscall errors"
> > +	default y
> 
> "y" normally is only for "your machine will not boot without this
> option", and I don't think that's the case here :)

true. :). I wish there was a way to indicate that this option provided
enhanced security so that people could just select all of these
features at once and I could make it select based on that option.

> 
> > +	help
> > +	  Select to allow the L1 cache to be cleared upon return
> > of
> > +	  an error code from a syscall. This will reduce the
> > likelihood of
> > +	  speculative execution style attacks on syscalls.
> 
> Shouldn't this help text refer to the fact that this needs CPU
> support
> for this type of functionality?

OK, I will change it.

> 
> I like the idea, as a "gadget" normally only is used when an
> out-of-bounds check happens, which implies someone could be trying to
> do
> something "bad", nice job.
> 
> thanks,
> 
> greg k-h

Thanks!
Eric Biggers Sept. 11, 2018, 4:06 p.m. UTC | #5
On Mon, Sep 10, 2018 at 12:10:02PM -0700, Kristen Carlson Accardi wrote:
> This patch aims to make it harder to perform cache timing attacks on data
> left behind by system calls. If we have an error returned from a syscall,
> flush the L1 cache.

Which L1 cache?  There's no guarantee the task stayed on the same CPU...

- Eric
Jann Horn Sept. 11, 2018, 4:12 p.m. UTC | #6
On Tue, Sep 11, 2018 at 5:58 PM Kristen C Accardi
<kristen@linux.intel.com> wrote:
> On Mon, 2018-09-10 at 22:32 +0200, Jann Horn wrote:
> > On Mon, Sep 10, 2018 at 9:14 PM Kristen Carlson Accardi
> > <kristen@linux.intel.com> wrote:
> > > This patch aims to make it harder to perform cache timing attacks
> > > on data
> > > left behind by system calls. If we have an error returned from a
> > > syscall,
> > > flush the L1 cache.
[...]
> > > +config SYSCALL_FLUSH
> > > +       bool "Clear L1 Cache on syscall errors"
> > > +       default y
> > > +       help
> > > +         Select to allow the L1 cache to be cleared upon return of
> > > +         an error code from a syscall. This will reduce the
> > > likelyhood of
> > > +         speculative execution style attacks on syscalls.
> >
> > s/L1/L1D/ ?
>
> I can change this - the documentation for this msr mentioned that on
> some processors the icache might be impacted as well - think that's
> worth mentioning?

Ah, whoops, I didn't realize that it isn't as clear-cut as "just flush
L1D". I should've looked for the documentation first... nevermind, I
guess. I don't want to turn this into a documentation bikeshed.
Jann Horn Sept. 11, 2018, 6:02 p.m. UTC | #7
On Mon, Sep 10, 2018 at 9:14 PM Kristen Carlson Accardi
<kristen@linux.intel.com> wrote:
> This patch aims to make it harder to perform cache timing attacks on data
> left behind by system calls. If we have an error returned from a syscall,
> flush the L1 cache.

How much protection does this provide, given that it e.g. doesn't
flush L2/L3 and doesn't prevent data leakage through hyperthreading
and cache coherency? Is an L2/L3-based attack expected to be harder
than an L1D-based one?
Kristen Carlson Accardi Sept. 12, 2018, 5:29 p.m. UTC | #8
On Tue, 2018-09-11 at 09:06 -0700, Eric Biggers wrote:
> On Mon, Sep 10, 2018 at 12:10:02PM -0700, Kristen Carlson Accardi
> wrote:
> > This patch aims to make it harder to perform cache timing attacks
> > on data
> > left behind by system calls. If we have an error returned from a
> > syscall,
> > flush the L1 cache.
> 
> Which L1 cache?  There's no guarantee the task stayed on the same
> CPU...

While this is true, it is unlikely that the task switched CPUs for this
type of flow (i.e. an error path, presumably caught early-ish), and
worst case this would just mean we were wiping the wrong cache. I can
add a comment to indicate this scenario.
Kristen Carlson Accardi Sept. 12, 2018, 5:34 p.m. UTC | #9
On Tue, 2018-09-11 at 20:02 +0200, Jann Horn wrote:
> On Mon, Sep 10, 2018 at 9:14 PM Kristen Carlson Accardi
> <kristen@linux.intel.com> wrote:
> > This patch aims to make it harder to perform cache timing attacks
> > on data
> > left behind by system calls. If we have an error returned from a
> > syscall,
> > flush the L1 cache.
> 
> How much protection does this provide, given that it e.g. doesn't
> flush L2/L3 and doesn't prevent data leakage through hyperthreading
> and cache coherency? Is an L2/L3-based attack expected to be harder
> than an L1D-based one?

My reasoning here is that L2/L3 caches can be partitioned using
something like CAT (maybe), but L1 cannot. So IMO L1 is the case that
needs coverage. Also, while this doesn't address a specific exploit,
the idea is that attacks on data in L1D are more common, and the
performance penalty for L2/L3 flushes would be too high without a
specific exploit in mind.
Eric Biggers Sept. 12, 2018, 5:45 p.m. UTC | #10
On Wed, Sep 12, 2018 at 10:29:49AM -0700, Kristen C Accardi wrote:
> On Tue, 2018-09-11 at 09:06 -0700, Eric Biggers wrote:
> > On Mon, Sep 10, 2018 at 12:10:02PM -0700, Kristen Carlson Accardi
> > wrote:
> > > This patch aims to make it harder to perform cache timing attacks
> > > on data
> > > left behind by system calls. If we have an error returned from a
> > > syscall,
> > > flush the L1 cache.
> > 
> > Which L1 cache?  There's no guarantee the task stayed on the same
> > CPU...
> 
> While this is true, it is unlikely that the task switched CPUs for this
> type of flow (i.e. an error path, presumably caught early-ish), 

How do you know it's unlikely?  What degrees of freedom might an attacker have
in controlling this?

> worst case this would just mean we were wiping the wrong cache. I can
> add a comment to indicate this scenario.
> 

IOW, the protection may be useless?

- Eric
Rik van Riel Sept. 12, 2018, 6:19 p.m. UTC | #11
On Wed, 2018-09-12 at 10:45 -0700, Eric Biggers wrote:
> On Wed, Sep 12, 2018 at 10:29:49AM -0700, Kristen C Accardi wrote:
> > On Tue, 2018-09-11 at 09:06 -0700, Eric Biggers wrote:
> > > On Mon, Sep 10, 2018 at 12:10:02PM -0700, Kristen Carlson Accardi
> > > wrote:
> > > > This patch aims to make it harder to perform cache timing
> > > > attacks
> > > > on data
> > > > left behind by system calls. If we have an error returned from
> > > > a
> > > > syscall,
> > > > flush the L1 cache.
> > > 
> > > Which L1 cache?  There's no guarantee the task stayed on the same
> > > CPU...
> > 
> > While this is true, it is unlikely that the task switched CPUs for
> > this
> > type of flow (i.e. an error path, presumably caught early-ish), 
> 
> How do you know it's unlikely?  What degrees of freedom might an
> attacker have
> in controlling this?
> 
> > worst case this would just mean we were wiping the wrong cache. I
> > can
> > add a comment to indicate this scenario.
> > 
> 
> IOW, the protection may be useless?

If the task gets moved to a different CPU, won't that
completely foil a timing attack?

In other words, this protection would protect against
an attack on the same CPU, and is unnecessary when a
task switches CPUs?

What am I missing?
diff mbox series

Patch

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c5ff296bc5d1..8a67642ff9fe 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -445,6 +445,14 @@  config RETPOLINE
 	  code are eliminated. Since this includes the syscall entry path,
 	  it is not entirely pointless.
 
+config SYSCALL_FLUSH
+	bool "Clear L1 Cache on syscall errors"
+	default y
+	help
+	  Select to allow the L1 cache to be cleared upon return of
+	  an error code from a syscall. This will reduce the likelihood of
+	  speculative execution style attacks on syscalls.
+
 config INTEL_RDT
 	bool "Intel Resource Director Technology support"
 	default n
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 3b2490b81918..77beff541013 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -268,6 +268,22 @@  __visible inline void syscall_return_slowpath(struct pt_regs *regs)
 	prepare_exit_to_usermode(regs);
 }
 
+__visible inline void l1_cache_flush(struct pt_regs *regs)
+{
+	if (IS_ENABLED(CONFIG_SYSCALL_FLUSH)) {
+		if (regs->ax == 0 || regs->ax == -EAGAIN ||
+		    regs->ax == -EEXIST || regs->ax == -ENOENT ||
+		    regs->ax == -EXDEV || regs->ax == -ETIMEDOUT ||
+		    regs->ax == -ENOTCONN || regs->ax == -EINPROGRESS)
+			return;
+
+		if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) {
+			wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
+			return;
+		}
+	}
+}
+
 #ifdef CONFIG_X86_64
 __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
 {
@@ -290,6 +306,8 @@  __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
 		regs->ax = sys_call_table[nr](regs);
 	}
 
+	l1_cache_flush(regs);
+
 	syscall_return_slowpath(regs);
 }
 #endif
@@ -338,6 +356,8 @@  static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
 #endif /* CONFIG_IA32_EMULATION */
 	}
 
+	l1_cache_flush(regs);
+
 	syscall_return_slowpath(regs);
 }