
[v2,02/13] stackleak: move skip_erasing() check earlier

Message ID 20220427173128.2603085-3-mark.rutland@arm.com (mailing list archive)
State New, archived
Headers show
Series stackleak: fixes and rework | expand

Commit Message

Mark Rutland April 27, 2022, 5:31 p.m. UTC
In stackleak_erase() we check skip_erasing() after accessing some fields
from current. As generating the address of current uses asm which
hazards with the static branch asm, this work is always performed, even
when the static branch is patched to jump to the return at the end of the
function.
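
For reference, skip_erasing() is (roughly) defined in kernel/stackleak.c as a
static key test when CONFIG_STACKLEAK_RUNTIME_DISABLE is enabled, which is
where the static branch asm comes from:

  #ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
  static DEFINE_STATIC_KEY_FALSE(stack_erasing_bypass);
  #define skip_erasing()	static_branch_unlikely(&stack_erasing_bypass)
  #else
  #define skip_erasing()	false
  #endif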

This patch avoids this redundant work by moving the skip_erasing() check
earlier.

To avoid complicating initialization within stackleak_erase(), the body
of the function is split out into a __stackleak_erase() helper, with the
check left in a wrapper function. The __stackleak_erase() helper is
marked __always_inline to ensure that this is inlined into
stackleak_erase() and not instrumented.
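
The resulting shape (as in the diff below) is:

  static __always_inline void __stackleak_erase(void)
  {
  	/* erasing logic, unchanged */
  }

  asmlinkage void noinstr stackleak_erase(void)
  {
  	if (skip_erasing())
  		return;

  	__stackleak_erase();
  }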

Before this patch, on x86-64 w/ GCC 11.1.0 the start of the function is:

<stackleak_erase>:
   65 48 8b 04 25 00 00    mov    %gs:0x0,%rax
   00 00
   48 8b 48 20             mov    0x20(%rax),%rcx
   48 8b 80 98 0a 00 00    mov    0xa98(%rax),%rax
   66 90                   xchg   %ax,%ax  <------------ static branch
   48 89 c2                mov    %rax,%rdx
   48 29 ca                sub    %rcx,%rdx
   48 81 fa ff 3f 00 00    cmp    $0x3fff,%rdx

After this patch, on x86-64 w/ GCC 11.1.0 the start of the function is:

<stackleak_erase>:
   0f 1f 44 00 00          nopl   0x0(%rax,%rax,1)  <--- static branch
   65 48 8b 04 25 00 00    mov    %gs:0x0,%rax
   00 00
   48 8b 48 20             mov    0x20(%rax),%rcx
   48 8b 80 98 0a 00 00    mov    0xa98(%rax),%rax
   48 89 c2                mov    %rax,%rdx
   48 29 ca                sub    %rcx,%rdx
   48 81 fa ff 3f 00 00    cmp    $0x3fff,%rdx

Before this patch, on arm64 w/ GCC 11.1.0 the start of the function is:

<stackleak_erase>:
   d503245f        bti     c
   d5384100        mrs     x0, sp_el0
   f9401003        ldr     x3, [x0, #32]
   f9451000        ldr     x0, [x0, #2592]
   d503201f        nop  <------------------------------- static branch
   d503233f        paciasp
   cb030002        sub     x2, x0, x3
   d287ffe1        mov     x1, #0x3fff
   eb01005f        cmp     x2, x1

After this patch, on arm64 w/ GCC 11.1.0 the start of the function is:

<stackleak_erase>:
   d503245f        bti     c
   d503201f        nop  <------------------------------- static branch
   d503233f        paciasp
   d5384100        mrs     x0, sp_el0
   f9401003        ldr     x3, [x0, #32]
   d287ffe1        mov     x1, #0x3fff
   f9451000        ldr     x0, [x0, #2592]
   cb030002        sub     x2, x0, x3
   eb01005f        cmp     x2, x1

While this may not be a huge win on its own, moving the static branch
will permit further optimization of the body of the function in
subsequent patches.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Alexander Popov <alex.popov@linux.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
---
 kernel/stackleak.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

Comments

Alexander Popov May 8, 2022, 5:44 p.m. UTC | #1
On 27.04.2022 20:31, Mark Rutland wrote:
> In stackleak_erase() we check skip_erasing() after accessing some fields
> from current. As generating the address of current uses asm which
> hazards with the static branch asm, this work is always performed, even
> when the static branch is patched to jump to the return at the end of the
> function.

Nice find!

> This patch avoids this redundant work by moving the skip_erasing() check
> earlier.
> 
> To avoid complicating initialization within stackleak_erase(), the body
> of the function is split out into a __stackleak_erase() helper, with the
> check left in a wrapper function. The __stackleak_erase() helper is
> marked __always_inline to ensure that this is inlined into
> stackleak_erase() and not instrumented.
> 
> Before this patch, on x86-64 w/ GCC 11.1.0 the start of the function is:
> 
> <stackleak_erase>:
>     65 48 8b 04 25 00 00    mov    %gs:0x0,%rax
>     00 00
>     48 8b 48 20             mov    0x20(%rax),%rcx
>     48 8b 80 98 0a 00 00    mov    0xa98(%rax),%rax
>     66 90                   xchg   %ax,%ax  <------------ static branch
>     48 89 c2                mov    %rax,%rdx
>     48 29 ca                sub    %rcx,%rdx
>     48 81 fa ff 3f 00 00    cmp    $0x3fff,%rdx
> 
> After this patch, on x86-64 w/ GCC 11.1.0 the start of the function is:
> 
> <stackleak_erase>:
>     0f 1f 44 00 00          nopl   0x0(%rax,%rax,1)  <--- static branch
>     65 48 8b 04 25 00 00    mov    %gs:0x0,%rax
>     00 00
>     48 8b 48 20             mov    0x20(%rax),%rcx
>     48 8b 80 98 0a 00 00    mov    0xa98(%rax),%rax
>     48 89 c2                mov    %rax,%rdx
>     48 29 ca                sub    %rcx,%rdx
>     48 81 fa ff 3f 00 00    cmp    $0x3fff,%rdx
> 
> Before this patch, on arm64 w/ GCC 11.1.0 the start of the function is:
> 
> <stackleak_erase>:
>     d503245f        bti     c
>     d5384100        mrs     x0, sp_el0
>     f9401003        ldr     x3, [x0, #32]
>     f9451000        ldr     x0, [x0, #2592]
>     d503201f        nop  <------------------------------- static branch
>     d503233f        paciasp
>     cb030002        sub     x2, x0, x3
>     d287ffe1        mov     x1, #0x3fff
>     eb01005f        cmp     x2, x1
> 
> After this patch, on arm64 w/ GCC 11.1.0 the start of the function is:
> 
> <stackleak_erase>:
>     d503245f        bti     c
>     d503201f        nop  <------------------------------- static branch
>     d503233f        paciasp
>     d5384100        mrs     x0, sp_el0
>     f9401003        ldr     x3, [x0, #32]
>     d287ffe1        mov     x1, #0x3fff
>     f9451000        ldr     x0, [x0, #2592]
>     cb030002        sub     x2, x0, x3
>     eb01005f        cmp     x2, x1
> 
> While this may not be a huge win on its own, moving the static branch
> will permit further optimization of the body of the function in
> subsequent patches.
> 
> Signed-off-by: Mark Rutland <mark.rutland@arm.com>
> Cc: Alexander Popov <alex.popov@linux.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Andy Lutomirski <luto@kernel.org>
> Cc: Kees Cook <keescook@chromium.org>
> ---
>   kernel/stackleak.c | 13 +++++++++----
>   1 file changed, 9 insertions(+), 4 deletions(-)
> 
> diff --git a/kernel/stackleak.c b/kernel/stackleak.c
> index ddb5a7f48d69e..753eab797a04d 100644
> --- a/kernel/stackleak.c
> +++ b/kernel/stackleak.c
> @@ -70,7 +70,7 @@ late_initcall(stackleak_sysctls_init);
>   #define skip_erasing()	false
>   #endif /* CONFIG_STACKLEAK_RUNTIME_DISABLE */
>   
> -asmlinkage void noinstr stackleak_erase(void)
> +static __always_inline void __stackleak_erase(void)

Are you sure that __stackleak_erase() doesn't need asmlinkage and noinstr as well?

>   {
>   	/* It would be nice not to have 'kstack_ptr' and 'boundary' on stack */
>   	unsigned long kstack_ptr = current->lowest_stack;
> @@ -78,9 +78,6 @@ asmlinkage void noinstr stackleak_erase(void)
>   	unsigned int poison_count = 0;
>   	const unsigned int depth = STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long);
>   
> -	if (skip_erasing())
> -		return;
> -
>   	/* Check that 'lowest_stack' value is sane */
>   	if (unlikely(kstack_ptr - boundary >= THREAD_SIZE))
>   		kstack_ptr = boundary;
> @@ -125,6 +122,14 @@ asmlinkage void noinstr stackleak_erase(void)
>   	current->lowest_stack = current_top_of_stack() - THREAD_SIZE/64;
>   }
>   
> +asmlinkage void noinstr stackleak_erase(void)
> +{
> +	if (skip_erasing())
> +		return;
> +
> +	__stackleak_erase();
> +}
> +
>   void __used __no_caller_saved_registers noinstr stackleak_track_stack(void)
>   {
>   	unsigned long sp = current_stack_pointer;
Mark Rutland May 10, 2022, 11:40 a.m. UTC | #2
On Sun, May 08, 2022 at 08:44:56PM +0300, Alexander Popov wrote:
> On 27.04.2022 20:31, Mark Rutland wrote:
> > In stackleak_erase() we check skip_erasing() after accessing some fields
> > from current. As generating the address of current uses asm which
> > hazards with the static branch asm, this work is always performed, even
> > when the static branch is patched to jump to the return at the end of the
> > function.
> 
> Nice find!
> 
> > This patch avoids this redundant work by moving the skip_erasing() check
> > earlier.
> > 
> > To avoid complicating initialization within stackleak_erase(), the body
> > of the function is split out into a __stackleak_erase() helper, with the
> > check left in a wrapper function. The __stackleak_erase() helper is
> > marked __always_inline to ensure that this is inlined into
> > stackleak_erase() and not instrumented.

[...]

> > diff --git a/kernel/stackleak.c b/kernel/stackleak.c
> > index ddb5a7f48d69e..753eab797a04d 100644
> > --- a/kernel/stackleak.c
> > +++ b/kernel/stackleak.c
> > @@ -70,7 +70,7 @@ late_initcall(stackleak_sysctls_init);
> >   #define skip_erasing()	false
> >   #endif /* CONFIG_STACKLEAK_RUNTIME_DISABLE */
> > -asmlinkage void noinstr stackleak_erase(void)
> > +static __always_inline void __stackleak_erase(void)
> 
> Are you sure that __stackleak_erase() doesn't need asmlinkage and noinstr as well?

I am certain it needs neither.

It's static and never called from asm, so it doesn't need `asmlinkage`.

It's marked `__always_inline`, so it will always be inlined into its caller
(and if the compiler cannot inline it, the build will fail with an error).

That's important for good codegen (especially with the on/off-stack variants
later in the series). When the helper is inlined, the compiler treats its body
as part of the caller for code generation, so the caller's `noinstr` takes
effect.
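
For reference, __always_inline is the strong form of inline, roughly defined
in include/linux/compiler_types.h as:

  #define __always_inline	inline __attribute__((__always_inline__))

so the helper's body is guaranteed to be emitted within stackleak_erase(),
which noinstr places in the .noinstr.text section.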

Thanks,
Mark.

> 
> >   {
> >   	/* It would be nice not to have 'kstack_ptr' and 'boundary' on stack */
> >   	unsigned long kstack_ptr = current->lowest_stack;
> > @@ -78,9 +78,6 @@ asmlinkage void noinstr stackleak_erase(void)
> >   	unsigned int poison_count = 0;
> >   	const unsigned int depth = STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long);
> > -	if (skip_erasing())
> > -		return;
> > -
> >   	/* Check that 'lowest_stack' value is sane */
> >   	if (unlikely(kstack_ptr - boundary >= THREAD_SIZE))
> >   		kstack_ptr = boundary;
> > @@ -125,6 +122,14 @@ asmlinkage void noinstr stackleak_erase(void)
> >   	current->lowest_stack = current_top_of_stack() - THREAD_SIZE/64;
> >   }
> > +asmlinkage void noinstr stackleak_erase(void)
> > +{
> > +	if (skip_erasing())
> > +		return;
> > +
> > +	__stackleak_erase();
> > +}
> > +
> >   void __used __no_caller_saved_registers noinstr stackleak_track_stack(void)
> >   {
> >   	unsigned long sp = current_stack_pointer;
> 

Patch

diff --git a/kernel/stackleak.c b/kernel/stackleak.c
index ddb5a7f48d69e..753eab797a04d 100644
--- a/kernel/stackleak.c
+++ b/kernel/stackleak.c
@@ -70,7 +70,7 @@ late_initcall(stackleak_sysctls_init);
 #define skip_erasing()	false
 #endif /* CONFIG_STACKLEAK_RUNTIME_DISABLE */
 
-asmlinkage void noinstr stackleak_erase(void)
+static __always_inline void __stackleak_erase(void)
 {
 	/* It would be nice not to have 'kstack_ptr' and 'boundary' on stack */
 	unsigned long kstack_ptr = current->lowest_stack;
@@ -78,9 +78,6 @@ asmlinkage void noinstr stackleak_erase(void)
 	unsigned int poison_count = 0;
 	const unsigned int depth = STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long);
 
-	if (skip_erasing())
-		return;
-
 	/* Check that 'lowest_stack' value is sane */
 	if (unlikely(kstack_ptr - boundary >= THREAD_SIZE))
 		kstack_ptr = boundary;
@@ -125,6 +122,14 @@ asmlinkage void noinstr stackleak_erase(void)
 	current->lowest_stack = current_top_of_stack() - THREAD_SIZE/64;
 }
 
+asmlinkage void noinstr stackleak_erase(void)
+{
+	if (skip_erasing())
+		return;
+
+	__stackleak_erase();
+}
+
 void __used __no_caller_saved_registers noinstr stackleak_track_stack(void)
 {
 	unsigned long sp = current_stack_pointer;