diff mbox

[v2] x86/suspend: fix false positive KASAN warning on suspend/resume

Message ID 20161201203154.mwt5x736g7z6jh3o@treble (mailing list archive)
State Superseded, archived
Delegated to: Rafael Wysocki
Headers show

Commit Message

Josh Poimboeuf Dec. 1, 2016, 8:31 p.m. UTC
Resuming from a suspend operation is showing a KASAN false positive
warning:

  BUG: KASAN: stack-out-of-bounds in unwind_get_return_address+0x11d/0x130 at addr ffff8803867d7878
  Read of size 8 by task pm-suspend/7774
  page:ffffea000e19f5c0 count:0 mapcount:0 mapping:          (null) index:0x0
  flags: 0x2ffff0000000000()
  page dumped because: kasan: bad access detected
  CPU: 0 PID: 7774 Comm: pm-suspend Tainted: G    B           4.9.0-rc7+ #8
  Hardware name: Gigabyte Technology Co., Ltd. Z170X-UD5/Z170X-UD5-CF, BIOS F5 03/07/2016
  Call Trace:
    dump_stack+0x63/0x82
    kasan_report_error+0x4b4/0x4e0
    ? acpi_hw_read_port+0xd0/0x1ea
    ? kfree_const+0x22/0x30
    ? acpi_hw_validate_io_request+0x1a6/0x1a6
    __asan_report_load8_noabort+0x61/0x70
    ? unwind_get_return_address+0x11d/0x130
    unwind_get_return_address+0x11d/0x130
    ? unwind_next_frame+0x97/0xf0
    __save_stack_trace+0x92/0x100
    save_stack_trace+0x1b/0x20
    save_stack+0x46/0xd0
    ? save_stack_trace+0x1b/0x20
    ? save_stack+0x46/0xd0
    ? kasan_kmalloc+0xad/0xe0
    ? kasan_slab_alloc+0x12/0x20
    ? acpi_hw_read+0x2b6/0x3aa
    ? acpi_hw_validate_register+0x20b/0x20b
    ? acpi_hw_write_port+0x72/0xc7
    ? acpi_hw_write+0x11f/0x15f
    ? acpi_hw_read_multiple+0x19f/0x19f
    ? memcpy+0x45/0x50
    ? acpi_hw_write_port+0x72/0xc7
    ? acpi_hw_write+0x11f/0x15f
    ? acpi_hw_read_multiple+0x19f/0x19f
    ? kasan_unpoison_shadow+0x36/0x50
    kasan_kmalloc+0xad/0xe0
    kasan_slab_alloc+0x12/0x20
    kmem_cache_alloc_trace+0xbc/0x1e0
    ? acpi_get_sleep_type_data+0x9a/0x578
    acpi_get_sleep_type_data+0x9a/0x578
    acpi_hw_legacy_wake_prep+0x88/0x22c
    ? acpi_hw_legacy_sleep+0x3c7/0x3c7
    ? acpi_write_bit_register+0x28d/0x2d3
    ? acpi_read_bit_register+0x19b/0x19b
    acpi_hw_sleep_dispatch+0xb5/0xba
    acpi_leave_sleep_state_prep+0x17/0x19
    acpi_suspend_enter+0x154/0x1e0
    ? trace_suspend_resume+0xe8/0xe8
    suspend_devices_and_enter+0xb09/0xdb0
    ? printk+0xa8/0xd8
    ? arch_suspend_enable_irqs+0x20/0x20
    ? try_to_freeze_tasks+0x295/0x600
    pm_suspend+0x6c9/0x780
    ? finish_wait+0x1f0/0x1f0
    ? suspend_devices_and_enter+0xdb0/0xdb0
    state_store+0xa2/0x120
    ? kobj_attr_show+0x60/0x60
    kobj_attr_store+0x36/0x70
    sysfs_kf_write+0x131/0x200
    kernfs_fop_write+0x295/0x3f0
    __vfs_write+0xef/0x760
    ? handle_mm_fault+0x1346/0x35e0
    ? do_iter_readv_writev+0x660/0x660
    ? __pmd_alloc+0x310/0x310
    ? do_lock_file_wait+0x1e0/0x1e0
    ? apparmor_file_permission+0x18/0x20
    ? security_file_permission+0x73/0x1c0
    ? rw_verify_area+0xbd/0x2b0
    vfs_write+0x149/0x4a0
    SyS_write+0xd9/0x1c0
    ? SyS_read+0x1c0/0x1c0
    entry_SYSCALL_64_fastpath+0x1e/0xad
  Memory state around the buggy address:
   ffff8803867d7700: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
   ffff8803867d7780: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  >ffff8803867d7800: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 f4
                                                                  ^
   ffff8803867d7880: f3 f3 f3 f3 00 00 00 00 00 00 00 00 00 00 00 00
   ffff8803867d7900: 00 00 00 f1 f1 f1 f1 04 f4 f4 f4 f3 f3 f3 f3 00

KASAN instrumentation poisons the stack when entering a function and
unpoisons it when exiting the function.  However, in the suspend path,
some functions never return, so their stack never gets unpoisoned,
resulting in stale KASAN shadow data which can cause later false
positive warnings like the one above.

Reported-by: Scott Bauer <scott.bauer@intel.com>
Suggested-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
---
 arch/x86/kernel/acpi/wakeup_64.S | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

Comments

Dmitry Vyukov Dec. 2, 2016, 9:44 a.m. UTC | #1
On Thu, Dec 1, 2016 at 9:31 PM, Josh Poimboeuf <jpoimboe@redhat.com> wrote:
> Resuming from a suspend operation is showing a KASAN false positive
> warning:
>
>   BUG: KASAN: stack-out-of-bounds in unwind_get_return_address+0x11d/0x130 at addr ffff8803867d7878
>   Read of size 8 by task pm-suspend/7774
>   page:ffffea000e19f5c0 count:0 mapcount:0 mapping:          (null) index:0x0
>   flags: 0x2ffff0000000000()
>   page dumped because: kasan: bad access detected
>   CPU: 0 PID: 7774 Comm: pm-suspend Tainted: G    B           4.9.0-rc7+ #8
>   Hardware name: Gigabyte Technology Co., Ltd. Z170X-UD5/Z170X-UD5-CF, BIOS F5 03/07/2016
>   Call Trace:
>     dump_stack+0x63/0x82
>     kasan_report_error+0x4b4/0x4e0
>     ? acpi_hw_read_port+0xd0/0x1ea
>     ? kfree_const+0x22/0x30
>     ? acpi_hw_validate_io_request+0x1a6/0x1a6
>     __asan_report_load8_noabort+0x61/0x70
>     ? unwind_get_return_address+0x11d/0x130
>     unwind_get_return_address+0x11d/0x130
>     ? unwind_next_frame+0x97/0xf0
>     __save_stack_trace+0x92/0x100
>     save_stack_trace+0x1b/0x20
>     save_stack+0x46/0xd0
>     ? save_stack_trace+0x1b/0x20
>     ? save_stack+0x46/0xd0
>     ? kasan_kmalloc+0xad/0xe0
>     ? kasan_slab_alloc+0x12/0x20
>     ? acpi_hw_read+0x2b6/0x3aa
>     ? acpi_hw_validate_register+0x20b/0x20b
>     ? acpi_hw_write_port+0x72/0xc7
>     ? acpi_hw_write+0x11f/0x15f
>     ? acpi_hw_read_multiple+0x19f/0x19f
>     ? memcpy+0x45/0x50
>     ? acpi_hw_write_port+0x72/0xc7
>     ? acpi_hw_write+0x11f/0x15f
>     ? acpi_hw_read_multiple+0x19f/0x19f
>     ? kasan_unpoison_shadow+0x36/0x50
>     kasan_kmalloc+0xad/0xe0
>     kasan_slab_alloc+0x12/0x20
>     kmem_cache_alloc_trace+0xbc/0x1e0
>     ? acpi_get_sleep_type_data+0x9a/0x578
>     acpi_get_sleep_type_data+0x9a/0x578
>     acpi_hw_legacy_wake_prep+0x88/0x22c
>     ? acpi_hw_legacy_sleep+0x3c7/0x3c7
>     ? acpi_write_bit_register+0x28d/0x2d3
>     ? acpi_read_bit_register+0x19b/0x19b
>     acpi_hw_sleep_dispatch+0xb5/0xba
>     acpi_leave_sleep_state_prep+0x17/0x19
>     acpi_suspend_enter+0x154/0x1e0
>     ? trace_suspend_resume+0xe8/0xe8
>     suspend_devices_and_enter+0xb09/0xdb0
>     ? printk+0xa8/0xd8
>     ? arch_suspend_enable_irqs+0x20/0x20
>     ? try_to_freeze_tasks+0x295/0x600
>     pm_suspend+0x6c9/0x780
>     ? finish_wait+0x1f0/0x1f0
>     ? suspend_devices_and_enter+0xdb0/0xdb0
>     state_store+0xa2/0x120
>     ? kobj_attr_show+0x60/0x60
>     kobj_attr_store+0x36/0x70
>     sysfs_kf_write+0x131/0x200
>     kernfs_fop_write+0x295/0x3f0
>     __vfs_write+0xef/0x760
>     ? handle_mm_fault+0x1346/0x35e0
>     ? do_iter_readv_writev+0x660/0x660
>     ? __pmd_alloc+0x310/0x310
>     ? do_lock_file_wait+0x1e0/0x1e0
>     ? apparmor_file_permission+0x18/0x20
>     ? security_file_permission+0x73/0x1c0
>     ? rw_verify_area+0xbd/0x2b0
>     vfs_write+0x149/0x4a0
>     SyS_write+0xd9/0x1c0
>     ? SyS_read+0x1c0/0x1c0
>     entry_SYSCALL_64_fastpath+0x1e/0xad
>   Memory state around the buggy address:
>    ffff8803867d7700: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
>    ffff8803867d7780: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
>   >ffff8803867d7800: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 f4
>                                                                   ^
>    ffff8803867d7880: f3 f3 f3 f3 00 00 00 00 00 00 00 00 00 00 00 00
>    ffff8803867d7900: 00 00 00 f1 f1 f1 f1 04 f4 f4 f4 f3 f3 f3 f3 00
>
> KASAN instrumentation poisons the stack when entering a function and
> unpoisons it when exiting the function.  However, in the suspend path,
> some functions never return, so their stack never gets unpoisoned,
> resulting in stale KASAN shadow data which can cause later false
> positive warnings like the one above.
>
> Reported-by: Scott Bauer <scott.bauer@intel.com>
> Suggested-by: Dmitry Vyukov <dvyukov@google.com>
> Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
> ---
>  arch/x86/kernel/acpi/wakeup_64.S | 16 ++++++++++++++++
>  1 file changed, 16 insertions(+)
>
> diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
> index 169963f..1df9b75 100644
> --- a/arch/x86/kernel/acpi/wakeup_64.S
> +++ b/arch/x86/kernel/acpi/wakeup_64.S
> @@ -109,6 +109,22 @@ ENTRY(do_suspend_lowlevel)
>         movq    pt_regs_r14(%rax), %r14
>         movq    pt_regs_r15(%rax), %r15
>
> +#ifdef CONFIG_KASAN
> +       /*
> +        * The suspend path may have poisoned some areas deeper in the stack,
> +        * which we now need to unpoison.
> +        *
> +        * We can't call kasan_unpoison_task_stack_below() because it uses %gs
> +        * for 'current', which hasn't been set up yet.  Instead, calculate the
> +        * stack range manually and call kasan_unpoison_shadow().
> +        */
> +       movq    %rsp, %rdi
> +       andq    $CURRENT_MASK, %rdi
> +       movq    %rsp, %rsi
> +       xorq    %rdi, %rsi
> +       call    kasan_unpoison_shadow
> +#endif
> +
>         xorl    %eax, %eax
>         addq    $8, %rsp
>         FRAME_END


Reviewed-by: Dmitry Vyukov <dvyukov@google.com>

Thanks!
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pavel Machek Dec. 2, 2016, 12:54 p.m. UTC | #2
Hi!

> Resuming from a suspend operation is showing a KASAN false positive
> warning:

> KASAN instrumentation poisons the stack when entering a function and
> unpoisons it when exiting the function.  However, in the suspend path,
> some functions never return, so their stack never gets unpoisoned,
> resulting in stale KASAN shadow data which can cause later false
> positive warnings like the one above.
> 
> Reported-by: Scott Bauer <scott.bauer@intel.com>
> Suggested-by: Dmitry Vyukov <dvyukov@google.com>
> Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>

Acked-by: Pavel Machek <pavel@ucw.cz>

> ---
>  arch/x86/kernel/acpi/wakeup_64.S | 16 ++++++++++++++++
>  1 file changed, 16 insertions(+)
> 
> diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
> index 169963f..1df9b75 100644
> --- a/arch/x86/kernel/acpi/wakeup_64.S
> +++ b/arch/x86/kernel/acpi/wakeup_64.S
> @@ -109,6 +109,22 @@ ENTRY(do_suspend_lowlevel)
>  	movq	pt_regs_r14(%rax), %r14
>  	movq	pt_regs_r15(%rax), %r15
>  
> +#ifdef CONFIG_KASAN
> +	/*
> +	 * The suspend path may have poisoned some areas deeper in the stack,
> +	 * which we now need to unpoison.
> +	 *
> +	 * We can't call kasan_unpoison_task_stack_below() because it uses %gs
> +	 * for 'current', which hasn't been set up yet.  Instead, calculate the
> +	 * stack range manually and call kasan_unpoison_shadow().
> +	 */
> +	movq	%rsp, %rdi
> +	andq	$CURRENT_MASK, %rdi
> +	movq	%rsp, %rsi
> +	xorq	%rdi, %rsi
> +	call	kasan_unpoison_shadow
> +#endif

Well... you may want to add a note to kasan_unpoison_shadow()

/*
* This is called by early resume code, with cpu not yet properly
* resumed. In particular, %gs may not be set up, and thus current
* is not available.
*/

Thanks,
									Pavel
Josh Poimboeuf Dec. 2, 2016, 2:01 p.m. UTC | #3
On Fri, Dec 02, 2016 at 04:41:09PM +0300, Andrey Ryabinin wrote:
> 
> 
> On 12/01/2016 11:31 PM, Josh Poimboeuf wrote:
> 
> >  arch/x86/kernel/acpi/wakeup_64.S | 16 ++++++++++++++++
> >  1 file changed, 16 insertions(+)
> > 
> > diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
> > index 169963f..1df9b75 100644
> > --- a/arch/x86/kernel/acpi/wakeup_64.S
> > +++ b/arch/x86/kernel/acpi/wakeup_64.S
> > @@ -109,6 +109,22 @@ ENTRY(do_suspend_lowlevel)
> >  	movq	pt_regs_r14(%rax), %r14
> >  	movq	pt_regs_r15(%rax), %r15
> >  
> > +#ifdef CONFIG_KASAN
> > +	/*
> > +	 * The suspend path may have poisoned some areas deeper in the stack,
> > +	 * which we now need to unpoison.
> > +	 *
> > +	 * We can't call kasan_unpoison_task_stack_below() because it uses %gs
> > +	 * for 'current', which hasn't been set up yet.  Instead, calculate the
> > +	 * stack range manually and call kasan_unpoison_shadow().
> > +	 */
> > +	movq	%rsp, %rdi
> > +	andq	$CURRENT_MASK, %rdi
> > +	movq	%rsp, %rsi
> > +	xorq	%rdi, %rsi
> > +	call	kasan_unpoison_shadow
> > +#endif
> > +
> 
> Looks good, but in fact we can use kasan_unpoison_task_stack_below(). We just need to change it a little:
> 
> diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
> index 70c0097..e779236 100644
> --- a/mm/kasan/kasan.c
> +++ b/mm/kasan/kasan.c
> @@ -80,7 +80,9 @@ void kasan_unpoison_task_stack(struct task_struct *task)
>  /* Unpoison the stack for the current task beyond a watermark sp value. */
>  asmlinkage void kasan_unpoison_task_stack_below(const void *watermark)
>  {
> -       __kasan_unpoison_stack(current, watermark);
> +       void *base = (void *)((unsigned long)watermark & ~(THREAD_SIZE - 1));
> +
> +       kasan_unpoison_shadow(base, watermark - base);
>  }
> 
> 
> With this we don't have to calculate stack range in assembly.

That is better indeed, will do a v3.
Dmitry Vyukov Dec. 2, 2016, 2:02 p.m. UTC | #4
On Fri, Dec 2, 2016 at 3:01 PM, Josh Poimboeuf <jpoimboe@redhat.com> wrote:
> On Fri, Dec 02, 2016 at 04:41:09PM +0300, Andrey Ryabinin wrote:
>>
>>
>> On 12/01/2016 11:31 PM, Josh Poimboeuf wrote:
>>
>> >  arch/x86/kernel/acpi/wakeup_64.S | 16 ++++++++++++++++
>> >  1 file changed, 16 insertions(+)
>> >
>> > diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
>> > index 169963f..1df9b75 100644
>> > --- a/arch/x86/kernel/acpi/wakeup_64.S
>> > +++ b/arch/x86/kernel/acpi/wakeup_64.S
>> > @@ -109,6 +109,22 @@ ENTRY(do_suspend_lowlevel)
>> >     movq    pt_regs_r14(%rax), %r14
>> >     movq    pt_regs_r15(%rax), %r15
>> >
>> > +#ifdef CONFIG_KASAN
>> > +   /*
>> > +    * The suspend path may have poisoned some areas deeper in the stack,
>> > +    * which we now need to unpoison.
>> > +    *
>> > +    * We can't call kasan_unpoison_task_stack_below() because it uses %gs
>> > +    * for 'current', which hasn't been set up yet.  Instead, calculate the
>> > +    * stack range manually and call kasan_unpoison_shadow().
>> > +    */
>> > +   movq    %rsp, %rdi
>> > +   andq    $CURRENT_MASK, %rdi
>> > +   movq    %rsp, %rsi
>> > +   xorq    %rdi, %rsi
>> > +   call    kasan_unpoison_shadow
>> > +#endif
>> > +
>>
>> Looks good, but in fact we can use kasan_unpoison_task_stack_below(). We just need to change it a little:
>>
>> diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
>> index 70c0097..e779236 100644
>> --- a/mm/kasan/kasan.c
>> +++ b/mm/kasan/kasan.c
>> @@ -80,7 +80,9 @@ void kasan_unpoison_task_stack(struct task_struct *task)
>>  /* Unpoison the stack for the current task beyond a watermark sp value. */
>>  asmlinkage void kasan_unpoison_task_stack_below(const void *watermark)
>>  {
>> -       __kasan_unpoison_stack(current, watermark);
>> +       void *base = (void *)((unsigned long)watermark & ~(THREAD_SIZE - 1));
>> +
>> +       kasan_unpoison_shadow(base, watermark - base);
>>  }
>>
>>
>> With this we don't have to calculate stack range in assembly.
>
> That is better indeed, will do a v3.

agree
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index 169963f..1df9b75 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -109,6 +109,22 @@  ENTRY(do_suspend_lowlevel)
 	movq	pt_regs_r14(%rax), %r14
 	movq	pt_regs_r15(%rax), %r15
 
+#ifdef CONFIG_KASAN
+	/*
+	 * The suspend path may have poisoned some areas deeper in the stack,
+	 * which we now need to unpoison.
+	 *
+	 * We can't call kasan_unpoison_task_stack_below() because it uses %gs
+	 * for 'current', which hasn't been set up yet.  Instead, calculate the
+	 * stack range manually and call kasan_unpoison_shadow().
+	 */
+	movq	%rsp, %rdi
+	andq	$CURRENT_MASK, %rdi
+	movq	%rsp, %rsi
+	xorq	%rdi, %rsi
+	call	kasan_unpoison_shadow
+#endif
+
 	xorl	%eax, %eax
 	addq	$8, %rsp
 	FRAME_END