
[bpf-next,v2,08/14] bpf: Prevent KASAN false positive with bpf_throw

Message ID 20230809114116.3216687-9-memxor@gmail.com (mailing list archive)
State Changes Requested
Delegated to: BPF
Series Exceptions - 1/2

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 15391 this patch: 15392
netdev/cc_maintainers warning 7 maintainers not CCed: kasan-dev@googlegroups.com kpsingh@kernel.org john.fastabend@gmail.com sdf@google.com song@kernel.org jolsa@kernel.org haoluo@google.com
netdev/build_clang success Errors and warnings before: 4719 this patch: 4719
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 16528 this patch: 16528
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 28 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-29 success Logs for veristat
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-0 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-1 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-5 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-2 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-28 success Logs for veristat
bpf/vmtest-bpf-next-VM_Test-6 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-12 success Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-14 success Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-18 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-21 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-25 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for test_maps on s390x with gcc

Commit Message

Kumar Kartikeya Dwivedi Aug. 9, 2023, 11:41 a.m. UTC
When CONFIG_KASAN_STACK is enabled, KASAN's stack instrumentation poisons
a function's stack frame on entry and unpoisons it on exit. However,
bpf_throw never returns: we switch the stack frame to the BPF exception
callback instead. This discrepancy later leads to confusing KASAN splats
when the kernel resumes execution on return from the BPF program.
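
A simplified sketch of the control flow involved (bpf_throw lives in
kernel/bpf/helpers.c in this series; the stack-walk helper and context
struct below are only approximations of the actual code):

	/* Sketch only; see the real implementation in kernel/bpf/helpers.c. */
	__bpf_kfunc void bpf_throw(u64 cookie)
	{
		struct bpf_throw_ctx ctx = {};

		/* Walk the kernel stack to find the exception boundary (sp/bp). */
		arch_bpf_stack_walk(bpf_stack_walker, &ctx);
		WARN_ON_ONCE(!ctx.aux);
		WARN_ON_ONCE(!ctx.bp);
		WARN_ON_ONCE(!ctx.cnt);
		/*
		 * This call never returns: execution continues in the exception
		 * callback on the boundary frame, so the compiler-generated KASAN
		 * unpoisoning for the abandoned frames is skipped.
		 */
		ctx.aux->bpf_exception_cb(cookie, ctx.sp, ctx.bp);
	}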

Fix this by unpoisoning everything below the stack pointer of the BPF
program, which covers the range that would otherwise be left poisoned. An
example splat is below:

BUG: KASAN: stack-out-of-bounds in stack_trace_consume_entry+0x14e/0x170
Write of size 8 at addr ffffc900013af958 by task test_progs/227

CPU: 0 PID: 227 Comm: test_progs Not tainted 6.5.0-rc2-g43f1c6c9052a-dirty #26
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-2.fc39 04/01/2014
Call Trace:
 <TASK>
 dump_stack_lvl+0x4a/0x80
 print_report+0xcf/0x670
 ? arch_stack_walk+0x79/0x100
 kasan_report+0xda/0x110
 ? stack_trace_consume_entry+0x14e/0x170
 ? stack_trace_consume_entry+0x14e/0x170
 ? __pfx_stack_trace_consume_entry+0x10/0x10
 stack_trace_consume_entry+0x14e/0x170
 ? __sys_bpf+0xf2e/0x41b0
 arch_stack_walk+0x8b/0x100
 ? __sys_bpf+0xf2e/0x41b0
 ? bpf_prog_test_run_skb+0x341/0x1c70
 ? bpf_prog_test_run_skb+0x341/0x1c70
 stack_trace_save+0x9b/0xd0
 ? __pfx_stack_trace_save+0x10/0x10
 ? __kasan_slab_free+0x109/0x180
 ? bpf_prog_test_run_skb+0x341/0x1c70
 ? __sys_bpf+0xf2e/0x41b0
 ? __x64_sys_bpf+0x78/0xc0
 ? do_syscall_64+0x3c/0x90
 ? entry_SYSCALL_64_after_hwframe+0x6e/0xd8
 kasan_save_stack+0x33/0x60
 ? kasan_save_stack+0x33/0x60
 ? kasan_set_track+0x25/0x30
 ? kasan_save_free_info+0x2b/0x50
 ? __kasan_slab_free+0x109/0x180
 ? kmem_cache_free+0x191/0x460
 ? bpf_prog_test_run_skb+0x341/0x1c70
 kasan_set_track+0x25/0x30
 kasan_save_free_info+0x2b/0x50
 __kasan_slab_free+0x109/0x180
 kmem_cache_free+0x191/0x460
 bpf_prog_test_run_skb+0x341/0x1c70
 ? __pfx_bpf_prog_test_run_skb+0x10/0x10
 ? __fget_light+0x51/0x220
 __sys_bpf+0xf2e/0x41b0
 ? __might_fault+0xa2/0x170
 ? __pfx___sys_bpf+0x10/0x10
 ? lock_release+0x1de/0x620
 ? __might_fault+0xcd/0x170
 ? __pfx_lock_release+0x10/0x10
 ? __pfx_blkcg_maybe_throttle_current+0x10/0x10
 __x64_sys_bpf+0x78/0xc0
 ? syscall_enter_from_user_mode+0x20/0x50
 do_syscall_64+0x3c/0x90
 entry_SYSCALL_64_after_hwframe+0x6e/0xd8
RIP: 0033:0x7f0fbb38880d
Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d
89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d f3 45 12 00 f7 d8 64
89 01 48
RSP: 002b:00007ffe13907de8 EFLAGS: 00000206 ORIG_RAX: 0000000000000141
RAX: ffffffffffffffda RBX: 00007ffe13908708 RCX: 00007f0fbb38880d
RDX: 0000000000000050 RSI: 00007ffe13907e20 RDI: 000000000000000a
RBP: 00007ffe13907e00 R08: 0000000000000000 R09: 00007ffe13907e20
R10: 0000000000000064 R11: 0000000000000206 R12: 0000000000000003
R13: 0000000000000000 R14: 00007f0fbb532000 R15: 0000000000cfbd90
 </TASK>

The buggy address belongs to stack of task test_progs/227
KASAN internal error: frame info validation failed; invalid marker: 0

The buggy address belongs to the virtual mapping at
 [ffffc900013a8000, ffffc900013b1000) created by:
 kernel_clone+0xcd/0x600

The buggy address belongs to the physical page:
page:00000000b70f4332 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x11418f
flags: 0x2fffe0000000000(node=0|zone=2|lastcpupid=0x7fff)
page_type: 0xffffffff()
raw: 02fffe0000000000 0000000000000000 dead000000000122 0000000000000000
raw: 0000000000000000 0000000000000000 00000001ffffffff 0000000000000000
page dumped because: kasan: bad access detected

Memory state around the buggy address:
 ffffc900013af800: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
 ffffc900013af880: 00 00 00 f1 f1 f1 f1 00 00 00 f3 f3 f3 f3 f3 00
>ffffc900013af900: 00 00 00 00 00 00 00 00 00 00 00 f1 00 00 00 00
                                                    ^
 ffffc900013af980: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
 ffffc900013afa00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
==================================================================
Disabling lock debugging due to kernel taint

Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Vincenzo Frascino <vincenzo.frascino@arm.com>
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
---
 include/linux/kasan.h | 2 ++
 kernel/bpf/helpers.c  | 6 ++++++
 2 files changed, 8 insertions(+)

Comments

Alexei Starovoitov Aug. 22, 2023, 4:23 p.m. UTC | #1
Andrey, Dmitry,

Please help review this patch.


On Wed, Aug 9, 2023 at 4:43 AM Kumar Kartikeya Dwivedi <memxor@gmail.com> wrote:
> [...]
Andrey Konovalov Aug. 30, 2023, 4:53 p.m. UTC | #2
On Wed, Aug 9, 2023 at 1:43 PM Kumar Kartikeya Dwivedi <memxor@gmail.com> wrote:
>

Hi Kumar,

> @@ -283,8 +283,10 @@ static inline bool kasan_check_byte(const void *address)
>
>  #if defined(CONFIG_KASAN) && defined(CONFIG_KASAN_STACK)
>  void kasan_unpoison_task_stack(struct task_struct *task);
> +asmlinkage void kasan_unpoison_task_stack_below(const void *watermark);
>  #else
>  static inline void kasan_unpoison_task_stack(struct task_struct *task) {}
> +static inline void kasan_unpoison_task_stack_below(const void *watermark) {}
>  #endif

Please also drop the kasan_unpoison_task_stack_below declaration from
mm/kasan/kasan.h.

Also, could you please split this change into 2: one that exposes
kasan_unpoison_task_stack_below and another that changes the bpf code.
This will greatly simplify backporting KASAN changes for older
kernels.

Thank you!
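
For context, the helper exposed by this hunk already has a generic
implementation on the KASAN side (mm/kasan); it looks roughly like the
sketch below, though details can vary between kernel versions:

	asmlinkage void kasan_unpoison_task_stack_below(const void *watermark)
	{
		/*
		 * Derive the stack base from the watermark itself rather than
		 * from 'current', then unpoison everything from the base up to
		 * the watermark.
		 */
		void *base = (void *)((unsigned long)watermark & ~(THREAD_SIZE - 1));

		kasan_unpoison(base, watermark - base, false);
	}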

Patch

diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 819b6bc8ac08..7a463f814db2 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -283,8 +283,10 @@  static inline bool kasan_check_byte(const void *address)
 
 #if defined(CONFIG_KASAN) && defined(CONFIG_KASAN_STACK)
 void kasan_unpoison_task_stack(struct task_struct *task);
+asmlinkage void kasan_unpoison_task_stack_below(const void *watermark);
 #else
 static inline void kasan_unpoison_task_stack(struct task_struct *task) {}
+static inline void kasan_unpoison_task_stack_below(const void *watermark) {}
 #endif
 
 #ifdef CONFIG_KASAN_GENERIC
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index af4add1e3a31..64a07232c58f 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -22,6 +22,7 @@ 
 #include <linux/security.h>
 #include <linux/btf_ids.h>
 #include <linux/bpf_mem_alloc.h>
+#include <linux/kasan.h>
 
 #include "../../lib/kstrtox.h"
 
@@ -2463,6 +2464,11 @@  __bpf_kfunc void bpf_throw(u64 cookie)
 		WARN_ON_ONCE(!ctx.aux->exception_boundary);
 	WARN_ON_ONCE(!ctx.bp);
 	WARN_ON_ONCE(!ctx.cnt);
+	/* Prevent KASAN false positives for CONFIG_KASAN_STACK by unpoisoning
+	 * deeper stack depths than ctx.sp as we do not return from bpf_throw,
+	 * which skips compiler generated instrumentation to do the same.
+	 */
+	kasan_unpoison_task_stack_below((void *)ctx.sp);
 	ctx.aux->bpf_exception_cb(cookie, ctx.sp, ctx.bp);
 }
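
For illustration, a minimal BPF program exercising this path might look
like the hypothetical sketch below; it assumes the bpf_throw() kfunc
declaration used by the selftests in this series:

	// SPDX-License-Identifier: GPL-2.0
	/* Hypothetical example, not part of this patch. */
	#include <vmlinux.h>
	#include <bpf/bpf_helpers.h>

	/* kfunc declaration as used by the selftests in this series (assumption). */
	extern void bpf_throw(u64 cookie) __ksym;

	SEC("tc")
	int throw_on_long_packet(struct __sk_buff *skb)
	{
		/*
		 * bpf_throw() never returns; control transfers to the program's
		 * exception callback, which is why the kernel must unpoison the
		 * stack below the boundary frame itself (the fix in this patch).
		 */
		if (skb->len > 1024)
			bpf_throw(0 /* cookie */);
		return 0;
	}

	char _license[] SEC("license") = "GPL";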