
perf callchain: Fix suspicious RCU usage in get_callchain_entry()

Message ID: 20240715102326.1910790-2-radoslaw.zielonek@gmail.com
State: Not Applicable
Series: perf callchain: Fix suspicious RCU usage in get_callchain_entry()

Commit Message

Radoslaw Zielonek July 15, 2024, 10:23 a.m. UTC
rcu_dereference() uses rcu_read_lock_held() as its lockdep condition, but
BPF in bpf_prog_test_run_syscall() takes the lock with rcu_read_lock_trace().
Fix the warning by switching to rcu_dereference_check() with the condition
(rcu_read_lock_trace_held() || rcu_read_lock_held()).
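
For illustration, the shape of the mismatch, condensed from the call trace
below (a paraphrased sketch, not verbatim kernel source):

	/* net/bpf/test_run.c: bpf_prog_test_run_syscall(), condensed */
	rcu_read_lock_trace();		/* RCU-tasks-trace read lock */
	retval = bpf_prog_run_pin_on_cpu(prog, ctx);
		/* -> bpf_get_stack_raw_tp() -> __bpf_get_stack()
		 * -> get_perf_callchain() -> get_callchain_entry() */
	rcu_read_unlock_trace();

	/* kernel/events/callchain.c: get_callchain_entry(), before this patch */
	entries = rcu_dereference(callchain_cpus_entries);
	/* the lockdep condition for rcu_dereference() is
	 * rcu_read_lock_held(), which is false here, hence the splat */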

syzbot reported:

=============================
WARNING: suspicious RCU usage
6.9.0-rc5-syzkaller-00159-gc942a0cd3603 #0 Not tainted
-----------------------------
kernel/events/callchain.c:161 suspicious rcu_dereference_check() usage!

other info that might help us debug this:

rcu_scheduler_active = 2, debug_locks = 1
1 lock held by syz-executor305/5180:

stack backtrace:
CPU: 3 PID: 5180 Comm: syz-executor305 Not tainted 6.9.0-rc5-syzkaller-00159-gc942a0cd3603 #0
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
Call Trace:
<TASK>
__dump_stack lib/dump_stack.c:88 [inline]
 dump_stack_lvl+0x16c/0x1f0 lib/dump_stack.c:114
lockdep_rcu_suspicious+0x20b/0x3b0 kernel/locking/lockdep.c:6712
get_callchain_entry+0x274/0x3f0 kernel/events/callchain.c:161
get_perf_callchain+0xdc/0x5a0 kernel/events/callchain.c:187
__bpf_get_stack+0x4d9/0x700 kernel/bpf/stackmap.c:435
____bpf_get_stack_raw_tp kernel/trace/bpf_trace.c:1985 [inline]
bpf_get_stack_raw_tp+0x124/0x160 kernel/trace/bpf_trace.c:1975
___bpf_prog_run+0x3e51/0xabd0 kernel/bpf/core.c:1997
__bpf_prog_run32+0xc1/0x100 kernel/bpf/core.c:2236
bpf_dispatcher_nop_func include/linux/bpf.h:1234 [inline]
__bpf_prog_run include/linux/filter.h:657 [inline]
bpf_prog_run include/linux/filter.h:664 [inline]
bpf_prog_run_pin_on_cpu include/linux/filter.h:681 [inline]
bpf_prog_test_run_syscall+0x3ae/0x770 net/bpf/test_run.c:1509
bpf_prog_test_run kernel/bpf/syscall.c:4269 [inline]
__sys_bpf+0xd56/0x4b40 kernel/bpf/syscall.c:5678
__do_sys_bpf kernel/bpf/syscall.c:5767 [inline]
__se_sys_bpf kernel/bpf/syscall.c:5765 [inline]
__x64_sys_bpf+0x78/0xc0 kernel/bpf/syscall.c:5765
do_syscall_x64 arch/x86/entry/common.c:52 [inline]
do_syscall_64+0xcf/0x260 arch/x86/entry/common.c:83
entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7f54610dc669
</TASK>

Reported-by: syzbot+72a43cdb78469f7fbad1@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=72a43cdb78469f7fbad1

Signed-off-by: Radoslaw Zielonek <radoslaw.zielonek@gmail.com>
---
 kernel/events/callchain.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

Comments

Peter Zijlstra July 15, 2024, 10:47 a.m. UTC | #1
On Mon, Jul 15, 2024 at 12:23:27PM +0200, Radoslaw Zielonek wrote:
> rcu_dereference() uses rcu_read_lock_held() as its lockdep condition, but
> BPF in bpf_prog_test_run_syscall() takes the lock with rcu_read_lock_trace().
> Fix the warning by switching to rcu_dereference_check() with the condition
> (rcu_read_lock_trace_held() || rcu_read_lock_held()).

How does that fix it? release_callchain_buffers() does call_rcu(), not
call_rcu_tasks_trace().

Does a normal RCU grace period fully imply an RCU-tracing grace period?
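
For context, the updater side being referenced, paraphrased from
kernel/events/callchain.c (a sketch, not verbatim source):

	static void release_callchain_buffers(void)
	{
		struct callchain_cpus_entries *entries;

		entries = callchain_cpus_entries;
		RCU_INIT_POINTER(callchain_cpus_entries, NULL);
		/* Normal RCU grace period: does not wait for
		 * rcu_read_lock_trace() readers. */
		call_rcu(&entries->rcu_head, release_callchain_buffers_rcu);
	}

If a normal grace period does not imply a trace-RCU one, a reader holding
only rcu_read_lock_trace() could still be inside get_callchain_entry() when
the buffers are freed, so widening the lockdep check would hide a potential
use-after-free rather than fix it.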

> ---
>  kernel/events/callchain.c | 11 +++++++++--
>  1 file changed, 9 insertions(+), 2 deletions(-)
> 
> diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
> index 1273be84392c..a8af7cd50626 100644
> --- a/kernel/events/callchain.c
> +++ b/kernel/events/callchain.c
> @@ -11,6 +11,7 @@
>  #include <linux/perf_event.h>
>  #include <linux/slab.h>
>  #include <linux/sched/task_stack.h>
> +#include <linux/rcupdate_trace.h>
>  
>  #include "internal.h"
>  
> @@ -32,7 +33,7 @@ static inline size_t perf_callchain_entry__sizeof(void)
>  static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]);
>  static atomic_t nr_callchain_events;
>  static DEFINE_MUTEX(callchain_mutex);
> -static struct callchain_cpus_entries *callchain_cpus_entries;
> +static struct callchain_cpus_entries __rcu *callchain_cpus_entries;
>  
>  
>  __weak void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
> @@ -158,7 +159,13 @@ struct perf_callchain_entry *get_callchain_entry(int *rctx)
>  	if (*rctx == -1)
>  		return NULL;
>  
> -	entries = rcu_dereference(callchain_cpus_entries);
> +	/*
> +	 * BPF locked rcu using rcu_read_lock_trace() in
> +	 * bpf_prog_test_run_syscall()
> +	 */
> +	entries = rcu_dereference_check(callchain_cpus_entries,
> +					rcu_read_lock_trace_held() ||
> +					rcu_read_lock_held());
>  	if (!entries) {
>  		put_recursion_context(this_cpu_ptr(callchain_recursion), *rctx);
>  		return NULL;
> -- 
> 2.43.0
>
Andrii Nakryiko July 18, 2024, 5:41 p.m. UTC | #2
On Mon, Jul 15, 2024 at 3:47 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Mon, Jul 15, 2024 at 12:23:27PM +0200, Radoslaw Zielonek wrote:
> > rcu_dereference() uses rcu_read_lock_held() as its lockdep condition, but
> > BPF in bpf_prog_test_run_syscall() takes the lock with rcu_read_lock_trace().
> > Fix the warning by switching to rcu_dereference_check() with the condition
> > (rcu_read_lock_trace_held() || rcu_read_lock_held()).
>
> How does that fix it? release_callchain_buffers() does call_rcu(), not
> call_rcu_tasks_trace().
>
> Does a normal RCU grace period fully imply an RCU-tracing grace period?

I don't think so; they are completely independent, so this change
doesn't seem correct. I think we should just ensure
rcu_read_lock()/rcu_read_unlock() is taken before calling into the
perf_callchain functionality.

Which is what I'm doing in [0]. Radoslaw, can you please help
validate whether those changes are enough to fix this issue, or
whether we need to do more?

  [0] https://patchwork.kernel.org/project/netdevbpf/patch/20240709204245.3847811-10-andrii@kernel.org/
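
A minimal sketch of that approach, bracketing the callchain access with
plain RCU so the reader side matches the call_rcu() grace period used on
free (placement and arguments are illustrative; the actual patch in [0]
may differ):

	/* e.g. around the callchain access in __bpf_get_stack() */
	rcu_read_lock();	/* plain RCU pairs with call_rcu() on free */
	trace = get_perf_callchain(regs, 0, kernel, user, max_depth,
				   false, false);
	rcu_read_unlock();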

>
> > ---
> >  kernel/events/callchain.c | 11 +++++++++--
> >  1 file changed, 9 insertions(+), 2 deletions(-)
> >

[...]

Patch

diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index 1273be84392c..a8af7cd50626 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -11,6 +11,7 @@
 #include <linux/perf_event.h>
 #include <linux/slab.h>
 #include <linux/sched/task_stack.h>
+#include <linux/rcupdate_trace.h>
 
 #include "internal.h"
 
@@ -32,7 +33,7 @@ static inline size_t perf_callchain_entry__sizeof(void)
 static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]);
 static atomic_t nr_callchain_events;
 static DEFINE_MUTEX(callchain_mutex);
-static struct callchain_cpus_entries *callchain_cpus_entries;
+static struct callchain_cpus_entries __rcu *callchain_cpus_entries;
 
 
 __weak void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
@@ -158,7 +159,13 @@ struct perf_callchain_entry *get_callchain_entry(int *rctx)
 	if (*rctx == -1)
 		return NULL;
 
-	entries = rcu_dereference(callchain_cpus_entries);
+	/*
+	 * BPF locked rcu using rcu_read_lock_trace() in
+	 * bpf_prog_test_run_syscall()
+	 */
+	entries = rcu_dereference_check(callchain_cpus_entries,
+					rcu_read_lock_trace_held() ||
+					rcu_read_lock_held());
 	if (!entries) {
 		put_recursion_context(this_cpu_ptr(callchain_recursion), *rctx);
 		return NULL;