diff mbox series

[bpf-next] bpf: Fix deadlock between rcu_tasks_trace and event_mutex.

Message ID 20250224221637.4780-1-alexei.starovoitov@gmail.com (mailing list archive)
State New
Delegated to: BPF
Headers show
Series [bpf-next] bpf: Fix deadlock between rcu_tasks_trace and event_mutex. | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for bpf-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 13 maintainers not CCed: mhiramat@kernel.org haoluo@google.com song@kernel.org kpsingh@kernel.org john.fastabend@gmail.com jolsa@kernel.org mathieu.desnoyers@efficios.com yonghong.song@linux.dev rostedt@goodmis.org mattbobrowski@google.com sdf@fomichev.me martin.lau@linux.dev linux-trace-kernel@vger.kernel.org
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 18 this patch: 18
netdev/checkpatch warning CHECK: Please use a blank line after function/struct/union/enum declarations
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / GCC BPF
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-gcc / veristat-kernel / x86_64-gcc veristat_kernel
bpf/vmtest-bpf-next-VM_Test-11 success Logs for aarch64-gcc / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-12 success Logs for aarch64-gcc / veristat-meta
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-20 success Logs for s390x-gcc / veristat-meta
bpf/vmtest-bpf-next-VM_Test-18 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-gcc / veristat-meta / x86_64-gcc veristat_meta
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-44 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-50 success Logs for x86_64-llvm-18 / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-17 / veristat-meta
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-49 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / GCC BPF
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for s390x-gcc / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-51 success Logs for x86_64-llvm-18 / veristat-meta
bpf/vmtest-bpf-next-VM_Test-21 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-43 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-17 / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-45 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-47 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / GCC BPF / GCC BPF
bpf/vmtest-bpf-next-VM_Test-46 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / GCC BPF / GCC BPF
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / GCC BPF / GCC BPF
bpf/vmtest-bpf-next-VM_Test-48 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc

Commit Message

Alexei Starovoitov Feb. 24, 2025, 10:16 p.m. UTC
From: Alexei Starovoitov <ast@kernel.org>

Fix the following deadlock:
CPU A
_free_event()
  perf_kprobe_destroy()
    mutex_lock(&event_mutex)
      perf_trace_event_unreg()
        synchronize_rcu_tasks_trace()

There are several paths where _free_event() grabs event_mutex
and calls sync_rcu_tasks_trace. Above is one such case.

CPU B
bpf_prog_test_run_syscall()
  rcu_read_lock_trace()
    bpf_prog_run_pin_on_cpu()
      bpf_prog_load()
        bpf_tracing_func_proto()
          trace_set_clr_event()
            mutex_lock(&event_mutex)

Delegate trace_set_clr_event() to workqueue to avoid
such lock dependency.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/trace/bpf_trace.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

Comments

Andrii Nakryiko Feb. 25, 2025, 1:06 a.m. UTC | #1
On Mon, Feb 24, 2025 at 2:16 PM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> From: Alexei Starovoitov <ast@kernel.org>
>
> Fix the following deadlock:
> CPU A
> _free_event()
>   perf_kprobe_destroy()
>     mutex_lock(&event_mutex)
>       perf_trace_event_unreg()
>         synchronize_rcu_tasks_trace()
>
> There are several paths where _free_event() grabs event_mutex
> and calls sync_rcu_tasks_trace. Above is one such case.
>
> CPU B
> bpf_prog_test_run_syscall()
>   rcu_read_lock_trace()
>     bpf_prog_run_pin_on_cpu()
>       bpf_prog_load()
>         bpf_tracing_func_proto()
>           trace_set_clr_event()
>             mutex_lock(&event_mutex)
>
> Delegate trace_set_clr_event() to workqueue to avoid
> such lock dependency.
>
> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
> ---
>  kernel/trace/bpf_trace.c | 7 ++++---
>  1 file changed, 4 insertions(+), 3 deletions(-)
>

There is a tiny chance that bpf_printk() might not produce data (for a
little bit) if the time between program verification and its
triggering right after that is shorter than workqueue delay, right?
It's probably negligible in practice, so lgtm

Acked-by: Andrii Nakryiko <andrii@kernel.org>

> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index a612f6f182e5..13bef2462e94 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -392,7 +392,7 @@ static const struct bpf_func_proto bpf_trace_printk_proto = {
>         .arg2_type      = ARG_CONST_SIZE,
>  };
>
> -static void __set_printk_clr_event(void)
> +static void __set_printk_clr_event(struct work_struct *work)
>  {
>         /*
>          * This program might be calling bpf_trace_printk,
> @@ -405,10 +405,11 @@ static void __set_printk_clr_event(void)
>         if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1))
>                 pr_warn_ratelimited("could not enable bpf_trace_printk events");
>  }
> +static DECLARE_WORK(set_printk_work, __set_printk_clr_event);
>
>  const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
>  {
> -       __set_printk_clr_event();
> +       schedule_work(&set_printk_work);
>         return &bpf_trace_printk_proto;
>  }
>
> @@ -451,7 +452,7 @@ static const struct bpf_func_proto bpf_trace_vprintk_proto = {
>
>  const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void)
>  {
> -       __set_printk_clr_event();
> +       schedule_work(&set_printk_work);
>         return &bpf_trace_vprintk_proto;
>  }
>
> --
> 2.43.5
>
Alexei Starovoitov Feb. 25, 2025, 1:56 a.m. UTC | #2
On Mon, Feb 24, 2025 at 5:06 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Mon, Feb 24, 2025 at 2:16 PM Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:
> >
> > From: Alexei Starovoitov <ast@kernel.org>
> >
> > Fix the following deadlock:
> > CPU A
> > _free_event()
> >   perf_kprobe_destroy()
> >     mutex_lock(&event_mutex)
> >       perf_trace_event_unreg()
> >         synchronize_rcu_tasks_trace()
> >
> > There are several paths where _free_event() grabs event_mutex
> > and calls sync_rcu_tasks_trace. Above is one such case.
> >
> > CPU B
> > bpf_prog_test_run_syscall()
> >   rcu_read_lock_trace()
> >     bpf_prog_run_pin_on_cpu()
> >       bpf_prog_load()
> >         bpf_tracing_func_proto()
> >           trace_set_clr_event()
> >             mutex_lock(&event_mutex)
> >
> > Delegate trace_set_clr_event() to workqueue to avoid
> > such lock dependency.
> >
> > Signed-off-by: Alexei Starovoitov <ast@kernel.org>
> > ---
> >  kernel/trace/bpf_trace.c | 7 ++++---
> >  1 file changed, 4 insertions(+), 3 deletions(-)
> >
>
> There is a tiny chance that bpf_printk() might not produce data (for a
> little bit) if the time between program verification and its
> triggering right after that is shorter than workqueue delay, right?

yeah, but also see the comment in __set_printk_clr_event().
Unfortunately users can enable/disable this event at any time
just like other ftrace events.
The trace_bpf_trace_printk is fragile and racy.
In addition, trace_pipe can be configured in weird ways.
cat /sys/kernel/tracing/trace_pipe
will look nothing like normal.
All existing footgun warnings apply.

With Kumar we started discussing a new debug/printk mechanism.
So that arena faults, res_spin_lock timeous can be printed there
and consumed per program instead of global trace_pipe.

> It's probably negligible in practice, so lgtm
>
> Acked-by: Andrii Nakryiko <andrii@kernel.org>
>
> > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> > index a612f6f182e5..13bef2462e94 100644
> > --- a/kernel/trace/bpf_trace.c
> > +++ b/kernel/trace/bpf_trace.c
> > @@ -392,7 +392,7 @@ static const struct bpf_func_proto bpf_trace_printk_proto = {
> >         .arg2_type      = ARG_CONST_SIZE,
> >  };
> >
> > -static void __set_printk_clr_event(void)
> > +static void __set_printk_clr_event(struct work_struct *work)
> >  {
> >         /*
> >          * This program might be calling bpf_trace_printk,
> > @@ -405,10 +405,11 @@ static void __set_printk_clr_event(void)
> >         if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1))
> >                 pr_warn_ratelimited("could not enable bpf_trace_printk events");
> >  }
> > +static DECLARE_WORK(set_printk_work, __set_printk_clr_event);
> >
> >  const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
> >  {
> > -       __set_printk_clr_event();
> > +       schedule_work(&set_printk_work);
> >         return &bpf_trace_printk_proto;
> >  }
> >
> > @@ -451,7 +452,7 @@ static const struct bpf_func_proto bpf_trace_vprintk_proto = {
> >
> >  const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void)
> >  {
> > -       __set_printk_clr_event();
> > +       schedule_work(&set_printk_work);
> >         return &bpf_trace_vprintk_proto;
> >  }
> >
> > --
> > 2.43.5
> >
diff mbox series

Patch

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index a612f6f182e5..13bef2462e94 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -392,7 +392,7 @@  static const struct bpf_func_proto bpf_trace_printk_proto = {
 	.arg2_type	= ARG_CONST_SIZE,
 };
 
-static void __set_printk_clr_event(void)
+static void __set_printk_clr_event(struct work_struct *work)
 {
 	/*
 	 * This program might be calling bpf_trace_printk,
@@ -405,10 +405,11 @@  static void __set_printk_clr_event(void)
 	if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1))
 		pr_warn_ratelimited("could not enable bpf_trace_printk events");
 }
+static DECLARE_WORK(set_printk_work, __set_printk_clr_event);
 
 const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
 {
-	__set_printk_clr_event();
+	schedule_work(&set_printk_work);
 	return &bpf_trace_printk_proto;
 }
 
@@ -451,7 +452,7 @@  static const struct bpf_func_proto bpf_trace_vprintk_proto = {
 
 const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void)
 {
-	__set_printk_clr_event();
+	schedule_work(&set_printk_work);
 	return &bpf_trace_vprintk_proto;
 }