
[RFC] bpf: Prevent recursive deadlocks in BPF programs attached to spin lock helpers using fentry/fexit

Message ID CAE5sdEigPnoGrzN8WU7Tx-h-iFuMZgW06qp0KHWtpvoXxf1OAQ@mail.gmail.com (mailing list archive)
State RFC
Delegated to: BPF
Series [RFC] bpf: Prevent recursive deadlocks in BPF programs attached to spin lock helpers using fentry/fexit

Checks

Context Check Description
netdev/tree_selection success Guessing tree name failed - patch did not apply
bpf/vmtest-bpf-PR success PR summary
bpf/vmtest-bpf-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc
bpf/vmtest-bpf-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization
bpf/vmtest-bpf-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17

Commit Message

Siddharth Chintamaneni Jan. 24, 2024, 3:43 p.m. UTC
While we were working on some experiments with the BPF trampoline, we
came across a deadlock scenario.

A deadlock happens when two nested BPF programs try to acquire the
same lock, i.e., if a BPF program is attached using fexit to
bpf_spin_lock or using fentry to bpf_spin_unlock, and it then
attempts to acquire the same lock that is already held by the outer
BPF program, a deadlock arises.

Here is an example:

SEC("fentry/bpf_spin_unlock")
int fentry_2(void *ctx)
{
  bpf_spin_lock(&x->lock);
  bpf_spin_unlock(&x->lock);
}

SEC("fentry/xxx")
int fentry_1(void *ctx)
{
  bpf_spin_lock(&x->lock);
  bpf_spin_unlock(&x->lock);
}
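
The nesting for the fentry/bpf_spin_unlock case above roughly unfolds as
follows (the fexit/bpf_spin_lock case is analogous, with fentry_2 firing
right after the lock has been taken):

fentry_1 (attached to xxx)
  bpf_spin_lock(&x->lock)          /* lock acquired */
  bpf_spin_unlock(&x->lock)
    -> trampoline invokes fentry_2 before the unlock body runs
       fentry_2
         bpf_spin_lock(&x->lock)   /* same lock still held -> deadlock */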

To prevent these cases, a simple fix could be to add these helpers to
the denylist in the verifier. This fix will prevent such BPF programs
from being loaded.

Previously, a similar solution was proposed to prevent recursion:
https://lore.kernel.org/lkml/20230417154737.12740-2-laoar.shao@gmail.com/

Signed-off-by: Siddharth Chintamaneni <sidchintamaneni@vt.edu>
---

Comments

Jiri Olsa Jan. 30, 2024, 9:25 a.m. UTC | #1
On Wed, Jan 24, 2024 at 10:43:32AM -0500, Siddharth Chintamaneni wrote:
> While we were working on some experiments with BPF trampoline, we came
> across a deadlock scenario that could happen.
> 
> A deadlock happens when two nested BPF programs tries to acquire the
> same lock i.e, If a BPF program is attached using fexit to
> bpf_spin_lock or using a fentry to bpf_spin_unlock, and it then
> attempts to acquire the same lock as the previous BPF program, a
> deadlock situation arises.
> 
> Here is an example:
> 
> SEC(fentry/bpf_spin_unlock)
> int fentry_2{
>   bpf_spin_lock(&x->lock);
>   bpf_spin_unlock(&x->lock);
> }
> 
> SEC(fentry/xxx)
> int fentry_1{
>   bpf_spin_lock(&x->lock);
>   bpf_spin_unlock(&x->lock);
> }

hi,
looks like a valid issue, could you add a selftest for that?

I wonder if we could restrict just the programs that use the bpf_spin_lock/bpf_spin_unlock
helpers? I'm not sure there's any useful use case for tracing spin lock helpers,
but I think we should at least try this before we deny it completely

> 
> To prevent these cases, a simple fix could be adding these helpers to
> denylist in the verifier. This fix will prevent the BPF programs from
> being loaded by the verifier.
> 
> previously, a similar solution was proposed to prevent recursion.
> https://lore.kernel.org/lkml/20230417154737.12740-2-laoar.shao@gmail.com/

the difference is that __rcu_read_lock/__rcu_read_unlock are called unconditionally
(always) when executing a bpf tracing probe, while the problem you described above
only affects programs calling the spin lock helpers (on the same spin lock)

> 
> Signed-off-by: Siddharth Chintamaneni <sidchintamaneni@vt.edu>
> ---
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 65f598694d55..8f1834f27f81 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -20617,6 +20617,10 @@ BTF_ID(func, preempt_count_sub)
>  BTF_ID(func, __rcu_read_lock)
>  BTF_ID(func, __rcu_read_unlock)
>  #endif
> +#if defined(CONFIG_DYNAMIC_FTRACE)

why the CONFIG_DYNAMIC_FTRACE dependency?

jirka

> +BTF_ID(func, bpf_spin_lock)
> +BTF_ID(func, bpf_spin_unlock)
> +#endif
>  BTF_SET_END(btf_id_deny)
Siddharth Chintamaneni Feb. 3, 2024, 12:21 a.m. UTC | #2
On Tue, 30 Jan 2024 at 04:25, Jiri Olsa <olsajiri@gmail.com> wrote:
>
> On Wed, Jan 24, 2024 at 10:43:32AM -0500, Siddharth Chintamaneni wrote:
> > While we were working on some experiments with BPF trampoline, we came
> > across a deadlock scenario that could happen.
> >
> > A deadlock happens when two nested BPF programs tries to acquire the
> > same lock i.e, If a BPF program is attached using fexit to
> > bpf_spin_lock or using a fentry to bpf_spin_unlock, and it then
> > attempts to acquire the same lock as the previous BPF program, a
> > deadlock situation arises.
> >
> > Here is an example:
> >
> > SEC(fentry/bpf_spin_unlock)
> > int fentry_2{
> >   bpf_spin_lock(&x->lock);
> >   bpf_spin_unlock(&x->lock);
> > }
> >
> > SEC(fentry/xxx)
> > int fentry_1{
> >   bpf_spin_lock(&x->lock);
> >   bpf_spin_unlock(&x->lock);
> > }
>
> hi,
> looks like valid issue, could you add selftest for that?

Hello,
I have added selftest for the deadlock scenario.

>
> I wonder we could restrict just programs that use bpf_spin_lock/bpf_spin_unlock
> helpers? I'm not sure there's any useful use case for tracing spin lock helpers,
> but I think we should at least try this before we deny it completely
>

If we only restrict programs attached to the spin lock helpers that
themselves use the bpf_spin_lock/unlock helpers, there could still be a
scenario where a different helper called from such a program has a BPF
program attached to it which tries to acquire the same lock.
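
For example, something like this (a hypothetical sketch; the map and the
element type are assumed to be the ones from the selftest below, and the
helper traced by the second program is only illustrative) would pass such
a check and still deadlock:

SEC("fexit/bpf_spin_lock")
int prog_a(void *ctx)
{
        int key = 1;

        /* prog_a never calls the spin lock helpers itself, so a check
         * limited to "programs that use bpf_spin_lock/unlock" would let
         * it load; the lock is already held when this program runs
         */
        bpf_map_lookup_elem(&hmap, &key);       /* triggers prog_b below */
        return 0;
}

SEC("fentry/bpf_map_lookup_elem")
int prog_b(void *ctx)
{
        struct hmap_elem *val;
        int key = 1;

        val = bpf_map_lookup_elem(&hmap, &key);
        if (val) {
                bpf_spin_lock(&val->lock);      /* same lock, still held -> deadlock */
                val->cnt++;
                bpf_spin_unlock(&val->lock);
        }
        return 0;
}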

> >
> > To prevent these cases, a simple fix could be adding these helpers to
> > denylist in the verifier. This fix will prevent the BPF programs from
> > being loaded by the verifier.
> >
> > previously, a similar solution was proposed to prevent recursion.
> > https://lore.kernel.org/lkml/20230417154737.12740-2-laoar.shao@gmail.com/
>
> the difference is that __rcu_read_lock/__rcu_read_unlock are called unconditionally
> (always) when executing bpf tracing probe, the problem you described above is only
> for programs calling spin lock helpers (on same spin lock)
>
> >
> > Signed-off-by: Siddharth Chintamaneni <sidchintamaneni@vt.edu>
> > ---
> > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> > index 65f598694d55..8f1834f27f81 100644
> > --- a/kernel/bpf/verifier.c
> > +++ b/kernel/bpf/verifier.c
> > @@ -20617,6 +20617,10 @@ BTF_ID(func, preempt_count_sub)
> >  BTF_ID(func, __rcu_read_lock)
> >  BTF_ID(func, __rcu_read_unlock)
> >  #endif
> > +#if defined(CONFIG_DYNAMIC_FTRACE)
>
> why the CONFIG_DYNAMIC_FTRACE dependency?
As described in the selftests, nesting of multiple BPF programs can
only happen with fentry/fexit programs when DYNAMIC_FTRACE is enabled.
When DYNAMIC_FTRACE is disabled, a BPF program cannot be attached to
any helper function.
>
> jirka
>
> > +BTF_ID(func, bpf_spin_lock)
> > +BTF_ID(func, bpf_spin_unlock)
> > +#endif
> >  BTF_SET_END(btf_id_deny)
Signed-off-by: Siddharth Chintamaneni <sidchintamaneni@vt.edu>
---
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 65f598694d55..ffc2515195f1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -20617,6 +20617,10 @@ BTF_ID(func, preempt_count_sub)
 BTF_ID(func, __rcu_read_lock)
 BTF_ID(func, __rcu_read_unlock)
 #endif
+#ifdef CONFIG_DYNAMIC_FTRACE
+BTF_ID(func, bpf_spin_lock)
+BTF_ID(func, bpf_spin_unlock)
+#endif
 BTF_SET_END(btf_id_deny)

 static bool can_be_sleepable(struct bpf_prog *prog)
diff --git a/tools/testing/selftests/bpf/prog_tests/test_dead_lock.c
b/tools/testing/selftests/bpf/prog_tests/test_dead_lock.c
new file mode 100644
index 000000000000..8e2db654e963
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_dead_lock.c
@@ -0,0 +1,26 @@
+#include <test_progs.h>
+#include "test_dead_lock.skel.h"
+
+void test_dead_lock_fail(void){
+ struct test_dead_lock *skel;
+ int prog_fd;
+ int err;
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ skel = test_dead_lock__open_and_load();
+ if(!ASSERT_OK_PTR(skel, "test_dead_lock__open_and_load"))
+ goto end;
+
+ err = test_dead_lock__attach(skel);
+ if (!ASSERT_OK(err, "test_dead_lock_attach"))
+ goto end;
+
+ prog_fd = bpf_program__fd(skel->progs.dead_lock_test_main);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
+
+end:
+ test_dead_lock__destroy(skel);
+}
+
diff --git a/tools/testing/selftests/bpf/progs/test_dead_lock.c
b/tools/testing/selftests/bpf/progs/test_dead_lock.c
new file mode 100644
index 000000000000..72c6a0b033c9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_dead_lock.c
@@ -0,0 +1,80 @@
+#include <linux/bpf.h>
+#include <linux/version.h>
+#include <bpf/bpf_helpers.h>
+
+struct hmap_elem {
+ int cnt;
+ struct bpf_spin_lock lock;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct hmap_elem);
+} hmap SEC(".maps");
+
+SEC("fexit/bpf_spin_lock")
+int dead_lock_test_inner1(void *ctx){
+
+ struct hmap_elem *val;
+ int key = 1;
+ int err = 0;
+
+ val = bpf_map_lookup_elem(&hmap, &key);
+ if (!val) {
+ goto err;
+ }
+
+ bpf_spin_lock(&val->lock);
+ val->cnt++;
+ bpf_spin_unlock(&val->lock);
+
+err:
+ return err;
+}
+
+SEC("fentry/bpf_spin_unlock")
+int dead_lock_test_inner2(void *ctx){
+
+ struct hmap_elem *val;
+ int key = 1;
+ int err = 0;
+
+ val = bpf_map_lookup_elem(&hmap, &key);
+ if (!val) {
+ goto err;
+ }
+
+ bpf_spin_lock(&val->lock);
+ val->cnt++;
+ bpf_spin_unlock(&val->lock);
+
+err:
+ return err;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int dead_lock_test_main(void *ctx){
+
+ struct hmap_elem nval = {} ,*val;
+ int key = 1;
+ int err = 0;
+
+ val = bpf_map_lookup_elem(&hmap, &key);
+ if (!val) {
+ bpf_map_update_elem(&hmap, &key, &nval, 0);
+ val = bpf_map_lookup_elem(&hmap, &key);
+ if (!val) {
+ goto err;
+ }
+ }
+
+ bpf_spin_lock(&val->lock);
+ val->cnt++;
+ bpf_spin_unlock(&val->lock);
+err:
+ return err;
+}
+
+char _license[] SEC("license") = "GPL";
Yonghong Song Feb. 4, 2024, 7:09 p.m. UTC | #3
On 2/2/24 4:21 PM, Siddharth Chintamaneni wrote:
> On Tue, 30 Jan 2024 at 04:25, Jiri Olsa <olsajiri@gmail.com> wrote:
>> On Wed, Jan 24, 2024 at 10:43:32AM -0500, Siddharth Chintamaneni wrote:
>>> While we were working on some experiments with BPF trampoline, we came
>>> across a deadlock scenario that could happen.
>>>
>>> A deadlock happens when two nested BPF programs tries to acquire the
>>> same lock i.e, If a BPF program is attached using fexit to
>>> bpf_spin_lock or using a fentry to bpf_spin_unlock, and it then
>>> attempts to acquire the same lock as the previous BPF program, a
>>> deadlock situation arises.
>>>
>>> Here is an example:
>>>
>>> SEC(fentry/bpf_spin_unlock)
>>> int fentry_2{
>>>    bpf_spin_lock(&x->lock);
>>>    bpf_spin_unlock(&x->lock);
>>> }
>>>
>>> SEC(fentry/xxx)
>>> int fentry_1{
>>>    bpf_spin_lock(&x->lock);
>>>    bpf_spin_unlock(&x->lock);
>>> }
>> hi,
>> looks like valid issue, could you add selftest for that?
> Hello,
> I have added selftest for the deadlock scenario.
>
>> I wonder we could restrict just programs that use bpf_spin_lock/bpf_spin_unlock
>> helpers? I'm not sure there's any useful use case for tracing spin lock helpers,
>> but I think we should at least try this before we deny it completely
>>
> If we restrict programs (attached to spinlock helpers) that use
> bpf_spin_lock/unlock helpers, there could be a scenario where a helper
> function called within the program has a BPF program attached that
> tries to acquire the same lock.
>
>>> To prevent these cases, a simple fix could be adding these helpers to
>>> denylist in the verifier. This fix will prevent the BPF programs from
>>> being loaded by the verifier.
>>>
>>> previously, a similar solution was proposed to prevent recursion.
>>> https://lore.kernel.org/lkml/20230417154737.12740-2-laoar.shao@gmail.com/
>> the difference is that __rcu_read_lock/__rcu_read_unlock are called unconditionally
>> (always) when executing bpf tracing probe, the problem you described above is only
>> for programs calling spin lock helpers (on same spin lock)
>>
>>> Signed-off-by: Siddharth Chintamaneni <sidchintamaneni@vt.edu>
>>> ---
>>> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
>>> index 65f598694d55..8f1834f27f81 100644
>>> --- a/kernel/bpf/verifier.c
>>> +++ b/kernel/bpf/verifier.c
>>> @@ -20617,6 +20617,10 @@ BTF_ID(func, preempt_count_sub)
>>>   BTF_ID(func, __rcu_read_lock)
>>>   BTF_ID(func, __rcu_read_unlock)
>>>   #endif
>>> +#if defined(CONFIG_DYNAMIC_FTRACE)
>> why the CONFIG_DYNAMIC_FTRACE dependency?
> As we described in the self-tests, nesting of multiple BPF programs
> could only happen with fentry/fexit programs when DYNAMIC_FTRACE is
> enabled. In other scenarios, when DYNAMIC_FTRACE is disabled, a BPF
> program cannot be attached to any helper functions.
>> jirka
>>
>>> +BTF_ID(func, bpf_spin_lock)
>>> +BTF_ID(func, bpf_spin_unlock)
>>> +#endif
>>>   BTF_SET_END(btf_id_deny)

Currently, we already have bpf_spin_lock and bpf_spin_unlock
marked 'notrace':

notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
{
         __bpf_spin_lock_irqsave(lock);
         return 0;
}
notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
{
         __bpf_spin_unlock_irqrestore(lock);
         return 0;
}

But unfortunately, the BPF_CALL_* macros apply 'notrace' to the static
inline functions ____bpf_spin_lock()/____bpf_spin_unlock(), and not
to the outer functions bpf_spin_lock()/bpf_spin_unlock().
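
For illustration, a simplified sketch of what
'notrace BPF_CALL_1(bpf_spin_lock, ...)' currently expands to (argument
plumbing abbreviated):

/* The leading 'notrace' binds to the first declaration in the expansion,
 * i.e. the always-inline ____bpf_spin_lock(), while the bpf_spin_lock()
 * symbol that fentry/fexit can attach to ends up without the attribute.
 */
notrace static __always_inline u64 ____bpf_spin_lock(struct bpf_spin_lock *lock);

u64 bpf_spin_lock(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)  /* traceable */
{
        return ____bpf_spin_lock((struct bpf_spin_lock *)(unsigned long)r1);
}

static __always_inline u64 ____bpf_spin_lock(struct bpf_spin_lock *lock)
{
        __bpf_spin_lock_irqsave(lock);
        return 0;
}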

I think the following is a better fix and reflects the original
intention:

diff --git a/include/linux/filter.h b/include/linux/filter.h
index fee070b9826e..779f8ee71607 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -566,6 +566,25 @@ static inline bool insn_is_zext(const struct bpf_insn *insn)
  #define BPF_CALL_4(name, ...)  BPF_CALL_x(4, name, __VA_ARGS__)
  #define BPF_CALL_5(name, ...)  BPF_CALL_x(5, name, __VA_ARGS__)
  
+#define NOTRACE_BPF_CALL_x(x, name, ...)                                              \
+       static __always_inline                                                 \
+       u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__));   \
+       typedef u64 (*btf_##name)(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \
+       notrace u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__));         \
+       notrace u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__))          \
+       {                                                                      \
+               return ((btf_##name)____##name)(__BPF_MAP(x,__BPF_CAST,__BPF_N,__VA_ARGS__));\
+       }                                                                      \
+       static __always_inline                                                 \
+       u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__))
+
+#define NOTRACE_BPF_CALL_0(name, ...)  NOTRACE_BPF_CALL_x(0, name, __VA_ARGS__)
+#define NOTRACE_BPF_CALL_1(name, ...)  NOTRACE_BPF_CALL_x(1, name, __VA_ARGS__)
+#define NOTRACE_BPF_CALL_2(name, ...)  NOTRACE_BPF_CALL_x(2, name, __VA_ARGS__)
+#define NOTRACE_BPF_CALL_3(name, ...)  NOTRACE_BPF_CALL_x(3, name, __VA_ARGS__)
+#define NOTRACE_BPF_CALL_4(name, ...)  NOTRACE_BPF_CALL_x(4, name, __VA_ARGS__)
+#define NOTRACE_BPF_CALL_5(name, ...)  NOTRACE_BPF_CALL_x(5, name, __VA_ARGS__)
+
  #define bpf_ctx_range(TYPE, MEMBER)                                            \
         offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1
  #define bpf_ctx_range_till(TYPE, MEMBER1, MEMBER2)                             \
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 4db1c658254c..87136e27a99a 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -334,7 +334,7 @@ static inline void __bpf_spin_lock_irqsave(struct bpf_spin_lock *lock)
         __this_cpu_write(irqsave_flags, flags);
  }
  
-notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
+NOTRACE_BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
  {
         __bpf_spin_lock_irqsave(lock);
         return 0;
@@ -357,7 +357,7 @@ static inline void __bpf_spin_unlock_irqrestore(struct bpf_spin_lock *lock)
         local_irq_restore(flags);
  }
  
-notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
+NOTRACE_BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
  {
         __bpf_spin_unlock_irqrestore(lock);
         return 0;


The NOTRACE_BPF_CALL_*() macros could be consolidated with BPF_CALL_*(), but I think
a separate macro might be easier to understand.

> Signed-off-by: Siddharth Chintamaneni <sidchintamaneni@vt.edu>
> ---
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 65f598694d55..ffc2515195f1 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -20617,6 +20617,10 @@ BTF_ID(func, preempt_count_sub)
>   BTF_ID(func, __rcu_read_lock)
>   BTF_ID(func, __rcu_read_unlock)
>   #endif
> +#ifdef CONFIG_DYNAMIC_FTRACE
> +BTF_ID(func, bpf_spin_lock)
> +BTF_ID(func, bpf_spin_unlock)
> +#endif
>   BTF_SET_END(btf_id_deny)
>
>   static bool can_be_sleepable(struct bpf_prog *prog)
[...]
Siddharth Chintamaneni Feb. 7, 2024, 12:21 a.m. UTC | #4
On Sun, 4 Feb 2024 at 14:09, Yonghong Song <yonghong.song@linux.dev> wrote:
>
>
> On 2/2/24 4:21 PM, Siddharth Chintamaneni wrote:
> > On Tue, 30 Jan 2024 at 04:25, Jiri Olsa <olsajiri@gmail.com> wrote:
> >> On Wed, Jan 24, 2024 at 10:43:32AM -0500, Siddharth Chintamaneni wrote:
> >>> While we were working on some experiments with BPF trampoline, we came
> >>> across a deadlock scenario that could happen.
> >>>
> >>> A deadlock happens when two nested BPF programs tries to acquire the
> >>> same lock i.e, If a BPF program is attached using fexit to
> >>> bpf_spin_lock or using a fentry to bpf_spin_unlock, and it then
> >>> attempts to acquire the same lock as the previous BPF program, a
> >>> deadlock situation arises.
> >>>
> >>> Here is an example:
> >>>
> >>> SEC(fentry/bpf_spin_unlock)
> >>> int fentry_2{
> >>>    bpf_spin_lock(&x->lock);
> >>>    bpf_spin_unlock(&x->lock);
> >>> }
> >>>
> >>> SEC(fentry/xxx)
> >>> int fentry_1{
> >>>    bpf_spin_lock(&x->lock);
> >>>    bpf_spin_unlock(&x->lock);
> >>> }
> >> hi,
> >> looks like valid issue, could you add selftest for that?
> > Hello,
> > I have added selftest for the deadlock scenario.
> >
> >> I wonder we could restrict just programs that use bpf_spin_lock/bpf_spin_unlock
> >> helpers? I'm not sure there's any useful use case for tracing spin lock helpers,
> >> but I think we should at least try this before we deny it completely
> >>
> > If we restrict programs (attached to spinlock helpers) that use
> > bpf_spin_lock/unlock helpers, there could be a scenario where a helper
> > function called within the program has a BPF program attached that
> > tries to acquire the same lock.
> >
> >>> To prevent these cases, a simple fix could be adding these helpers to
> >>> denylist in the verifier. This fix will prevent the BPF programs from
> >>> being loaded by the verifier.
> >>>
> >>> previously, a similar solution was proposed to prevent recursion.
> >>> https://lore.kernel.org/lkml/20230417154737.12740-2-laoar.shao@gmail.com/
> >> the difference is that __rcu_read_lock/__rcu_read_unlock are called unconditionally
> >> (always) when executing bpf tracing probe, the problem you described above is only
> >> for programs calling spin lock helpers (on same spin lock)
> >>
> >>> Signed-off-by: Siddharth Chintamaneni <sidchintamaneni@vt.edu>
> >>> ---
> >>> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> >>> index 65f598694d55..8f1834f27f81 100644
> >>> --- a/kernel/bpf/verifier.c
> >>> +++ b/kernel/bpf/verifier.c
> >>> @@ -20617,6 +20617,10 @@ BTF_ID(func, preempt_count_sub)
> >>>   BTF_ID(func, __rcu_read_lock)
> >>>   BTF_ID(func, __rcu_read_unlock)
> >>>   #endif
> >>> +#if defined(CONFIG_DYNAMIC_FTRACE)
> >> why the CONFIG_DYNAMIC_FTRACE dependency?
> > As we described in the self-tests, nesting of multiple BPF programs
> > could only happen with fentry/fexit programs when DYNAMIC_FTRACE is
> > enabled. In other scenarios, when DYNAMIC_FTRACE is disabled, a BPF
> > program cannot be attached to any helper functions.
> >> jirka
> >>
> >>> +BTF_ID(func, bpf_spin_lock)
> >>> +BTF_ID(func, bpf_spin_unlock)
> >>> +#endif
> >>>   BTF_SET_END(btf_id_deny)
>
> Currently, we already have 'notrace' marked to bpf_spin_lock
> and bpf_spin_unlock:
>
> notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
> {
>          __bpf_spin_lock_irqsave(lock);
>          return 0;
> }
> notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
> {
>          __bpf_spin_unlock_irqrestore(lock);
>          return 0;
> }
>
> But unfortunately, BPF_CALL_* macros put notrace to the static
> inline function ____bpf_spin_lock()/____bpf_spin_unlock(), and not
> to static function bpf_spin_lock()/bpf_spin_unlock().
>
> I think the following is a better fix and reflects original
> intention:

My bad, I somehow tested the fix with the notrace macro incorrectly
and didn't realize that the problem is due to inlining. You are right,
and I agree that the proposed solution fixes the unintended bug.



>
> diff --git a/include/linux/filter.h b/include/linux/filter.h
> index fee070b9826e..779f8ee71607 100644
> --- a/include/linux/filter.h
> +++ b/include/linux/filter.h
> @@ -566,6 +566,25 @@ static inline bool insn_is_zext(const struct bpf_insn *insn)
>   #define BPF_CALL_4(name, ...)  BPF_CALL_x(4, name, __VA_ARGS__)
>   #define BPF_CALL_5(name, ...)  BPF_CALL_x(5, name, __VA_ARGS__)
>
> +#define NOTRACE_BPF_CALL_x(x, name, ...)                                              \
> +       static __always_inline                                                 \
> +       u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__));   \
> +       typedef u64 (*btf_##name)(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \
> +       notrace u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__));         \
> +       notrace u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__))          \
> +       {                                                                      \
> +               return ((btf_##name)____##name)(__BPF_MAP(x,__BPF_CAST,__BPF_N,__VA_ARGS__));\
> +       }                                                                      \
> +       static __always_inline                                                 \
> +       u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__))
> +
> +#define NOTRACE_BPF_CALL_0(name, ...)  NOTRACE_BPF_CALL_x(0, name, __VA_ARGS__)
> +#define NOTRACE_BPF_CALL_1(name, ...)  NOTRACE_BPF_CALL_x(1, name, __VA_ARGS__)
> +#define NOTRACE_BPF_CALL_2(name, ...)  NOTRACE_BPF_CALL_x(2, name, __VA_ARGS__)
> +#define NOTRACE_BPF_CALL_3(name, ...)  NOTRACE_BPF_CALL_x(3, name, __VA_ARGS__)
> +#define NOTRACE_BPF_CALL_4(name, ...)  NOTRACE_BPF_CALL_x(4, name, __VA_ARGS__)
> +#define NOTRACE_BPF_CALL_5(name, ...)  NOTRACE_BPF_CALL_x(5, name, __VA_ARGS__)
> +
>   #define bpf_ctx_range(TYPE, MEMBER)                                            \
>          offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1
>   #define bpf_ctx_range_till(TYPE, MEMBER1, MEMBER2)                             \
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index 4db1c658254c..87136e27a99a 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -334,7 +334,7 @@ static inline void __bpf_spin_lock_irqsave(struct bpf_spin_lock *lock)
>          __this_cpu_write(irqsave_flags, flags);
>   }
>
> -notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
> +NOTRACE_BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
>   {
>          __bpf_spin_lock_irqsave(lock);
>          return 0;
> @@ -357,7 +357,7 @@ static inline void __bpf_spin_unlock_irqrestore(struct bpf_spin_lock *lock)
>          local_irq_restore(flags);
>   }
>
> -notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
> +NOTRACE_BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
>   {
>          __bpf_spin_unlock_irqrestore(lock);
>          return 0;
>
>
> Macros NOTRACE_BPF_CALL_*() could be consolated with BPF_CALL_*() but I think
> a separate macro might be easier to understand.
>
> > Signed-off-by: Siddharth Chintamaneni <sidchintamaneni@vt.edu>
> > ---
> > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> > index 65f598694d55..ffc2515195f1 100644
> > --- a/kernel/bpf/verifier.c
> > +++ b/kernel/bpf/verifier.c
> > @@ -20617,6 +20617,10 @@ BTF_ID(func, preempt_count_sub)
> >   BTF_ID(func, __rcu_read_lock)
> >   BTF_ID(func, __rcu_read_unlock)
> >   #endif
> > +#ifdef CONFIG_DYNAMIC_FTRACE
> > +BTF_ID(func, bpf_spin_lock)
> > +BTF_ID(func, bpf_spin_unlock)
> > +#endif
> >   BTF_SET_END(btf_id_deny)
> >
> >   static bool can_be_sleepable(struct bpf_prog *prog)
> [...]
Yonghong Song Feb. 7, 2024, 4:25 a.m. UTC | #5
On 2/6/24 4:21 PM, Siddharth Chintamaneni wrote:
> On Sun, 4 Feb 2024 at 14:09, Yonghong Song <yonghong.song@linux.dev> wrote:
>>
>> On 2/2/24 4:21 PM, Siddharth Chintamaneni wrote:
>>> On Tue, 30 Jan 2024 at 04:25, Jiri Olsa <olsajiri@gmail.com> wrote:
>>>> On Wed, Jan 24, 2024 at 10:43:32AM -0500, Siddharth Chintamaneni wrote:
>>>>> While we were working on some experiments with BPF trampoline, we came
>>>>> across a deadlock scenario that could happen.
>>>>>
>>>>> A deadlock happens when two nested BPF programs tries to acquire the
>>>>> same lock i.e, If a BPF program is attached using fexit to
>>>>> bpf_spin_lock or using a fentry to bpf_spin_unlock, and it then
>>>>> attempts to acquire the same lock as the previous BPF program, a
>>>>> deadlock situation arises.
>>>>>
>>>>> Here is an example:
>>>>>
>>>>> SEC(fentry/bpf_spin_unlock)
>>>>> int fentry_2{
>>>>>     bpf_spin_lock(&x->lock);
>>>>>     bpf_spin_unlock(&x->lock);
>>>>> }
>>>>>
>>>>> SEC(fentry/xxx)
>>>>> int fentry_1{
>>>>>     bpf_spin_lock(&x->lock);
>>>>>     bpf_spin_unlock(&x->lock);
>>>>> }
>>>> hi,
>>>> looks like valid issue, could you add selftest for that?
>>> Hello,
>>> I have added selftest for the deadlock scenario.
>>>
>>>> I wonder we could restrict just programs that use bpf_spin_lock/bpf_spin_unlock
>>>> helpers? I'm not sure there's any useful use case for tracing spin lock helpers,
>>>> but I think we should at least try this before we deny it completely
>>>>
>>> If we restrict programs (attached to spinlock helpers) that use
>>> bpf_spin_lock/unlock helpers, there could be a scenario where a helper
>>> function called within the program has a BPF program attached that
>>> tries to acquire the same lock.
>>>
>>>>> To prevent these cases, a simple fix could be adding these helpers to
>>>>> denylist in the verifier. This fix will prevent the BPF programs from
>>>>> being loaded by the verifier.
>>>>>
>>>>> previously, a similar solution was proposed to prevent recursion.
>>>>> https://lore.kernel.org/lkml/20230417154737.12740-2-laoar.shao@gmail.com/
>>>> the difference is that __rcu_read_lock/__rcu_read_unlock are called unconditionally
>>>> (always) when executing bpf tracing probe, the problem you described above is only
>>>> for programs calling spin lock helpers (on same spin lock)
>>>>
>>>>> Signed-off-by: Siddharth Chintamaneni <sidchintamaneni@vt.edu>
>>>>> ---
>>>>> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
>>>>> index 65f598694d55..8f1834f27f81 100644
>>>>> --- a/kernel/bpf/verifier.c
>>>>> +++ b/kernel/bpf/verifier.c
>>>>> @@ -20617,6 +20617,10 @@ BTF_ID(func, preempt_count_sub)
>>>>>    BTF_ID(func, __rcu_read_lock)
>>>>>    BTF_ID(func, __rcu_read_unlock)
>>>>>    #endif
>>>>> +#if defined(CONFIG_DYNAMIC_FTRACE)
>>>> why the CONFIG_DYNAMIC_FTRACE dependency?
>>> As we described in the self-tests, nesting of multiple BPF programs
>>> could only happen with fentry/fexit programs when DYNAMIC_FTRACE is
>>> enabled. In other scenarios, when DYNAMIC_FTRACE is disabled, a BPF
>>> program cannot be attached to any helper functions.
>>>> jirka
>>>>
>>>>> +BTF_ID(func, bpf_spin_lock)
>>>>> +BTF_ID(func, bpf_spin_unlock)
>>>>> +#endif
>>>>>    BTF_SET_END(btf_id_deny)
>> Currently, we already have 'notrace' marked to bpf_spin_lock
>> and bpf_spin_unlock:
>>
>> notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
>> {
>>           __bpf_spin_lock_irqsave(lock);
>>           return 0;
>> }
>> notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
>> {
>>           __bpf_spin_unlock_irqrestore(lock);
>>           return 0;
>> }
>>
>> But unfortunately, BPF_CALL_* macros put notrace to the static
>> inline function ____bpf_spin_lock()/____bpf_spin_unlock(), and not
>> to static function bpf_spin_lock()/bpf_spin_unlock().
>>
>> I think the following is a better fix and reflects original
>> intention:
> My bad, I somehow incorrectly tested the fix using the notrace macro
> and didn't realize that it is because of inlining. You are right, and
> I agree that the proposed solution fixes the unintended bug.

Thanks for confirmation, I will send a formal patch later.

>
>
>
>> diff --git a/include/linux/filter.h b/include/linux/filter.h
>> index fee070b9826e..779f8ee71607 100644
>> --- a/include/linux/filter.h
>> +++ b/include/linux/filter.h
>> @@ -566,6 +566,25 @@ static inline bool insn_is_zext(const struct bpf_insn *insn)
>>    #define BPF_CALL_4(name, ...)  BPF_CALL_x(4, name, __VA_ARGS__)
>>    #define BPF_CALL_5(name, ...)  BPF_CALL_x(5, name, __VA_ARGS__)
>>
>> +#define NOTRACE_BPF_CALL_x(x, name, ...)                                              \
>> +       static __always_inline                                                 \
>> +       u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__));   \
>> +       typedef u64 (*btf_##name)(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \
>> +       notrace u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__));         \
>> +       notrace u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__))          \
>> +       {                                                                      \
>> +               return ((btf_##name)____##name)(__BPF_MAP(x,__BPF_CAST,__BPF_N,__VA_ARGS__));\
>> +       }                                                                      \
>> +       static __always_inline                                                 \
>> +       u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__))
>> +
>> +#define NOTRACE_BPF_CALL_0(name, ...)  NOTRACE_BPF_CALL_x(0, name, __VA_ARGS__)
>> +#define NOTRACE_BPF_CALL_1(name, ...)  NOTRACE_BPF_CALL_x(1, name, __VA_ARGS__)
>> +#define NOTRACE_BPF_CALL_2(name, ...)  NOTRACE_BPF_CALL_x(2, name, __VA_ARGS__)
>> +#define NOTRACE_BPF_CALL_3(name, ...)  NOTRACE_BPF_CALL_x(3, name, __VA_ARGS__)
>> +#define NOTRACE_BPF_CALL_4(name, ...)  NOTRACE_BPF_CALL_x(4, name, __VA_ARGS__)
>> +#define NOTRACE_BPF_CALL_5(name, ...)  NOTRACE_BPF_CALL_x(5, name, __VA_ARGS__)
>> +
>>    #define bpf_ctx_range(TYPE, MEMBER)                                            \
>>           offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1
>>    #define bpf_ctx_range_till(TYPE, MEMBER1, MEMBER2)                             \
>> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
>> index 4db1c658254c..87136e27a99a 100644
>> --- a/kernel/bpf/helpers.c
>> +++ b/kernel/bpf/helpers.c
>> @@ -334,7 +334,7 @@ static inline void __bpf_spin_lock_irqsave(struct bpf_spin_lock *lock)
>>           __this_cpu_write(irqsave_flags, flags);
>>    }
>>
>> -notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
>> +NOTRACE_BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
>>    {
>>           __bpf_spin_lock_irqsave(lock);
>>           return 0;
>> @@ -357,7 +357,7 @@ static inline void __bpf_spin_unlock_irqrestore(struct bpf_spin_lock *lock)
>>           local_irq_restore(flags);
>>    }
>>
>> -notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
>> +NOTRACE_BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
>>    {
>>           __bpf_spin_unlock_irqrestore(lock);
>>           return 0;
>>
>>
>> Macros NOTRACE_BPF_CALL_*() could be consolated with BPF_CALL_*() but I think
>> a separate macro might be easier to understand.
>>
[...]

Patch

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 65f598694d55..8f1834f27f81 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -20617,6 +20617,10 @@  BTF_ID(func, preempt_count_sub)
 BTF_ID(func, __rcu_read_lock)
 BTF_ID(func, __rcu_read_unlock)
 #endif
+#if defined(CONFIG_DYNAMIC_FTRACE)
+BTF_ID(func, bpf_spin_lock)
+BTF_ID(func, bpf_spin_unlock)
+#endif
 BTF_SET_END(btf_id_deny)