diff mbox series

[bpf-next,v3,2/4] bpf: Prevent extending tail callee prog with freplace

Message ID 20240923134044.22388-3-leon.hwang@linux.dev (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series bpf: Fix tailcall infinite loop caused by freplace | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 215 this patch: 215
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 7 maintainers not CCed: song@kernel.org sdf@fomichev.me haoluo@google.com jolsa@kernel.org kpsingh@kernel.org martin.lau@linux.dev john.fastabend@gmail.com
netdev/build_clang success Errors and warnings before: 274 this patch: 274
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 6948 this patch: 6948
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 43 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 6 this patch: 6
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-18 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18

Commit Message

Leon Hwang Sept. 23, 2024, 1:40 p.m. UTC
Alongside the previous patch, the infinite loop issue caused by the
combination of tailcall and freplace can be prevented completely.

The previous patch cannot prevent the case where a prog is first updated
into a prog_array map and then extended with an freplace prog.

This patch fixes the case by preventing extending a prog, which has been
updated to prog_array map, with freplace prog.

If a prog has been updated to prog_array map, it or its subprog can not
be extended by freplace prog.

Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
 include/linux/bpf.h   |  1 +
 kernel/bpf/arraymap.c |  6 +++++-
 kernel/bpf/syscall.c  | 12 ++++++++++++
 3 files changed, 18 insertions(+), 1 deletion(-)

Comments

Eduard Zingerman Sept. 25, 2024, 5:32 a.m. UTC | #1
On Mon, 2024-09-23 at 21:40 +0800, Leon Hwang wrote:

[...]

> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 048aa2625cbef..b864b37e67c17 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1484,6 +1484,7 @@ struct bpf_prog_aux {
>  	bool exception_cb;
>  	bool exception_boundary;
>  	bool is_extended; /* true if extended by freplace program */
> +	atomic_t tail_callee_cnt;

Nit: the name is a bit misleading, this counts how many times the
     program resides in prog maps. Confusing w/o additional comments.
     Maybe something like 'member_of_prog_array_cnt'?

>  	struct bpf_arena *arena;
>  	/* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
>  	const struct btf_type *attach_func_proto;
> diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
> index 8d97bae98fa70..c12e0e3bf6ad0 100644
> --- a/kernel/bpf/arraymap.c
> +++ b/kernel/bpf/arraymap.c
> @@ -961,13 +961,17 @@ static void *prog_fd_array_get_ptr(struct bpf_map *map,
>  		return ERR_PTR(-EINVAL);
>  	}
>  
> +	atomic_inc(&prog->aux->tail_callee_cnt);
>  	return prog;
>  }

[...]

>  static u32 prog_fd_array_sys_lookup_elem(void *ptr)
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 18b3f9216b050..be829016d8182 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -3501,6 +3501,18 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
>  		tgt_prog = prog->aux->dst_prog;
>  	}
>  
> +	if (prog->type == BPF_PROG_TYPE_EXT &&
> +	    atomic_read(&tgt_prog->aux->tail_callee_cnt)) {
> +		/* Program extensions can not extend target prog when the target
> +		 * prog has been updated to any prog_array map as tail callee.
> +		 * It's to prevent a potential infinite loop like:
> +		 * tgt prog entry -> tgt prog subprog -> freplace prog entry
> +		 * --tailcall-> tgt prog entry.
> +		 */
> +		err = -EINVAL;
> +		goto out_unlock;
> +	}
> +
>  	err = bpf_link_prime(&link->link.link, &link_primer);
>  	if (err)
>  		goto out_unlock;

Is it possible there is a race between map update and prog attach?
E.g. suppose the following sequence of events:
- thread #1 enters prog_fd_array_get_ptr()
- thread #1 successfully completes prog->aux->is_extended check (not extended)
- thread #2 enters bpf_tracing_prog_attach()
- thread #2 does atomic_read() for tgt_prog and it returns 0
- thread #2 proceeds attaching freplace to tgt_prog
- thread #1 does atomic_inc(&prog->aux->tail_callee_cnt)

Thus arriving at a state where tgt_prog is both a member of a map and
is freplaced. Is this a valid scenario?
Leon Hwang Sept. 26, 2024, 7:19 a.m. UTC | #2
On 25/9/24 13:32, Eduard Zingerman wrote:
> On Mon, 2024-09-23 at 21:40 +0800, Leon Hwang wrote:
> 
> [...]
> 
>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
>> index 048aa2625cbef..b864b37e67c17 100644
>> --- a/include/linux/bpf.h
>> +++ b/include/linux/bpf.h
>> @@ -1484,6 +1484,7 @@ struct bpf_prog_aux {
>>  	bool exception_cb;
>>  	bool exception_boundary;
>>  	bool is_extended; /* true if extended by freplace program */
>> +	atomic_t tail_callee_cnt;
> 
> Nit: the name is a bit misleading, this counts how many times the
>      program resides in prog maps. Confusing w/o additional comments.
>      Maybe something like 'member_of_prog_array_cnt'?
> 

'member_of_prog_array_cnt' is not accurate enough.

'prog_array_member_cnt' is better, and it should be accompanied by the
comment /* counts how many times as member of prog_array */.

>>  	struct bpf_arena *arena;
>>  	/* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
>>  	const struct btf_type *attach_func_proto;
>> diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
>> index 8d97bae98fa70..c12e0e3bf6ad0 100644
>> --- a/kernel/bpf/arraymap.c
>> +++ b/kernel/bpf/arraymap.c
>> @@ -961,13 +961,17 @@ static void *prog_fd_array_get_ptr(struct bpf_map *map,
>>  		return ERR_PTR(-EINVAL);
>>  	}
>>  
>> +	atomic_inc(&prog->aux->tail_callee_cnt);
>>  	return prog;
>>  }
> 
> [...]
> 
>>  static u32 prog_fd_array_sys_lookup_elem(void *ptr)
>> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
>> index 18b3f9216b050..be829016d8182 100644
>> --- a/kernel/bpf/syscall.c
>> +++ b/kernel/bpf/syscall.c
>> @@ -3501,6 +3501,18 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
>>  		tgt_prog = prog->aux->dst_prog;
>>  	}
>>  
>> +	if (prog->type == BPF_PROG_TYPE_EXT &&
>> +	    atomic_read(&tgt_prog->aux->tail_callee_cnt)) {
>> +		/* Program extensions can not extend target prog when the target
>> +		 * prog has been updated to any prog_array map as tail callee.
>> +		 * It's to prevent a potential infinite loop like:
>> +		 * tgt prog entry -> tgt prog subprog -> freplace prog entry
>> +		 * --tailcall-> tgt prog entry.
>> +		 */
>> +		err = -EINVAL;
>> +		goto out_unlock;
>> +	}
>> +
>>  	err = bpf_link_prime(&link->link.link, &link_primer);
>>  	if (err)
>>  		goto out_unlock;
> 
> Is it possible there is a race between map update and prog attach?

Yes, it is possible.

> E.g. suppose the following sequence of events:
> - thread #1 enters prog_fd_array_get_ptr()
> - thread #1 successfully completes prog->aux->is_extended check (not extended)
> - thread #2 enters bpf_tracing_prog_attach()
> - thread #2 does atomic_read() for tgt_prog and it returns 0
> - thread #2 proceeds attaching freplace to tgt_prog
> - thread #1 does atomic_inc(&prog->aux->tail_callee_cnt)
> 
> Thus arriving to a state when tgt_prog is both a member of a map and
> is freplaced. Is this a valid scenario?
> 

This patch series aims to prevent the case where tgt_prog is a member of
a prog_array and is freplaced at the same time.

Without this patch series, a prog can be extended by freplace prog and then
be updated to prog_array, or can be updated to prog_array and then be
extended by freplace prog, in order to construct such case.

This patch aims to prevent "be updated to prog_array and then be extended
by freplace prog".
The previous patch aims to prevent "be extended by freplace prog and then
be updated to prog_array".

So, in order to avoid the above case:

diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index a43e62e2a8bb..da4e26029a33 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -948,7 +948,9 @@ static void *prog_fd_array_get_ptr(struct bpf_map *map,
        if (IS_ERR(prog))
                return prog;

-       if (!bpf_prog_map_compatible(map, prog)) {
+       atomic_inc(&prog->aux->tail_callee_cnt);
+       if (!bpf_prog_map_compatible(map, prog) || prog->aux->is_extended) {
+               atomic_dec(&prog->aux->tail_callee_cnt);
                bpf_prog_put(prog);
                return ERR_PTR(-EINVAL);
        }

1. Increment tail_callee_cnt.
2. Decrement tail_callee_cnt, if prog->aux->is_extended.

Then, thread #2 does atomic_read() for tgt_prog, and it won't return 0.

Thanks,
Leon
Eduard Zingerman Sept. 27, 2024, 10:58 a.m. UTC | #3
On Thu, 2024-09-26 at 15:19 +0800, Leon Hwang wrote:

[...]

> > E.g. suppose the following sequence of events:
> > - thread #1 enters prog_fd_array_get_ptr()
> > - thread #1 successfully completes prog->aux->is_extended check (not extended)
> > - thread #2 enters bpf_tracing_prog_attach()
> > - thread #2 does atomic_read() for tgt_prog and it returns 0
> > - thread #2 proceeds attaching freplace to tgt_prog
> > - thread #1 does atomic_inc(&prog->aux->tail_callee_cnt)
> > 
> > Thus arriving to a state when tgt_prog is both a member of a map and
> > is freplaced. Is this a valid scenario?
> > 
> 
> This patch series aims to prevent such case that tgt_prog is a member of
> prog_array and is freplaced at the same time.
> 
> Without this patch series, a prog can be extended by freplace prog and then
> be updated to prog_array, or can be updated to prog_array and then be
> extended by freplace prog, in order to construct such case.
> 
> This patch aims to prevent "be updated to prog_array and then be extended
> by freplace prog".
> The previous patch aims to prevent "be extended by freplace prog and then
> be updated to prog_array".
> 
> So, in order to avoid the above case:
> 
> diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
> index a43e62e2a8bb..da4e26029a33 100644
> --- a/kernel/bpf/arraymap.c
> +++ b/kernel/bpf/arraymap.c
> @@ -948,7 +948,9 @@ static void *prog_fd_array_get_ptr(struct bpf_map *map,
>         if (IS_ERR(prog))
>                 return prog;
> 
> -       if (!bpf_prog_map_compatible(map, prog)) {
> +       atomic_inc(&prog->aux->tail_callee_cnt);
> +       if (!bpf_prog_map_compatible(map, prog) || prog->aux->is_extended) {
> +               atomic_dec(&prog->aux->tail_callee_cnt);
>                 bpf_prog_put(prog);
>                 return ERR_PTR(-EINVAL);
>         }

I'm not sure this really solves the issue.
Documentation for both 'atomic_inc' and 'atomic_read'
(used in bpf_tracing_prog_attach()) says that these are operations with
relaxed memory ordering. Meaning that e.g. 'atomic_inc' executed
inside prog_fd_array_get_ptr() is not necessarily immediately visible
for other thread executing 'atomic_read' in bpf_tracing_prog_attach().
I think that some memory barrier is needed (non-relaxed func variant).

But all this gets unnecessarily complicated, neither
prog_fd_array_get_ptr() nor bpf_tracing_prog_attach() are executed
often, I think that 'tail_callee_cnt' and 'is_extended' should be
protected by a mutex.
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 048aa2625cbef..b864b37e67c17 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1484,6 +1484,7 @@  struct bpf_prog_aux {
 	bool exception_cb;
 	bool exception_boundary;
 	bool is_extended; /* true if extended by freplace program */
+	atomic_t tail_callee_cnt;
 	struct bpf_arena *arena;
 	/* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
 	const struct btf_type *attach_func_proto;
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 8d97bae98fa70..c12e0e3bf6ad0 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -961,13 +961,17 @@  static void *prog_fd_array_get_ptr(struct bpf_map *map,
 		return ERR_PTR(-EINVAL);
 	}
 
+	atomic_inc(&prog->aux->tail_callee_cnt);
 	return prog;
 }
 
 static void prog_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
 {
+	struct bpf_prog *prog = ptr;
+
 	/* bpf_prog is freed after one RCU or tasks trace grace period */
-	bpf_prog_put(ptr);
+	atomic_dec(&prog->aux->tail_callee_cnt);
+	bpf_prog_put(prog);
 }
 
 static u32 prog_fd_array_sys_lookup_elem(void *ptr)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 18b3f9216b050..be829016d8182 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3501,6 +3501,18 @@  static int bpf_tracing_prog_attach(struct bpf_prog *prog,
 		tgt_prog = prog->aux->dst_prog;
 	}
 
+	if (prog->type == BPF_PROG_TYPE_EXT &&
+	    atomic_read(&tgt_prog->aux->tail_callee_cnt)) {
+		/* Program extensions can not extend target prog when the target
+		 * prog has been updated to any prog_array map as tail callee.
+		 * It's to prevent a potential infinite loop like:
+		 * tgt prog entry -> tgt prog subprog -> freplace prog entry
+		 * --tailcall-> tgt prog entry.
+		 */
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
 	err = bpf_link_prime(&link->link.link, &link_primer);
 	if (err)
 		goto out_unlock;