diff mbox series

[bpf-next,v4,1/4] bpf: Prevent updating extended prog to prog_array map

Message ID 20240929132757.79826-2-leon.hwang@linux.dev (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series bpf: Fix tailcall infinite loop caused by freplace | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 213 this patch: 213
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 7 maintainers not CCed: song@kernel.org sdf@fomichev.me haoluo@google.com jolsa@kernel.org kpsingh@kernel.org martin.lau@linux.dev john.fastabend@gmail.com
netdev/build_clang success Errors and warnings before: 272 this patch: 272
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 6964 this patch: 6964
netdev/checkpatch warning WARNING: The commit message has 'Call Trace:', perhaps it also needs a 'Fixes:' tag?
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 6 this patch: 6
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-18 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18

Commit Message

Leon Hwang Sept. 29, 2024, 1:27 p.m. UTC
This patch partially prevents a potential infinite loop issue caused by
the combination of tailcall and freplace.

For example:

tc_bpf2bpf.c:

// SPDX-License-Identifier: GPL-2.0

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

__noinline
int subprog_tc(struct __sk_buff *skb)
{
	return skb->len * 2;
}

SEC("tc")
int entry_tc(struct __sk_buff *skb)
{
	return subprog_tc(skb);
}

char __license[] SEC("license") = "GPL";

tailcall_freplace.c:

// SPDX-License-Identifier: GPL-2.0

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
	__uint(max_entries, 1);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");

int count = 0;

SEC("freplace")
int entry_freplace(struct __sk_buff *skb)
{
	count++;
	bpf_tail_call_static(skb, &jmp_table, 0);
	return count;
}

char __license[] SEC("license") = "GPL";

The attach target of entry_freplace is subprog_tc, and the tail callee
in entry_freplace is entry_tc.

Then, the infinite loop will be entry_tc -> subprog_tc -> entry_freplace
--tailcall-> entry_tc, because tail_call_cnt in entry_freplace starts
counting from zero on every execution of entry_freplace. The kernel will
panic, like:

[   15.310490] BUG: TASK stack guard page was hit at (____ptrval____)
(stack is (____ptrval____)..(____ptrval____))
[   15.310490] Oops: stack guard page: 0000 [#1] PREEMPT SMP NOPTI
[   15.310490] CPU: 1 PID: 89 Comm: test_progs Tainted: G           OE
   6.10.0-rc6-g026dcdae8d3e-dirty #72
[   15.310490] Hardware name: QEMU Ubuntu 24.04 PC (i440FX + PIIX,
1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
[   15.310490] RIP: 0010:bpf_prog_3a140cef239a4b4f_subprog_tail+0x14/0x53
[   15.310490] Code: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc
cc cc cc cc cc f3 0f 1e fa 0f 1f 44 00 00 0f 1f 00 55 48 89 e5 f3 0f 1e
fa <50> 50 53 41 55 48 89 fb 49 bd 00 2a 46 82 98 9c ff ff 48 89 df 4c
[   15.310490] RSP: 0018:ffffb500c0aa0000 EFLAGS: 00000202
[   15.310490] RAX: ffffb500c0aa0028 RBX: ffff9c98808b7e00 RCX:
0000000000008cb5
[   15.310490] RDX: 0000000000000000 RSI: ffff9c9882462a00 RDI:
ffff9c98808b7e00
[   15.310490] RBP: ffffb500c0aa0000 R08: 0000000000000000 R09:
0000000000000000
[   15.310490] R10: 0000000000000001 R11: 0000000000000000 R12:
ffffb500c01af000
[   15.310490] R13: ffffb500c01cd000 R14: 0000000000000000 R15:
0000000000000000
[   15.310490] FS:  00007f133b665140(0000) GS:ffff9c98bbd00000(0000)
knlGS:0000000000000000
[   15.310490] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   15.310490] CR2: ffffb500c0a9fff8 CR3: 0000000102478000 CR4:
00000000000006f0
[   15.310490] Call Trace:
[   15.310490]  <#DF>
[   15.310490]  ? die+0x36/0x90
[   15.310490]  ? handle_stack_overflow+0x4d/0x60
[   15.310490]  ? exc_double_fault+0x117/0x1a0
[   15.310490]  ? asm_exc_double_fault+0x23/0x30
[   15.310490]  ? bpf_prog_3a140cef239a4b4f_subprog_tail+0x14/0x53
[   15.310490]  </#DF>
[   15.310490]  <TASK>
[   15.310490]  bpf_prog_85781a698094722f_entry+0x4c/0x64
[   15.310490]  bpf_prog_1c515f389a9059b4_entry2+0x19/0x1b
[   15.310490]  ...
[   15.310490]  bpf_prog_85781a698094722f_entry+0x4c/0x64
[   15.310490]  bpf_prog_1c515f389a9059b4_entry2+0x19/0x1b
[   15.310490]  bpf_test_run+0x210/0x370
[   15.310490]  ? bpf_test_run+0x128/0x370
[   15.310490]  bpf_prog_test_run_skb+0x388/0x7a0
[   15.310490]  __sys_bpf+0xdbf/0x2c40
[   15.310490]  ? clockevents_program_event+0x52/0xf0
[   15.310490]  ? lock_release+0xbf/0x290
[   15.310490]  __x64_sys_bpf+0x1e/0x30
[   15.310490]  do_syscall_64+0x68/0x140
[   15.310490]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
[   15.310490] RIP: 0033:0x7f133b52725d
[   15.310490] Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa
48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f
05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 8b bb 0d 00 f7 d8 64 89 01 48
[   15.310490] RSP: 002b:00007ffddbc10258 EFLAGS: 00000206 ORIG_RAX:
0000000000000141
[   15.310490] RAX: ffffffffffffffda RBX: 00007ffddbc10828 RCX:
00007f133b52725d
[   15.310490] RDX: 0000000000000050 RSI: 00007ffddbc102a0 RDI:
000000000000000a
[   15.310490] RBP: 00007ffddbc10270 R08: 0000000000000000 R09:
00007ffddbc102a0
[   15.310490] R10: 0000000000000064 R11: 0000000000000206 R12:
0000000000000004
[   15.310490] R13: 0000000000000000 R14: 0000558ec4c24890 R15:
00007f133b6ed000
[   15.310490]  </TASK>
[   15.310490] Modules linked in: bpf_testmod(OE)
[   15.310490] ---[ end trace 0000000000000000 ]---
[   15.310490] RIP: 0010:bpf_prog_3a140cef239a4b4f_subprog_tail+0x14/0x53
[   15.310490] Code: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc
cc cc cc cc cc f3 0f 1e fa 0f 1f 44 00 00 0f 1f 00 55 48 89 e5 f3 0f 1e
fa <50> 50 53 41 55 48 89 fb 49 bd 00 2a 46 82 98 9c ff ff 48 89 df 4c
[   15.310490] RSP: 0018:ffffb500c0aa0000 EFLAGS: 00000202
[   15.310490] RAX: ffffb500c0aa0028 RBX: ffff9c98808b7e00 RCX:
0000000000008cb5
[   15.310490] RDX: 0000000000000000 RSI: ffff9c9882462a00 RDI:
ffff9c98808b7e00
[   15.310490] RBP: ffffb500c0aa0000 R08: 0000000000000000 R09:
0000000000000000
[   15.310490] R10: 0000000000000001 R11: 0000000000000000 R12:
ffffb500c01af000
[   15.310490] R13: ffffb500c01cd000 R14: 0000000000000000 R15:
0000000000000000
[   15.310490] FS:  00007f133b665140(0000) GS:ffff9c98bbd00000(0000)
knlGS:0000000000000000
[   15.310490] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   15.310490] CR2: ffffb500c0a9fff8 CR3: 0000000102478000 CR4:
00000000000006f0
[   15.310490] Kernel panic - not syncing: Fatal exception in interrupt
[   15.310490] Kernel Offset: 0x30000000 from 0xffffffff81000000
(relocation range: 0xffffffff80000000-0xffffffffbfffffff)

This patch partially prevents this panic by preventing an extended prog
from being updated into a prog_array map.

If a prog or its subprog has been extended by an freplace prog, the prog
cannot be updated into a prog_array map.

Together with the next patch, the panic will be prevented completely.

While at it, fix a minor style issue by replacing eight spaces with a tab.

Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
 include/linux/bpf.h   |  2 ++
 kernel/bpf/arraymap.c | 21 +++++++++++++++++----
 kernel/bpf/core.c     |  1 +
 kernel/bpf/syscall.c  | 42 ++++++++++++++++++++++++++++++++++++------
 4 files changed, 56 insertions(+), 10 deletions(-)

Comments

Eduard Zingerman Oct. 1, 2024, 11:13 a.m. UTC | #1
On Sun, 2024-09-29 at 21:27 +0800, Leon Hwang wrote:

[...]

> diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
> index 79660e3fca4c1..4a4de4f014be9 100644
> --- a/kernel/bpf/arraymap.c
> +++ b/kernel/bpf/arraymap.c
> @@ -947,16 +947,29 @@ static void *prog_fd_array_get_ptr(struct bpf_map *map,
>  				   struct file *map_file, int fd)
>  {
>  	struct bpf_prog *prog = bpf_prog_get(fd);
> +	bool is_extended;
>  
>  	if (IS_ERR(prog))
>  		return prog;
>  
> -	if (!bpf_prog_map_compatible(map, prog)) {
> -		bpf_prog_put(prog);
> -		return ERR_PTR(-EINVAL);
> -	}
> +	if (!bpf_prog_map_compatible(map, prog))
> +		goto out_put_prog;
> +
> +	mutex_lock(&prog->aux->ext_mutex);
> +	is_extended = prog->aux->is_extended;
> +	mutex_unlock(&prog->aux->ext_mutex);
> +	if (is_extended)
> +		/* Extended prog can not be tail callee. It's to prevent a
> +		 * potential infinite loop like:
> +		 * tail callee prog entry -> tail callee prog subprog ->
> +		 * freplace prog entry --tailcall-> tail callee prog entry.
> +		 */
> +		goto out_put_prog;

Nit: I think return value should be -EBUSY in this case.

>  
>  	return prog;
> +out_put_prog:
> +	bpf_prog_put(prog);
> +	return ERR_PTR(-EINVAL);
>  }
>

[...]

> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index a8f1808a1ca54..db17c52fa35db 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -3212,14 +3212,23 @@ static void bpf_tracing_link_release(struct bpf_link *link)
>  {
>  	struct bpf_tracing_link *tr_link =
>  		container_of(link, struct bpf_tracing_link, link.link);
> -
> -	WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link,
> -						tr_link->trampoline));
> +	struct bpf_prog *tgt_prog = tr_link->tgt_prog;
> +
> +	if (link->prog->type == BPF_PROG_TYPE_EXT) {
> +		mutex_lock(&tgt_prog->aux->ext_mutex);
> +		WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link,
> +							tr_link->trampoline));
> +		tgt_prog->aux->is_extended = false;

In case if unlink fails is_extended should not be reset.

> +		mutex_unlock(&tgt_prog->aux->ext_mutex);
> +	} else {
> +		WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link,
> +							tr_link->trampoline));
> +	}
>  
>  	bpf_trampoline_put(tr_link->trampoline);
>  
>  	/* tgt_prog is NULL if target is a kernel function */
> -	if (tr_link->tgt_prog)
> +	if (tgt_prog)
>  		bpf_prog_put(tr_link->tgt_prog);
>  }

[...]
Leon Hwang Oct. 1, 2024, 1:20 p.m. UTC | #2
On 2024/10/1 19:13, Eduard Zingerman wrote:
> On Sun, 2024-09-29 at 21:27 +0800, Leon Hwang wrote:
> 
> [...]
> 
>> diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
>> index 79660e3fca4c1..4a4de4f014be9 100644
>> --- a/kernel/bpf/arraymap.c
>> +++ b/kernel/bpf/arraymap.c
>> @@ -947,16 +947,29 @@ static void *prog_fd_array_get_ptr(struct bpf_map *map,
>>  				   struct file *map_file, int fd)
>>  {
>>  	struct bpf_prog *prog = bpf_prog_get(fd);
>> +	bool is_extended;
>>  
>>  	if (IS_ERR(prog))
>>  		return prog;
>>  
>> -	if (!bpf_prog_map_compatible(map, prog)) {
>> -		bpf_prog_put(prog);
>> -		return ERR_PTR(-EINVAL);
>> -	}
>> +	if (!bpf_prog_map_compatible(map, prog))
>> +		goto out_put_prog;
>> +
>> +	mutex_lock(&prog->aux->ext_mutex);
>> +	is_extended = prog->aux->is_extended;
>> +	mutex_unlock(&prog->aux->ext_mutex);
>> +	if (is_extended)
>> +		/* Extended prog can not be tail callee. It's to prevent a
>> +		 * potential infinite loop like:
>> +		 * tail callee prog entry -> tail callee prog subprog ->
>> +		 * freplace prog entry --tailcall-> tail callee prog entry.
>> +		 */
>> +		goto out_put_prog;
> 
> Nit: I think return value should be -EBUSY in this case.

Ack.

> 
>>  
>>  	return prog;
>> +out_put_prog:
>> +	bpf_prog_put(prog);
>> +	return ERR_PTR(-EINVAL);
>>  }
>>
> 
> [...]
> 
>> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
>> index a8f1808a1ca54..db17c52fa35db 100644
>> --- a/kernel/bpf/syscall.c
>> +++ b/kernel/bpf/syscall.c
>> @@ -3212,14 +3212,23 @@ static void bpf_tracing_link_release(struct bpf_link *link)
>>  {
>>  	struct bpf_tracing_link *tr_link =
>>  		container_of(link, struct bpf_tracing_link, link.link);
>> -
>> -	WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link,
>> -						tr_link->trampoline));
>> +	struct bpf_prog *tgt_prog = tr_link->tgt_prog;
>> +
>> +	if (link->prog->type == BPF_PROG_TYPE_EXT) {
>> +		mutex_lock(&tgt_prog->aux->ext_mutex);
>> +		WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link,
>> +							tr_link->trampoline));
>> +		tgt_prog->aux->is_extended = false;
> 
> In case if unlink fails is_extended should not be reset.
>

Nope.

In bpf_trampoline_unlink_prog(), 'tr->extension_prog = NULL;' always no
matter whether fail to unlink.

So, it should reset is_extended always too.

Thanks,
Leon

>> +		mutex_unlock(&tgt_prog->aux->ext_mutex);
>> +	} else {
>> +		WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link,
>> +							tr_link->trampoline));
>> +	}
>>  
>>  	bpf_trampoline_put(tr_link->trampoline);
>>  
>>  	/* tgt_prog is NULL if target is a kernel function */
>> -	if (tr_link->tgt_prog)
>> +	if (tgt_prog)
>>  		bpf_prog_put(tr_link->tgt_prog);
>>  }
> 
> [...]
>
Eduard Zingerman Oct. 1, 2024, 4:54 p.m. UTC | #3
On Tue, 2024-10-01 at 21:20 +0800, Leon Hwang wrote:

[...]

> > > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> > > index a8f1808a1ca54..db17c52fa35db 100644
> > > --- a/kernel/bpf/syscall.c
> > > +++ b/kernel/bpf/syscall.c
> > > @@ -3212,14 +3212,23 @@ static void bpf_tracing_link_release(struct bpf_link *link)
> > >  {
> > >  	struct bpf_tracing_link *tr_link =
> > >  		container_of(link, struct bpf_tracing_link, link.link);
> > > -
> > > -	WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link,
> > > -						tr_link->trampoline));
> > > +	struct bpf_prog *tgt_prog = tr_link->tgt_prog;
> > > +
> > > +	if (link->prog->type == BPF_PROG_TYPE_EXT) {
> > > +		mutex_lock(&tgt_prog->aux->ext_mutex);
> > > +		WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link,
> > > +							tr_link->trampoline));
> > > +		tgt_prog->aux->is_extended = false;
> > 
> > In case if unlink fails is_extended should not be reset.
> > 
> 
> Nope.
> 
> In bpf_trampoline_unlink_prog(), 'tr->extension_prog = NULL;' always no
> matter whether fail to unlink.
> 
> So, it should reset is_extended always too.

Hm, you are correct, sorry for the noise.
It is unfortunate that these updates are separated in the code, tbh.

[...]
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 19d8ca8ac960f..aac6d2f42830c 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1483,6 +1483,8 @@  struct bpf_prog_aux {
 	bool xdp_has_frags;
 	bool exception_cb;
 	bool exception_boundary;
+	bool is_extended; /* true if extended by freplace program */
+	struct mutex ext_mutex; /* mutex for is_extended */
 	struct bpf_arena *arena;
 	/* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
 	const struct btf_type *attach_func_proto;
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 79660e3fca4c1..4a4de4f014be9 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -947,16 +947,29 @@  static void *prog_fd_array_get_ptr(struct bpf_map *map,
 				   struct file *map_file, int fd)
 {
 	struct bpf_prog *prog = bpf_prog_get(fd);
+	bool is_extended;
 
 	if (IS_ERR(prog))
 		return prog;
 
-	if (!bpf_prog_map_compatible(map, prog)) {
-		bpf_prog_put(prog);
-		return ERR_PTR(-EINVAL);
-	}
+	if (!bpf_prog_map_compatible(map, prog))
+		goto out_put_prog;
+
+	mutex_lock(&prog->aux->ext_mutex);
+	is_extended = prog->aux->is_extended;
+	mutex_unlock(&prog->aux->ext_mutex);
+	if (is_extended)
+		/* Extended prog can not be tail callee. It's to prevent a
+		 * potential infinite loop like:
+		 * tail callee prog entry -> tail callee prog subprog ->
+		 * freplace prog entry --tailcall-> tail callee prog entry.
+		 */
+		goto out_put_prog;
 
 	return prog;
+out_put_prog:
+	bpf_prog_put(prog);
+	return ERR_PTR(-EINVAL);
 }
 
 static void prog_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 4e07cc057d6f2..ea7f59374b378 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -131,6 +131,7 @@  struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
 	INIT_LIST_HEAD_RCU(&fp->aux->ksym_prefix.lnode);
 #endif
 	mutex_init(&fp->aux->used_maps_mutex);
+	mutex_init(&fp->aux->ext_mutex);
 	mutex_init(&fp->aux->dst_mutex);
 
 	return fp;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index a8f1808a1ca54..db17c52fa35db 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3212,14 +3212,23 @@  static void bpf_tracing_link_release(struct bpf_link *link)
 {
 	struct bpf_tracing_link *tr_link =
 		container_of(link, struct bpf_tracing_link, link.link);
-
-	WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link,
-						tr_link->trampoline));
+	struct bpf_prog *tgt_prog = tr_link->tgt_prog;
+
+	if (link->prog->type == BPF_PROG_TYPE_EXT) {
+		mutex_lock(&tgt_prog->aux->ext_mutex);
+		WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link,
+							tr_link->trampoline));
+		tgt_prog->aux->is_extended = false;
+		mutex_unlock(&tgt_prog->aux->ext_mutex);
+	} else {
+		WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link,
+							tr_link->trampoline));
+	}
 
 	bpf_trampoline_put(tr_link->trampoline);
 
 	/* tgt_prog is NULL if target is a kernel function */
-	if (tr_link->tgt_prog)
+	if (tgt_prog)
 		bpf_prog_put(tr_link->tgt_prog);
 }
 
@@ -3270,6 +3279,24 @@  static const struct bpf_link_ops bpf_tracing_link_lops = {
 	.fill_link_info = bpf_tracing_link_fill_link_info,
 };
 
+static int bpf_extend_prog(struct bpf_tracing_link *link,
+			   struct bpf_trampoline *tr,
+			   struct bpf_prog *tgt_prog)
+{
+	struct bpf_prog_aux *aux = tgt_prog->aux;
+	int err = 0;
+
+	mutex_lock(&aux->ext_mutex);
+	err = bpf_trampoline_link_prog(&link->link, tr);
+	if (err)
+		goto out_unlock;
+
+	aux->is_extended = true;
+out_unlock:
+	mutex_unlock(&aux->ext_mutex);
+	return err;
+}
+
 static int bpf_tracing_prog_attach(struct bpf_prog *prog,
 				   int tgt_prog_fd,
 				   u32 btf_id,
@@ -3354,7 +3381,7 @@  static int bpf_tracing_prog_attach(struct bpf_prog *prog,
 	 *   in prog->aux
 	 *
 	 * - if prog->aux->dst_trampoline is NULL, the program has already been
-         *   attached to a target and its initial target was cleared (below)
+	 *   attached to a target and its initial target was cleared (below)
 	 *
 	 * - if tgt_prog != NULL, the caller specified tgt_prog_fd +
 	 *   target_btf_id using the link_create API.
@@ -3429,7 +3456,10 @@  static int bpf_tracing_prog_attach(struct bpf_prog *prog,
 	if (err)
 		goto out_unlock;
 
-	err = bpf_trampoline_link_prog(&link->link, tr);
+	if (prog->type == BPF_PROG_TYPE_EXT)
+		err = bpf_extend_prog(link, tr, tgt_prog);
+	else
+		err = bpf_trampoline_link_prog(&link->link, tr);
 	if (err) {
 		bpf_link_cleanup(&link_primer);
 		link = NULL;