diff mbox series

[bpf-next,v7,1/2] bpf: Prevent tailcall infinite loop caused by freplace

Message ID 20241010153835.26984-2-leon.hwang@linux.dev (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series bpf: Fix tailcall infinite loop caused by freplace | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-17 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-18 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 206 this patch: 206
netdev/build_tools success Errors and warnings before: 0 (+1) this patch: 0 (+1)
netdev/cc_maintainers warning 7 maintainers not CCed: john.fastabend@gmail.com sdf@fomichev.me martin.lau@linux.dev song@kernel.org haoluo@google.com kpsingh@kernel.org jolsa@kernel.org
netdev/build_clang success Errors and warnings before: 257 this patch: 257
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 6961 this patch: 6961
netdev/checkpatch warning WARNING: line length of 85 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 96 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 6 this patch: 6
netdev/source_inline success Was 0 now: 0

Commit Message

Leon Hwang Oct. 10, 2024, 3:38 p.m. UTC
There is a potential infinite loop issue that can occur when using a
combination of tail calls and freplace.

In an upcoming selftest, the attach target for entry_freplace of
tailcall_freplace.c is subprog_tc of tc_bpf2bpf.c, while the tail call in
entry_freplace leads to entry_tc. This results in an infinite loop:

entry_tc -> subprog_tc -> entry_freplace --tailcall-> entry_tc.

The problem arises because the tail_call_cnt in entry_freplace resets to
zero each time entry_freplace is executed, causing the tail call mechanism
to never terminate, eventually leading to a kernel panic.

To fix this issue, the solution is twofold:

1. Prevent updating a program extended by an freplace program to a
   prog_array map.
2. Prevent extending a program that is already part of a prog_array map
   with an freplace program.

This ensures that:

* If a program or its subprogram has been extended by an freplace program,
  it can no longer be updated to a prog_array map.
* If a program has been added to a prog_array map, neither it nor its
  subprograms can be extended by an freplace program.

Additionally, fix a minor code style issue by replacing eight spaces with a
tab for proper formatting.

Reviewed-by: Eduard Zingerman <eddyz87@gmail.com>
Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
 include/linux/bpf.h     | 17 +++++++++++++----
 kernel/bpf/arraymap.c   | 23 ++++++++++++++++++++++-
 kernel/bpf/core.c       |  1 +
 kernel/bpf/syscall.c    |  7 ++++---
 kernel/bpf/trampoline.c | 37 +++++++++++++++++++++++++++++--------
 5 files changed, 69 insertions(+), 16 deletions(-)

Comments

Alexei Starovoitov Oct. 10, 2024, 5:09 p.m. UTC | #1
On Thu, Oct 10, 2024 at 8:39 AM Leon Hwang <leon.hwang@linux.dev> wrote:
>
> -static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
> +static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
> +                                     struct bpf_trampoline *tr,
> +                                     struct bpf_prog *tgt_prog)
>  {
>         enum bpf_tramp_prog_type kind;
>         struct bpf_tramp_link *link_exiting;
> @@ -544,6 +546,17 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_tr
>                 /* Cannot attach extension if fentry/fexit are in use. */
>                 if (cnt)
>                         return -EBUSY;
> +               guard(mutex)(&tgt_prog->aux->ext_mutex);
> +               if (tgt_prog->aux->prog_array_member_cnt)
> +                       /* Program extensions can not extend target prog when
> +                        * the target prog has been updated to any prog_array
> +                        * map as tail callee. It's to prevent a potential
> +                        * infinite loop like:
> +                        * tgt prog entry -> tgt prog subprog -> freplace prog
> +                        * entry --tailcall-> tgt prog entry.
> +                        */
> +                       return -EBUSY;
> +               tgt_prog->aux->is_extended = true;
>                 tr->extension_prog = link->link.prog;
>                 return bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL,
>                                           link->link.prog->bpf_func);

The suggestion to use guard(mutex) shouldn't be applied mindlessly.
Here you extend the mutex holding range all the way through
bpf_arch_text_poke().
This is wrong.

>         if (kind == BPF_TRAMP_REPLACE) {
>                 WARN_ON_ONCE(!tr->extension_prog);
> +               guard(mutex)(&tgt_prog->aux->ext_mutex);
>                 err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP,
>                                          tr->extension_prog->bpf_func, NULL);
>                 tr->extension_prog = NULL;
> +               tgt_prog->aux->is_extended = false;
>                 return err;

Same here. Clearly wrong to grab the mutex for the duration of poke.

Also Xu's suggestion makes sense to me.
"extension prog should not be tailcalled independently"

So I would disable such case as a part of this patch as well.

pw-bot: cr
Leon Hwang Oct. 11, 2024, 3:27 a.m. UTC | #2
On 11/10/24 01:09, Alexei Starovoitov wrote:
> On Thu, Oct 10, 2024 at 8:39 AM Leon Hwang <leon.hwang@linux.dev> wrote:
>>
>> -static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
>> +static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
>> +                                     struct bpf_trampoline *tr,
>> +                                     struct bpf_prog *tgt_prog)
>>  {
>>         enum bpf_tramp_prog_type kind;
>>         struct bpf_tramp_link *link_exiting;
>> @@ -544,6 +546,17 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_tr
>>                 /* Cannot attach extension if fentry/fexit are in use. */
>>                 if (cnt)
>>                         return -EBUSY;
>> +               guard(mutex)(&tgt_prog->aux->ext_mutex);
>> +               if (tgt_prog->aux->prog_array_member_cnt)
>> +                       /* Program extensions can not extend target prog when
>> +                        * the target prog has been updated to any prog_array
>> +                        * map as tail callee. It's to prevent a potential
>> +                        * infinite loop like:
>> +                        * tgt prog entry -> tgt prog subprog -> freplace prog
>> +                        * entry --tailcall-> tgt prog entry.
>> +                        */
>> +                       return -EBUSY;
>> +               tgt_prog->aux->is_extended = true;
>>                 tr->extension_prog = link->link.prog;
>>                 return bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL,
>>                                           link->link.prog->bpf_func);
> 
> The suggestion to use guard(mutex) shouldn't be applied mindlessly.
> Here you extend the mutex holding range all the way through
> bpf_arch_text_poke().
> This is wrong.
> 

Understood. The guard(mutex) should indeed limit the mutex holding range
to as small as possible. I’ll adjust accordingly.

>>         if (kind == BPF_TRAMP_REPLACE) {
>>                 WARN_ON_ONCE(!tr->extension_prog);
>> +               guard(mutex)(&tgt_prog->aux->ext_mutex);
>>                 err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP,
>>                                          tr->extension_prog->bpf_func, NULL);
>>                 tr->extension_prog = NULL;
>> +               tgt_prog->aux->is_extended = false;
>>                 return err;
> 
> Same here. Clearly wrong to grab the mutex for the duration of poke.
> 

Ack.

> Also Xu's suggestion makes sense to me.
> "extension prog should not be tailcalled independently"
> 
> So I would disable such case as a part of this patch as well.
> 

I’m fine with adding this restriction.

However, it will break a use case that works on the 5.15 kernel:

libxdp XDP dispatcher --> subprog --> freplace A --tailcall-> freplace B.

With this limitation, the chain 'freplace A --tailcall-> freplace B'
will no longer work.

To comply with the new restriction, the use case would need to be
updated to:

libxdp XDP dispatcher --> subprog --> freplace A --tailcall-> XDP B.

> pw-bot: cr

Thanks,
Leon
Alexei Starovoitov Oct. 11, 2024, 3:50 p.m. UTC | #3
On Thu, Oct 10, 2024 at 8:27 PM Leon Hwang <leon.hwang@linux.dev> wrote:
>
>
>
> On 11/10/24 01:09, Alexei Starovoitov wrote:
> > On Thu, Oct 10, 2024 at 8:39 AM Leon Hwang <leon.hwang@linux.dev> wrote:
> >>
> >> -static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
> >> +static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
> >> +                                     struct bpf_trampoline *tr,
> >> +                                     struct bpf_prog *tgt_prog)
> >>  {
> >>         enum bpf_tramp_prog_type kind;
> >>         struct bpf_tramp_link *link_exiting;
> >> @@ -544,6 +546,17 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_tr
> >>                 /* Cannot attach extension if fentry/fexit are in use. */
> >>                 if (cnt)
> >>                         return -EBUSY;
> >> +               guard(mutex)(&tgt_prog->aux->ext_mutex);
> >> +               if (tgt_prog->aux->prog_array_member_cnt)
> >> +                       /* Program extensions can not extend target prog when
> >> +                        * the target prog has been updated to any prog_array
> >> +                        * map as tail callee. It's to prevent a potential
> >> +                        * infinite loop like:
> >> +                        * tgt prog entry -> tgt prog subprog -> freplace prog
> >> +                        * entry --tailcall-> tgt prog entry.
> >> +                        */
> >> +                       return -EBUSY;
> >> +               tgt_prog->aux->is_extended = true;
> >>                 tr->extension_prog = link->link.prog;
> >>                 return bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL,
> >>                                           link->link.prog->bpf_func);
> >
> > The suggestion to use guard(mutex) shouldn't be applied mindlessly.
> > Here you extend the mutex holding range all the way through
> > bpf_arch_text_poke().
> > This is wrong.
> >
>
> Understood. The guard(mutex) should indeed limit the mutex holding range
> to as small as possible. I’ll adjust accordingly.
>
> >>         if (kind == BPF_TRAMP_REPLACE) {
> >>                 WARN_ON_ONCE(!tr->extension_prog);
> >> +               guard(mutex)(&tgt_prog->aux->ext_mutex);
> >>                 err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP,
> >>                                          tr->extension_prog->bpf_func, NULL);
> >>                 tr->extension_prog = NULL;
> >> +               tgt_prog->aux->is_extended = false;
> >>                 return err;
> >
> > Same here. Clearly wrong to grab the mutex for the duration of poke.
> >
>
> Ack.
>
> > Also Xu's suggestion makes sense to me.
> > "extension prog should not be tailcalled independently"
> >
> > So I would disable such case as a part of this patch as well.
> >
>
> I’m fine with adding this restriction.
>
> However, it will break a use case that works on the 5.15 kernel:
>
> libxdp XDP dispatcher --> subprog --> freplace A --tailcall-> freplace B.
>
> With this limitation, the chain 'freplace A --tailcall-> freplace B'
> will no longer work.
>
> To comply with the new restriction, the use case would need to be
> updated to:
>
> libxdp XDP dispatcher --> subprog --> freplace A --tailcall-> XDP B.

I don't believe libxdp is doing anything like this.
It makes no sense to load PROG_TYPE_EXT that is supposed to freplace
another subprog and _not_ proceed with the actual replacement.

tail_call-ing into EXT prog directly is likely very broken.
EXT prog doesn't have to have ctx.
Its arguments match the target global subprog which may not have ctx at all.

So it's not about disabling, it's fixing the bug.
Leon Hwang Oct. 14, 2024, 1:17 p.m. UTC | #4
On 2024/10/11 23:50, Alexei Starovoitov wrote:
> On Thu, Oct 10, 2024 at 8:27 PM Leon Hwang <leon.hwang@linux.dev> wrote:
>>
>>
>>
>> On 11/10/24 01:09, Alexei Starovoitov wrote:
>>> On Thu, Oct 10, 2024 at 8:39 AM Leon Hwang <leon.hwang@linux.dev> wrote:
>>>>
>>> Also Xu's suggestion makes sense to me.
>>> "extension prog should not be tailcalled independently"
>>>
>>> So I would disable such case as a part of this patch as well.
>>>
>>
>> I’m fine with adding this restriction.
>>
>> However, it will break a use case that works on the 5.15 kernel:
>>
>> libxdp XDP dispatcher --> subprog --> freplace A --tailcall-> freplace B.
>>
>> With this limitation, the chain 'freplace A --tailcall-> freplace B'
>> will no longer work.
>>
>> To comply with the new restriction, the use case would need to be
>> updated to:
>>
>> libxdp XDP dispatcher --> subprog --> freplace A --tailcall-> XDP B.
> 
> I don't believe libxdp is doing anything like this.
> It makes no sense to load PROG_TYPE_EXT that is supposed to freplace
> another subprog and _not_ proceed with the actual replacement.
> 

Without the new restriction, it’s difficult to prevent such a use case,
even if it’s not aligned with the intended design of freplace.

> tail_call-ing into EXT prog directly is likely very broken.
> EXT prog doesn't have to have ctx.
> Its arguments match the target global subprog which may not have ctx at all.
> 

Let me share a simple example of the use case in question:

In the XDP program:

__noinline int
int subprog_xdp(struct xdp_md *xdp)
{
 	return xdp ? XDP_PASS : XDP_ABORTED;
}

SEC("xdp")
int entry_xdp(struct xdp_md *xdp)
{
	return subprog_xdp(xdp);
}

In the freplace entry:

struct {
	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
	__uint(max_entries, 1);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");

SEC("freplace")
int entry_freplace(struct xdp_md *xdp)
{
	int ret = XDP_PASS;

	bpf_tail_call_static(xdp, &jmp_table, 0);
	__sink(ret);
	return ret;
}

In the freplace tail callee:

SEC("freplace")
int entry_tailcallee(struct xdp_md *xdp)
{
	return XDP_PASS;
}

In this case, the attach target of entry_freplace is subprog_xdp, and
the tail call target of entry_freplace is entry_tailcallee. The attach
target of entry_tailcallee is entry_xdp, but it doesn't proceed with the
actual replacement. As a result, the call chain becomes:

entry_xdp -> subprog_xdp -> entry_freplace --tailcall-> entry_tailcallee.

> So it's not about disabling, it's fixing the bug.

Indeed, let us proceed with implementing the change.

Thanks,
Leon
Alexei Starovoitov Oct. 15, 2024, 2 a.m. UTC | #5
On Mon, Oct 14, 2024 at 6:17 AM Leon Hwang <leon.hwang@linux.dev> wrote:
>
>
>
> On 2024/10/11 23:50, Alexei Starovoitov wrote:
> > On Thu, Oct 10, 2024 at 8:27 PM Leon Hwang <leon.hwang@linux.dev> wrote:
> >>
> >>
> >>
> >> On 11/10/24 01:09, Alexei Starovoitov wrote:
> >>> On Thu, Oct 10, 2024 at 8:39 AM Leon Hwang <leon.hwang@linux.dev> wrote:
> >>>>
> >>> Also Xu's suggestion makes sense to me.
> >>> "extension prog should not be tailcalled independently"
> >>>
> >>> So I would disable such case as a part of this patch as well.
> >>>
> >>
> >> I’m fine with adding this restriction.
> >>
> >> However, it will break a use case that works on the 5.15 kernel:
> >>
> >> libxdp XDP dispatcher --> subprog --> freplace A --tailcall-> freplace B.
> >>
> >> With this limitation, the chain 'freplace A --tailcall-> freplace B'
> >> will no longer work.
> >>
> >> To comply with the new restriction, the use case would need to be
> >> updated to:
> >>
> >> libxdp XDP dispatcher --> subprog --> freplace A --tailcall-> XDP B.
> >
> > I don't believe libxdp is doing anything like this.
> > It makes no sense to load PROG_TYPE_EXT that is supposed to freplace
> > another subprog and _not_ proceed with the actual replacement.
> >
>
> Without the new restriction, it’s difficult to prevent such a use case,
> even if it’s not aligned with the intended design of freplace.
>
> > tail_call-ing into EXT prog directly is likely very broken.
> > EXT prog doesn't have to have ctx.
> > Its arguments match the target global subprog which may not have ctx at all.
> >
>
> Let me share a simple example of the use case in question:
>
> In the XDP program:
>
> __noinline int
> int subprog_xdp(struct xdp_md *xdp)
> {
>         return xdp ? XDP_PASS : XDP_ABORTED;
> }
>
> SEC("xdp")
> int entry_xdp(struct xdp_md *xdp)
> {
>         return subprog_xdp(xdp);
> }
>
> In the freplace entry:
>
> struct {
>         __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
>         __uint(max_entries, 1);
>         __uint(key_size, sizeof(__u32));
>         __uint(value_size, sizeof(__u32));
> } jmp_table SEC(".maps");
>
> SEC("freplace")
> int entry_freplace(struct xdp_md *xdp)
> {
>         int ret = XDP_PASS;
>
>         bpf_tail_call_static(xdp, &jmp_table, 0);
>         __sink(ret);
>         return ret;
> }
>
> In the freplace tail callee:
>
> SEC("freplace")
> int entry_tailcallee(struct xdp_md *xdp)
> {
>         return XDP_PASS;
> }
>
> In this case, the attach target of entry_freplace is subprog_xdp, and
> the tail call target of entry_freplace is entry_tailcallee. The attach
> target of entry_tailcallee is entry_xdp, but it doesn't proceed with the
> actual replacement. As a result, the call chain becomes:
>
> entry_xdp -> subprog_xdp -> entry_freplace --tailcall-> entry_tailcallee.

exactly. above makes no practical application.
It's only a headache for the verifier and source of obscure bugs.

>
> > So it's not about disabling, it's fixing the bug.
>
> Indeed, let us proceed with implementing the change.

yep.
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 19d8ca8ac960f..0c216e71cec76 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1292,8 +1292,12 @@  void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len);
 bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr);
 
 #ifdef CONFIG_BPF_JIT
-int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr);
-int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr);
+int bpf_trampoline_link_prog(struct bpf_tramp_link *link,
+			     struct bpf_trampoline *tr,
+			     struct bpf_prog *tgt_prog);
+int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
+			       struct bpf_trampoline *tr,
+			       struct bpf_prog *tgt_prog);
 struct bpf_trampoline *bpf_trampoline_get(u64 key,
 					  struct bpf_attach_target_info *tgt_info);
 void bpf_trampoline_put(struct bpf_trampoline *tr);
@@ -1374,12 +1378,14 @@  void bpf_jit_uncharge_modmem(u32 size);
 bool bpf_prog_has_trampoline(const struct bpf_prog *prog);
 #else
 static inline int bpf_trampoline_link_prog(struct bpf_tramp_link *link,
-					   struct bpf_trampoline *tr)
+					   struct bpf_trampoline *tr,
+					   struct bpf_prog *tgt_prog)
 {
 	return -ENOTSUPP;
 }
 static inline int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
-					     struct bpf_trampoline *tr)
+					     struct bpf_trampoline *tr,
+					     struct bpf_prog *tgt_prog)
 {
 	return -ENOTSUPP;
 }
@@ -1483,6 +1489,9 @@  struct bpf_prog_aux {
 	bool xdp_has_frags;
 	bool exception_cb;
 	bool exception_boundary;
+	bool is_extended; /* true if extended by freplace program */
+	u64 prog_array_member_cnt; /* counts how many times as member of prog_array */
+	struct mutex ext_mutex; /* mutex for is_extended and prog_array_member_cnt */
 	struct bpf_arena *arena;
 	/* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
 	const struct btf_type *attach_func_proto;
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 79660e3fca4c1..f9bd63a74eee7 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -947,6 +947,7 @@  static void *prog_fd_array_get_ptr(struct bpf_map *map,
 				   struct file *map_file, int fd)
 {
 	struct bpf_prog *prog = bpf_prog_get(fd);
+	bool is_extended;
 
 	if (IS_ERR(prog))
 		return prog;
@@ -956,13 +957,33 @@  static void *prog_fd_array_get_ptr(struct bpf_map *map,
 		return ERR_PTR(-EINVAL);
 	}
 
+	mutex_lock(&prog->aux->ext_mutex);
+	is_extended = prog->aux->is_extended;
+	if (!is_extended)
+		prog->aux->prog_array_member_cnt++;
+	mutex_unlock(&prog->aux->ext_mutex);
+	if (is_extended) {
+		/* Extended prog can not be tail callee. It's to prevent a
+		 * potential infinite loop like:
+		 * tail callee prog entry -> tail callee prog subprog ->
+		 * freplace prog entry --tailcall-> tail callee prog entry.
+		 */
+		bpf_prog_put(prog);
+		return ERR_PTR(-EBUSY);
+	}
+
 	return prog;
 }
 
 static void prog_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
 {
+	struct bpf_prog *prog = ptr;
+
+	mutex_lock(&prog->aux->ext_mutex);
+	prog->aux->prog_array_member_cnt--;
+	mutex_unlock(&prog->aux->ext_mutex);
 	/* bpf_prog is freed after one RCU or tasks trace grace period */
-	bpf_prog_put(ptr);
+	bpf_prog_put(prog);
 }
 
 static u32 prog_fd_array_sys_lookup_elem(void *ptr)
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 5e77c58e06010..233ea78f8f1bd 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -131,6 +131,7 @@  struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
 	INIT_LIST_HEAD_RCU(&fp->aux->ksym_prefix.lnode);
 #endif
 	mutex_init(&fp->aux->used_maps_mutex);
+	mutex_init(&fp->aux->ext_mutex);
 	mutex_init(&fp->aux->dst_mutex);
 
 	return fp;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index a8f1808a1ca54..4d04d4d9c1f30 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3214,7 +3214,8 @@  static void bpf_tracing_link_release(struct bpf_link *link)
 		container_of(link, struct bpf_tracing_link, link.link);
 
 	WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link,
-						tr_link->trampoline));
+						tr_link->trampoline,
+						tr_link->tgt_prog));
 
 	bpf_trampoline_put(tr_link->trampoline);
 
@@ -3354,7 +3355,7 @@  static int bpf_tracing_prog_attach(struct bpf_prog *prog,
 	 *   in prog->aux
 	 *
 	 * - if prog->aux->dst_trampoline is NULL, the program has already been
-         *   attached to a target and its initial target was cleared (below)
+	 *   attached to a target and its initial target was cleared (below)
 	 *
 	 * - if tgt_prog != NULL, the caller specified tgt_prog_fd +
 	 *   target_btf_id using the link_create API.
@@ -3429,7 +3430,7 @@  static int bpf_tracing_prog_attach(struct bpf_prog *prog,
 	if (err)
 		goto out_unlock;
 
-	err = bpf_trampoline_link_prog(&link->link, tr);
+	err = bpf_trampoline_link_prog(&link->link, tr, tgt_prog);
 	if (err) {
 		bpf_link_cleanup(&link_primer);
 		link = NULL;
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index f8302a5ca400d..99b86e3b28a41 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -523,7 +523,9 @@  static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog)
 	}
 }
 
-static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
+static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
+				      struct bpf_trampoline *tr,
+				      struct bpf_prog *tgt_prog)
 {
 	enum bpf_tramp_prog_type kind;
 	struct bpf_tramp_link *link_exiting;
@@ -544,6 +546,17 @@  static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_tr
 		/* Cannot attach extension if fentry/fexit are in use. */
 		if (cnt)
 			return -EBUSY;
+		guard(mutex)(&tgt_prog->aux->ext_mutex);
+		if (tgt_prog->aux->prog_array_member_cnt)
+			/* Program extensions can not extend target prog when
+			 * the target prog has been updated to any prog_array
+			 * map as tail callee. It's to prevent a potential
+			 * infinite loop like:
+			 * tgt prog entry -> tgt prog subprog -> freplace prog
+			 * entry --tailcall-> tgt prog entry.
+			 */
+			return -EBUSY;
+		tgt_prog->aux->is_extended = true;
 		tr->extension_prog = link->link.prog;
 		return bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL,
 					  link->link.prog->bpf_func);
@@ -570,17 +583,21 @@  static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_tr
 	return err;
 }
 
-int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
+int bpf_trampoline_link_prog(struct bpf_tramp_link *link,
+			     struct bpf_trampoline *tr,
+			     struct bpf_prog *tgt_prog)
 {
 	int err;
 
 	mutex_lock(&tr->mutex);
-	err = __bpf_trampoline_link_prog(link, tr);
+	err = __bpf_trampoline_link_prog(link, tr, tgt_prog);
 	mutex_unlock(&tr->mutex);
 	return err;
 }
 
-static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
+static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
+					struct bpf_trampoline *tr,
+					struct bpf_prog *tgt_prog)
 {
 	enum bpf_tramp_prog_type kind;
 	int err;
@@ -588,9 +605,11 @@  static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_
 	kind = bpf_attach_type_to_tramp(link->link.prog);
 	if (kind == BPF_TRAMP_REPLACE) {
 		WARN_ON_ONCE(!tr->extension_prog);
+		guard(mutex)(&tgt_prog->aux->ext_mutex);
 		err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP,
 					 tr->extension_prog->bpf_func, NULL);
 		tr->extension_prog = NULL;
+		tgt_prog->aux->is_extended = false;
 		return err;
 	}
 	hlist_del_init(&link->tramp_hlist);
@@ -599,12 +618,14 @@  static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_
 }
 
 /* bpf_trampoline_unlink_prog() should never fail. */
-int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
+int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
+			       struct bpf_trampoline *tr,
+			       struct bpf_prog *tgt_prog)
 {
 	int err;
 
 	mutex_lock(&tr->mutex);
-	err = __bpf_trampoline_unlink_prog(link, tr);
+	err = __bpf_trampoline_unlink_prog(link, tr, tgt_prog);
 	mutex_unlock(&tr->mutex);
 	return err;
 }
@@ -619,7 +640,7 @@  static void bpf_shim_tramp_link_release(struct bpf_link *link)
 	if (!shim_link->trampoline)
 		return;
 
-	WARN_ON_ONCE(bpf_trampoline_unlink_prog(&shim_link->link, shim_link->trampoline));
+	WARN_ON_ONCE(bpf_trampoline_unlink_prog(&shim_link->link, shim_link->trampoline, NULL));
 	bpf_trampoline_put(shim_link->trampoline);
 }
 
@@ -733,7 +754,7 @@  int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
 		goto err;
 	}
 
-	err = __bpf_trampoline_link_prog(&shim_link->link, tr);
+	err = __bpf_trampoline_link_prog(&shim_link->link, tr, NULL);
 	if (err)
 		goto err;