diff mbox series

[bpf,3/4] bpf: track changes_pkt_data property for global functions

Message ID 20241206040307.568065-4-eddyz87@gmail.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series bpf: track changes_pkt_data property for global functions | expand

Checks

Context Check Description
bpf/vmtest-bpf-PR success PR summary
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present fail Series targets non-next tree, but doesn't contain any Fixes tags
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 3 this patch: 3
netdev/build_tools success Errors and warnings before: 0 (+0) this patch: 0 (+0)
netdev/cc_maintainers warning 6 maintainers not CCed: haoluo@google.com jolsa@kernel.org kpsingh@kernel.org song@kernel.org sdf@fomichev.me john.fastabend@gmail.com
netdev/build_clang success Errors and warnings before: 3 this patch: 3
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 67 this patch: 67
netdev/checkpatch warning WARNING: line length of 84 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-VM_Test-10 success Logs for aarch64-gcc / veristat-kernel
bpf/vmtest-bpf-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-VM_Test-13 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-11 success Logs for aarch64-gcc / veristat-meta
bpf/vmtest-bpf-VM_Test-27 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-28 success Logs for x86_64-gcc / veristat-kernel / x86_64-gcc veristat_kernel
bpf/vmtest-bpf-VM_Test-30 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-31 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-12 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-VM_Test-21 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-VM_Test-36 success Logs for x86_64-llvm-17 / veristat-kernel
bpf/vmtest-bpf-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-20 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-VM_Test-19 success Logs for set-matrix
bpf/vmtest-bpf-VM_Test-18 success Logs for s390x-gcc / veristat-meta
bpf/vmtest-bpf-VM_Test-17 success Logs for s390x-gcc / veristat-kernel
bpf/vmtest-bpf-VM_Test-37 success Logs for x86_64-llvm-17 / veristat-meta
bpf/vmtest-bpf-VM_Test-22 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-38 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-39 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-VM_Test-45 success Logs for x86_64-llvm-18 / veristat-kernel
bpf/vmtest-bpf-VM_Test-46 success Logs for x86_64-llvm-18 / veristat-meta
bpf/vmtest-bpf-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-26 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-VM_Test-23 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-29 success Logs for x86_64-gcc / veristat-meta / x86_64-gcc veristat_meta
bpf/vmtest-bpf-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-35 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-42 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-43 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-44 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-VM_Test-34 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17

Commit Message

Eduard Zingerman Dec. 6, 2024, 4:03 a.m. UTC
When processing calls to certain helpers, verifier invalidates all
packet pointers in a current state. For example, consider the
following program:

    __attribute__((__noinline__))
    long skb_pull_data(struct __sk_buff *sk, __u32 len)
    {
        return bpf_skb_pull_data(sk, len);
    }

    SEC("tc")
    int test_invalidate_checks(struct __sk_buff *sk)
    {
        int *p = (void *)(long)sk->data;
        if ((void *)(p + 1) > (void *)(long)sk->data_end) return TCX_DROP;
        skb_pull_data(sk, 0);
        *p = 42;
        return TCX_PASS;
    }

After a call to bpf_skb_pull_data() the pointer 'p' can't be used
safely. See function filter.c:bpf_helper_changes_pkt_data() for a list
of such helpers.

At the moment verifier does packet pointers invalidation only upon
processing calls to helper functions. This means that calls to helpers
done from global sub-programs do not invalidate pointers in the caller
state. E.g. the program above is unsafe, but is not rejected
by verifier.

This commit fixes the omission by computing field
bpf_subprog_info->changes_pkt_data for each sub-program before main
verification pass.
changes_pkt_data should be set if:
- subprogram calls helper for which bpf_helper_changes_pkt_data
  returns true;
- subprogram calls a global function,
  for which bpf_subprog_info->changes_pkt_data should be set.

The verifier.c:check_cfg() pass is modified to compute this
information. The commit relies on depth first instruction traversal
done by check_cfg() and absence of recursive function calls:
- check_cfg() would eventually visit every call to subprogram S in a
  state when S is fully explored;
- when S is fully explored:
  - every direct helper call within S is explored
    (and thus changes_pkt_data is set if needed);
  - every call to subprogram S1 called by S was visited with S1 fully
    explored (and thus S inherits changes_pkt_data from S1).

The downside of such approach is that dead code elimination is not
taken into account: if a helper call inside global function is dead
because of current configuration, verifier would conservatively assume
that the call occurs for the purpose of the changes_pkt_data
computation.

Reported-by: Nick Zavaritsky <mejedi@gmail.com>
Closes: https://lore.kernel.org/bpf/0498CA22-5779-4767-9C0C-A9515CEA711F@gmail.com/
Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
---
 include/linux/bpf_verifier.h |  1 +
 kernel/bpf/verifier.c        | 32 +++++++++++++++++++++++++++++++-
 2 files changed, 32 insertions(+), 1 deletion(-)

Comments

Alexei Starovoitov Dec. 6, 2024, 8:43 p.m. UTC | #1
On Thu, Dec 5, 2024 at 8:03 PM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
> diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
> index f4290c179bee..48b7b2eeb7e2 100644
> --- a/include/linux/bpf_verifier.h
> +++ b/include/linux/bpf_verifier.h
> @@ -659,6 +659,7 @@ struct bpf_subprog_info {
>         bool args_cached: 1;
>         /* true if bpf_fastcall stack region is used by functions that can't be inlined */
>         bool keep_fastcall_stack: 1;
> +       bool changes_pkt_data: 1;

since freplace was brought up in the other thread.
Let's fix it all in one patch.
I think propagating changes_pkt_data flag into prog_aux and
into map->owner should do it.
The handling will be similar to existing xdp_has_frags.

Otherwise tail_call from static subprog will have the same issue.
xdp_has_frags compatibility requires equality. All progs either
have it or don't.
changes_pkt_data flag doesn't need to be that strict:
A prog with changes_pkt_data can be freplaced by prog without
and tailcall into prog without it.
But not the other way around.
Eduard Zingerman Dec. 6, 2024, 9:35 p.m. UTC | #2
On Fri, 2024-12-06 at 12:43 -0800, Alexei Starovoitov wrote:
> On Thu, Dec 5, 2024 at 8:03 PM Eduard Zingerman <eddyz87@gmail.com> wrote:
> > 
> > diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
> > index f4290c179bee..48b7b2eeb7e2 100644
> > --- a/include/linux/bpf_verifier.h
> > +++ b/include/linux/bpf_verifier.h
> > @@ -659,6 +659,7 @@ struct bpf_subprog_info {
> >         bool args_cached: 1;
> >         /* true if bpf_fastcall stack region is used by functions that can't be inlined */
> >         bool keep_fastcall_stack: 1;
> > +       bool changes_pkt_data: 1;
> 
> since freplace was brought up in the other thread.
> Let's fix it all in one patch.
> I think propagating changes_pkt_data flag into prog_aux and
> into map->owner should do it.
> The handling will be similar to existing xdp_has_frags.
> 
> Otherwise tail_call from static subprog will have the same issue.
> xdp_has_frags compatibility requires equality. All progs either
> have it or don't.
> changes_pkt_data flag doesn't need to be that strict:
> A prog with changes_pkt_data can be freplaced by prog without
> and tailcall into prog without it.
> But not the other way around.

Ack, will do.

(Note: the change Andrii suggested regarding global subprogram
       visit order looks plausible and not hard to implement,
       after I thought about it a bit more).
Eduard Zingerman Dec. 9, 2024, 7:40 a.m. UTC | #3
On Fri, 2024-12-06 at 12:43 -0800, Alexei Starovoitov wrote:
> On Thu, Dec 5, 2024 at 8:03 PM Eduard Zingerman <eddyz87@gmail.com> wrote:
> >
> > diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
> > index f4290c179bee..48b7b2eeb7e2 100644
> > --- a/include/linux/bpf_verifier.h
> > +++ b/include/linux/bpf_verifier.h
> > @@ -659,6 +659,7 @@ struct bpf_subprog_info {
> >         bool args_cached: 1;
> >         /* true if bpf_fastcall stack region is used by functions that can't be inlined */
> >         bool keep_fastcall_stack: 1;
> > +       bool changes_pkt_data: 1;
>
> since freplace was brought up in the other thread.
> Let's fix it all in one patch.
> I think propagating changes_pkt_data flag into prog_aux and
> into map->owner should do it.
> The handling will be similar to existing xdp_has_frags.
>
> Otherwise tail_call from static subprog will have the same issue.
> xdp_has_frags compatibility requires equality. All progs either
> have it or don't.
> changes_pkt_data flag doesn't need to be that strict:
> A prog with changes_pkt_data can be freplaced by prog without
> and tailcall into prog without it.
> But not the other way around.

I tried implementing this in:
https://github.com/eddyz87/bpf/tree/skb-pull-data-global-func-bug

The freplace part is simple and works as intended.

The tail call part won't work with check_cfg() based approach and
needs global functions traversal reordering Andrii talked about.
This is so, because of the need to inspect the value of register R1,
passed to the tail call helper, in order to check map owner's properties.

If the rules are simplified to consider each tail call such that
packet pointers are invalidated, the test case
tailcalls/tailcall_freplace fails. Here is how it looks:

    // tc_bpf2bpf.c
    __noinline                             freplace
    int subprog_tc(struct __sk_buff *skb) <--------.
    {                                              |
    	int ret = 1;                               |
                                                   |
    	__sink(skb);                               |
    	__sink(ret);                               |
    	return ret;                                |
    }                                              |
                                                   |
    SEC("tc")                                      |
    int entry_tc(struct __sk_buff *skb)            |
    {                                              |
    	return subprog_tc(skb);                    |
    }                                              |
                                                   |
    // tailcall_freplace.c                         |
    struct {                                       |
    	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);     |
    	__uint(max_entries, 1);                    |
    	__uint(key_size, sizeof(__u32));           |
    	__uint(value_size, sizeof(__u32));         |
    } jmp_table SEC(".maps");                      |
                                                   |
    int count = 0;                                 |
                                                   |
    SEC("freplace")                                |
    int entry_freplace(struct __sk_buff *skb) -----'
    {
    	count++;
    	bpf_tail_call_static(skb, &jmp_table, 0);
    	return count;
    }

Here 'entry_freplace' is assumed to invalidate packet data because of
the bpf_tail_call_static(), and thus it can't replace 'subprog_tc'.
There is an option to add a dummy call to bpf_skb_pull_data(),
but this operation is not a noop, as far as I can tell.

Same situation was discussed in the sub-thread regarding use of tags.
(Note: because of the tail calls, some form of changes_pkt_data effect
 propagation similar to one done in check_cfg() would be needed with
 tags as well. That, or tags would be needed not only for global
 sub-programs but also for BPF_MAP_TYPE_PROG_ARRAY maps).

---

I'll continue with global sub-programs traversal reordering and share
the implementation on Monday, to facilitate further discussion.
Alexei Starovoitov Dec. 9, 2024, 4:53 p.m. UTC | #4
On Sun, Dec 8, 2024 at 11:40 PM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
> On Fri, 2024-12-06 at 12:43 -0800, Alexei Starovoitov wrote:
> > On Thu, Dec 5, 2024 at 8:03 PM Eduard Zingerman <eddyz87@gmail.com> wrote:
> > >
> > > diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
> > > index f4290c179bee..48b7b2eeb7e2 100644
> > > --- a/include/linux/bpf_verifier.h
> > > +++ b/include/linux/bpf_verifier.h
> > > @@ -659,6 +659,7 @@ struct bpf_subprog_info {
> > >         bool args_cached: 1;
> > >         /* true if bpf_fastcall stack region is used by functions that can't be inlined */
> > >         bool keep_fastcall_stack: 1;
> > > +       bool changes_pkt_data: 1;
> >
> > since freplace was brought up in the other thread.
> > Let's fix it all in one patch.
> > I think propagating changes_pkt_data flag into prog_aux and
> > into map->owner should do it.
> > The handling will be similar to existing xdp_has_frags.
> >
> > Otherwise tail_call from static subprog will have the same issue.
> > xdp_has_frags compatibility requires equality. All progs either
> > have it or don't.
> > changes_pkt_data flag doesn't need to be that strict:
> > A prog with changes_pkt_data can be freplaced by prog without
> > and tailcall into prog without it.
> > But not the other way around.
>
> I tried implementing this in:
> https://github.com/eddyz87/bpf/tree/skb-pull-data-global-func-bug
>
> The freplace part is simple and works as intended.
>
> The tail call part won't work with check_cfg() based approach and
> needs global functions traversal reordering Andrii talked about.
> This is so, because of the need to inspect the value of register R1,
> passed to the tail call helper, in order to check map owner's properties.
>
> If the rules are simplified to consider each tail call such that
> packet pointers are invalidated, the test case
> tailcalls/tailcall_freplace fails. Here is how it looks:
>
>     // tc_bpf2bpf.c
>     __noinline                             freplace
>     int subprog_tc(struct __sk_buff *skb) <--------.
>     {                                              |
>         int ret = 1;                               |
>                                                    |
>         __sink(skb);                               |
>         __sink(ret);                               |
>         return ret;                                |
>     }                                              |
>                                                    |
>     SEC("tc")                                      |
>     int entry_tc(struct __sk_buff *skb)            |
>     {                                              |
>         return subprog_tc(skb);                    |
>     }                                              |
>                                                    |
>     // tailcall_freplace.c                         |
>     struct {                                       |
>         __uint(type, BPF_MAP_TYPE_PROG_ARRAY);     |
>         __uint(max_entries, 1);                    |
>         __uint(key_size, sizeof(__u32));           |
>         __uint(value_size, sizeof(__u32));         |
>     } jmp_table SEC(".maps");                      |
>                                                    |
>     int count = 0;                                 |
>                                                    |
>     SEC("freplace")                                |
>     int entry_freplace(struct __sk_buff *skb) -----'
>     {
>         count++;
>         bpf_tail_call_static(skb, &jmp_table, 0);
>         return count;
>     }

hmm. none of the above changes pkt_data, so it should be allowed.
The prog doesn't read skb->data either.
So I don't quite see the problem.

> Here 'entry_freplace' is assumed to invalidate packet data because of
> the bpf_tail_call_static(), and thus it can't replace 'subprog_tc'.
> There is an option to add a dummy call to bpf_skb_pull_data(),
> but this operation is not a noop, as far as I can tell.

skb_pull is not, but there are plenty that are practically nop helpers.
bpf_helper_changes_pkt_data() lists them all.
Like bpf_xdp_adjust_meta(xdp, 0)

> Same situation was discussed in the sub-thread regarding use of tags.
> (Note: because of the tail calls, some form of changes_pkt_data effect
>  propagation similar to one done in check_cfg() would be needed with
>  tags as well. That, or tags would be needed not only for global
>  sub-programs but also for BPF_MAP_TYPE_PROG_ARRAY maps).

nack to tags approach.

>
> ---
>
> I'll continue with global sub-programs traversal reordering and share
> the implementation on Monday, to facilitate further discussion.
>
Eduard Zingerman Dec. 9, 2024, 5:57 p.m. UTC | #5
On Mon, 2024-12-09 at 08:53 -0800, Alexei Starovoitov wrote:

[...]

> > 
> >     // tc_bpf2bpf.c
> >     __noinline                             freplace
> >     int subprog_tc(struct __sk_buff *skb) <--------.
> >     {                                              |
> >         int ret = 1;                               |
> >                                                    |
> >         __sink(skb);                               |
> >         __sink(ret);                               |
> >         return ret;                                |
> >     }                                              |
> >                                                    |
> >     SEC("tc")                                      |
> >     int entry_tc(struct __sk_buff *skb)            |
> >     {                                              |
> >         return subprog_tc(skb);                    |
> >     }                                              |
> >                                                    |
> >     // tailcall_freplace.c                         |
> >     struct {                                       |
> >         __uint(type, BPF_MAP_TYPE_PROG_ARRAY);     |
> >         __uint(max_entries, 1);                    |
> >         __uint(key_size, sizeof(__u32));           |
> >         __uint(value_size, sizeof(__u32));         |
> >     } jmp_table SEC(".maps");                      |
> >                                                    |
> >     int count = 0;                                 |
> >                                                    |
> >     SEC("freplace")                                |
> >     int entry_freplace(struct __sk_buff *skb) -----'
> >     {
> >         count++;
> >         bpf_tail_call_static(skb, &jmp_table, 0);
> >         return count;
> >     }
> 
> hmm. none of the above changes pkt_data, so it should be allowed.
> The prog doesn't read skb->data either.
> So I don't quite see the problem.

The problem is when I use simplified rule: "every tail call changes packet data",
as a substitute for proper map content effects tracking.

If map content effects are tracked, there should be no problems
verifying this program. However, that can't be done in check_cfg(),
as it does not track register values, and register value is needed to
identify the map. Hence, mechanics with "in-line" global sub-program
traversal is needed (as described by Andrii):
- during a regular verification pass get to a global sub-program call:
  - if sub-program had not been visited yet, verify it completely
    and compute changes_pkt_data effect;
  - continue from the call-site using the computed effect;
- during a regular verification pass get to a tail call:
  - check the map pointed to by R1 to see whether it has
    changes_pkt_data effect.

> > Here 'entry_freplace' is assumed to invalidate packet data because of
> > the bpf_tail_call_static(), and thus it can't replace 'subprog_tc'.
> > There is an option to add a dummy call to bpf_skb_pull_data(),
> > but this operation is not a noop, as far as I can tell.
> 
> skb_pull is not, but there are plenty that are practically nop helpers.
> bpf_helper_changes_pkt_data() lists them all.
> Like bpf_xdp_adjust_meta(xdp, 0)
> 
> > Same situation was discussed in the sub-thread regarding use of tags.
> > (Note: because of the tail calls, some form of changes_pkt_data effect
> >  propagation similar to one done in check_cfg() would be needed with
> >  tags as well. That, or tags would be needed not only for global
> >  sub-programs but also for BPF_MAP_TYPE_PROG_ARRAY maps).
> 
> nack to tags approach.

Understood.
Alexei Starovoitov Dec. 10, 2024, 12:48 a.m. UTC | #6
On Mon, Dec 9, 2024 at 9:57 AM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
> On Mon, 2024-12-09 at 08:53 -0800, Alexei Starovoitov wrote:
>
> [...]
>
> > >
> > >     // tc_bpf2bpf.c
> > >     __noinline                             freplace
> > >     int subprog_tc(struct __sk_buff *skb) <--------.
> > >     {                                              |
> > >         int ret = 1;                               |
> > >                                                    |
> > >         __sink(skb);                               |
> > >         __sink(ret);                               |
> > >         return ret;                                |
> > >     }                                              |
> > >                                                    |
> > >     SEC("tc")                                      |
> > >     int entry_tc(struct __sk_buff *skb)            |
> > >     {                                              |
> > >         return subprog_tc(skb);                    |
> > >     }                                              |
> > >                                                    |
> > >     // tailcall_freplace.c                         |
> > >     struct {                                       |
> > >         __uint(type, BPF_MAP_TYPE_PROG_ARRAY);     |
> > >         __uint(max_entries, 1);                    |
> > >         __uint(key_size, sizeof(__u32));           |
> > >         __uint(value_size, sizeof(__u32));         |
> > >     } jmp_table SEC(".maps");                      |
> > >                                                    |
> > >     int count = 0;                                 |
> > >                                                    |
> > >     SEC("freplace")                                |
> > >     int entry_freplace(struct __sk_buff *skb) -----'
> > >     {
> > >         count++;
> > >         bpf_tail_call_static(skb, &jmp_table, 0);
> > >         return count;
> > >     }
> >
> > hmm. none of the above changes pkt_data, so it should be allowed.
> > The prog doesn't read skb->data either.
> > So I don't quite see the problem.
>
> The problem is when I use simplified rule: "every tail call changes packet data",
> as a substitute for proper map content effects tracking.
>
> If map content effects are tracked, there should be no problems
> verifying this program. However, that can't be done in check_cfg(),
> as it does not track register values, and register value is needed to
> identify the map.

We don't need data flow analysis and R1 tracking.

We could do the following rule:
if prog has a tail call and any prog array map in prog->aux->used_maps
calls into prog with adjust_pkt_data assume that this prog
also adjusts pkt_data.

> Hence, mechanics with "in-line" global sub-program
> traversal is needed (as described by Andrii):
> - during a regular verification pass get to a global sub-program call:
>   - if sub-program had not been visited yet, verify it completely
>     and compute changes_pkt_data effect;
>   - continue from the call-site using the computed effect;
> - during a regular verification pass get to a tail call:
>   - check the map pointed to by R1 to see whether it has
>     changes_pkt_data effect.

I don't think we should be adding all that logic when a much simpler
rule (as described above) is good enough.

Also either inline global sub-prog traversal or the simple rule above
both suffer from the following issue:
in case prog_array is empty map->changes_pkt_data will be false.
It will be set to true only when the first
prog_fd_array_get_ptr()->bpf_prog_map_compatible()
will record changes_pkt_data from the first prog inserted in prog_array.

So main prog reading skb->data after calling subprog that tail_calls
somewhere should assume that skb->data is invalidated.

That's pretty much your rule "every tail call changes packet data".

I think we can go with this simplest approach as well.
The test you mentioned have to be adjusted. Not a big deal.

Or we can do:
"if prog_array empty assume adjusts_pkt_data == true,
otherwise adj_pkt_data | = for each map in used_maps {
map->owner.adj_pkt_data }"

The fancy inline global subprog traversal would have to have the same
"simple" (or call it dumb) rule.
So at the end both inline or check_cfg are not accurate at all,
but check_cfg approach is so much simpler.
Eduard Zingerman Dec. 10, 2024, 1:24 a.m. UTC | #7
On Mon, 2024-12-09 at 16:48 -0800, Alexei Starovoitov wrote:

[...]

> Also either inline global sub-prog traversal or the simple rule above
> both suffer from the following issue:
> in case prog_array is empty map->changes_pkt_data will be false.
> It will be set to true only when the first
> prog_fd_array_get_ptr()->bpf_prog_map_compatible()
> will record changes_pkt_data from the first prog inserted in prog_array.
> 
> So main prog reading skb->data after calling subprog that tail_calls
> somewhere should assume that skb->data is invalidated.

Yes, that's what I was planning to do.

> That's pretty much your rule "every tail call changes packet data".
> 
> I think we can go with this simplest approach as well.
> The test you mentioned have to be adjusted. Not a big deal.
> 
> Or we can do:
> "if prog_array empty assume adjusts_pkt_data == true,
> otherwise adj_pkt_data | = for each map in used_maps {
> map->owner.adj_pkt_data }"
> 
> The fancy inline global subprog traversal would have to have the same
> "simple" (or call it dumb) rule.
> So at the end both inline or check_cfg are not accurate at all,
> but check_cfg approach is so much simpler.

I don't like the '|= map->owner.adj_pkt_data' thing, tbh.
I think "any tail call changes packet data" is simpler to reason about.
But then, the question is, how to modify the test?
You suggested bpf_xdp_adjust_meta(xdp, 0) for 'xdp', and it is a good fit,
as it does not do much.
For 'skb' I don't see anything better than 'bpf_skb_pull_data(sk, 0)',
it does skb_ensure_writable(), but everything else does more.
Dedicated kfunc would be cleaner, though.
Alexei Starovoitov Dec. 10, 2024, 1:38 a.m. UTC | #8
On Mon, Dec 9, 2024 at 5:24 PM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
> On Mon, 2024-12-09 at 16:48 -0800, Alexei Starovoitov wrote:
>
> [...]
>
> > Also either inline global sub-prog traversal or the simple rule above
> > both suffer from the following issue:
> > in case prog_array is empty map->changes_pkt_data will be false.
> > It will be set to true only when the first
> > prog_fd_array_get_ptr()->bpf_prog_map_compatible()
> > will record changes_pkt_data from the first prog inserted in prog_array.
> >
> > So main prog reading skb->data after calling subprog that tail_calls
> > somewhere should assume that skb->data is invalidated.
>
> Yes, that's what I was planning to do.
>
> > That's pretty much your rule "every tail call changes packet data".
> >
> > I think we can go with this simplest approach as well.
> > The test you mentioned have to be adjusted. Not a big deal.
> >
> > Or we can do:
> > "if prog_array empty assume adjusts_pkt_data == true,
> > otherwise adj_pkt_data | = for each map in used_maps {
> > map->owner.adj_pkt_data }"
> >
> > The fancy inline global subprog traversal would have to have the same
> > "simple" (or call it dumb) rule.
> > So at the end both inline or check_cfg are not accurate at all,
> > but check_cfg approach is so much simpler.
>
> I don't like the '|= map->owner.adj_pkt_data' thing, tbh.
> I think "any tail call changes packet data" is simpler to reason about.
> But then, the question is, how to modify the test?
> You suggested bpf_xdp_adjust_meta(xdp, 0) for 'xdp', and it is a good fit,
> as it does not do much.
> For 'skb' I don't see anything better than 'bpf_skb_pull_data(sk, 0)',
> it does skb_ensure_writable(), but everything else does more.

bpf_skb_change_proto(skb, skb->protocol, 0)
is a guaranteed success and as close to nop as possible.
Or the same with invalid flags or invalid proto.
bpf_skb_change_proto(skb, 0, 0)
Guaranteed -EINVAL.

> Dedicated kfunc would be cleaner, though.

Sorry I disagree.
diff mbox series

Patch

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index f4290c179bee..48b7b2eeb7e2 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -659,6 +659,7 @@  struct bpf_subprog_info {
 	bool args_cached: 1;
 	/* true if bpf_fastcall stack region is used by functions that can't be inlined */
 	bool keep_fastcall_stack: 1;
+	bool changes_pkt_data: 1;
 
 	enum priv_stack_mode priv_stack_mode;
 	u8 arg_cnt;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index ad3f6d28e8e4..6a29b68cebd6 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -10042,6 +10042,8 @@  static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 
 		verbose(env, "Func#%d ('%s') is global and assumed valid.\n",
 			subprog, sub_name);
+		if (env->subprog_info[subprog].changes_pkt_data)
+			clear_all_pkt_pointers(env);
 		/* mark global subprog for verifying after main prog */
 		subprog_aux(env, subprog)->called = true;
 		clear_caller_saved_regs(env, caller->regs);
@@ -16246,6 +16248,29 @@  static int check_return_code(struct bpf_verifier_env *env, int regno, const char
 	return 0;
 }
 
+static void mark_subprog_changes_pkt_data(struct bpf_verifier_env *env, int off)
+{
+	struct bpf_subprog_info *subprog;
+
+	subprog = find_containing_subprog(env, off);
+	subprog->changes_pkt_data = true;
+}
+
+/* 't' is an index of a call-site.
+ * 'w' is a callee entry point.
+ * Eventually this function would be called when env->cfg.insn_state[w] == EXPLORED.
+ * Rely on DFS traversal order and absence of recursive calls to guarantee that
+ * callee's changes_pkt_data marks would be correct at that moment.
+ */
+static void merge_callee_effects(struct bpf_verifier_env *env, int t, int w)
+{
+	struct bpf_subprog_info *caller, *callee;
+
+	caller = find_containing_subprog(env, t);
+	callee = find_containing_subprog(env, w);
+	caller->changes_pkt_data |= callee->changes_pkt_data;
+}
+
 /* non-recursive DFS pseudo code
  * 1  procedure DFS-iterative(G,v):
  * 2      label v as discovered
@@ -16379,6 +16404,7 @@  static int visit_func_call_insn(int t, struct bpf_insn *insns,
 				bool visit_callee)
 {
 	int ret, insn_sz;
+	int w;
 
 	insn_sz = bpf_is_ldimm64(&insns[t]) ? 2 : 1;
 	ret = push_insn(t, t + insn_sz, FALLTHROUGH, env);
@@ -16390,8 +16416,10 @@  static int visit_func_call_insn(int t, struct bpf_insn *insns,
 	mark_jmp_point(env, t + insn_sz);
 
 	if (visit_callee) {
+		w = t + insns[t].imm + 1;
 		mark_prune_point(env, t);
-		ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env);
+		merge_callee_effects(env, t, w);
+		ret = push_insn(t, w, BRANCH, env);
 	}
 	return ret;
 }
@@ -16708,6 +16736,8 @@  static int visit_insn(int t, struct bpf_verifier_env *env)
 			mark_prune_point(env, t);
 			mark_jmp_point(env, t);
 		}
+		if (bpf_helper_call(insn) && bpf_helper_changes_pkt_data(insn->imm))
+			mark_subprog_changes_pkt_data(env, t);
 		if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
 			struct bpf_kfunc_call_arg_meta meta;