
[v4,bpf-next,3/7] bpf: add fd_array_cnt attribute for prog_load

Message ID 20241203135052.3380721-4-aspsk@isovalent.com (mailing list archive)
State Superseded
Delegated to: BPF
Series Add fd_array_cnt attribute for BPF_PROG_LOAD

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 194 this patch: 194
netdev/build_tools success Errors and warnings before: 1 (+0) this patch: 1 (+0)
netdev/cc_maintainers fail 12 maintainers not CCed: sdf@fomichev.me haoluo@google.com eddyz87@gmail.com kpsingh@kernel.org john.fastabend@gmail.com andrii@kernel.org martin.lau@linux.dev jolsa@kernel.org yonghong.song@linux.dev song@kernel.org daniel@iogearbox.net ast@kernel.org
netdev/build_clang success Errors and warnings before: 225 this patch: 225
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 6967 this patch: 6967
netdev/checkpatch warning WARNING: Missing a blank line after declarations WARNING: line length of 95 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (sched_ext, false, 360) / sched_ext on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (sched_ext, false, 360) / sched_ext on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (sched_ext, false, 360) / sched_ext on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (sched_ext, false, 360) / sched_ext on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-44 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-45 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-43 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18

Commit Message

Anton Protopopov Dec. 3, 2024, 1:50 p.m. UTC
The fd_array attribute of the BPF_PROG_LOAD syscall may contain a set
of file descriptors: maps or BTFs. This field was introduced as a
sparse array. Introduce a new attribute, fd_array_cnt, which, if
present, indicates that fd_array is a contiguous array of that
length.

If fd_array_cnt is non-zero, then every map in the fd_array will be
bound to the program, as if it were used by the program. This
functionality is similar to the BPF_PROG_BIND_MAP syscall, but such
maps can also be used by the verifier during program load.

Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
---
 include/uapi/linux/bpf.h       | 10 ++++
 kernel/bpf/syscall.c           |  2 +-
 kernel/bpf/verifier.c          | 98 ++++++++++++++++++++++++++++------
 tools/include/uapi/linux/bpf.h | 10 ++++
 4 files changed, 104 insertions(+), 16 deletions(-)
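
For illustration, a minimal user-space sketch of loading a program with the
new attribute might look like this (raw bpf(2) syscall; the program type and
the origin of insns/fds are placeholder assumptions, while fd_array and
fd_array_cnt are the fields this patch adds or reuses):

  #include <linux/bpf.h>
  #include <string.h>
  #include <sys/syscall.h>
  #include <unistd.h>

  static int prog_load_with_fd_array(const struct bpf_insn *insns, __u32 insn_cnt,
                                     const int *fds, __u32 fd_cnt)
  {
          union bpf_attr attr;

          memset(&attr, 0, sizeof(attr));
          attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;   /* placeholder type */
          attr.insns = (__u64)(unsigned long)insns;
          attr.insn_cnt = insn_cnt;
          attr.license = (__u64)(unsigned long)"GPL";
          /* New in this patch: fd_array is a contiguous array of fd_cnt
           * map/BTF fds, and every map in it gets bound to the program.
           */
          attr.fd_array = (__u64)(unsigned long)fds;
          attr.fd_array_cnt = fd_cnt;

          return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
  }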

Comments

Andrii Nakryiko Dec. 3, 2024, 9:25 p.m. UTC | #1
On Tue, Dec 3, 2024 at 5:48 AM Anton Protopopov <aspsk@isovalent.com> wrote:
>
> The fd_array attribute of the BPF_PROG_LOAD syscall may contain a set
> of file descriptors: maps or btfs. This field was introduced as a
> sparse array. Introduce a new attribute, fd_array_cnt, which, if
> present, indicates that the fd_array is a continuous array of the
> corresponding length.
>
> If fd_array_cnt is non-zero, then every map in the fd_array will be
> bound to the program, as if it was used by the program. This
> functionality is similar to the BPF_PROG_BIND_MAP syscall, but such
> maps can be used by the verifier during the program load.
>
> Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
> ---
>  include/uapi/linux/bpf.h       | 10 ++++
>  kernel/bpf/syscall.c           |  2 +-
>  kernel/bpf/verifier.c          | 98 ++++++++++++++++++++++++++++------
>  tools/include/uapi/linux/bpf.h | 10 ++++
>  4 files changed, 104 insertions(+), 16 deletions(-)
>

[...]

> +/*
> + * The add_fd_from_fd_array() is executed only if fd_array_cnt is non-zero. In
> + * this case expect that every file descriptor in the array is either a map or
> + * a BTF. Everything else is considered to be trash.
> + */
> +static int add_fd_from_fd_array(struct bpf_verifier_env *env, int fd)
> +{
> +       struct bpf_map *map;
> +       CLASS(fd, f)(fd);
> +       int ret;
> +
> +       map = __bpf_map_get(f);
> +       if (!IS_ERR(map)) {
> +               ret = __add_used_map(env, map);
> +               if (ret < 0)
> +                       return ret;
> +               return 0;
> +       }
> +
> +       /*
> +        * Unlike "unused" maps which do not appear in the BPF program,
> +        * BTFs are visible, so no reason to refcnt them now

What does "BTFs are visible" mean? I find this behavior surprising,
tbh. Map is added to used_maps, but BTF is *not* added to used_btfs?
Why?

> +        */
> +       if (!IS_ERR(__btf_get_by_fd(f)))
> +               return 0;
> +
> +       verbose(env, "fd %d is not pointing to valid bpf_map or btf\n", fd);
> +       return PTR_ERR(map);
> +}
> +
> +static int process_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr, bpfptr_t uattr)
> +{
> +       size_t size = sizeof(int);
> +       int ret;
> +       int fd;
> +       u32 i;
> +
> +       env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
> +
> +       /*
> +        * The only difference between old (no fd_array_cnt is given) and new
> +        * APIs is that in the latter case the fd_array is expected to be
> +        * continuous and is scanned for map fds right away
> +        */
> +       if (!attr->fd_array_cnt)
> +               return 0;
> +
> +       for (i = 0; i < attr->fd_array_cnt; i++) {
> +               if (copy_from_bpfptr_offset(&fd, env->fd_array, i * size, size))

potential overflow in `i * size`? Do we limit fd_array_cnt anywhere to
less than INT_MAX/4?

> +                       return -EFAULT;
> +
> +               ret = add_fd_from_fd_array(env, fd);
> +               if (ret)
> +                       return ret;
> +       }
> +
> +       return 0;
> +}
> +

[...]
Anton Protopopov Dec. 4, 2024, 12:22 p.m. UTC | #2
On 24/12/03 01:25PM, Andrii Nakryiko wrote:
> On Tue, Dec 3, 2024 at 5:48 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> >
> > The fd_array attribute of the BPF_PROG_LOAD syscall may contain a set
> > of file descriptors: maps or btfs. This field was introduced as a
> > sparse array. Introduce a new attribute, fd_array_cnt, which, if
> > present, indicates that the fd_array is a continuous array of the
> > corresponding length.
> >
> > If fd_array_cnt is non-zero, then every map in the fd_array will be
> > bound to the program, as if it was used by the program. This
> > functionality is similar to the BPF_PROG_BIND_MAP syscall, but such
> > maps can be used by the verifier during the program load.
> >
> > Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
> > ---
> >  include/uapi/linux/bpf.h       | 10 ++++
> >  kernel/bpf/syscall.c           |  2 +-
> >  kernel/bpf/verifier.c          | 98 ++++++++++++++++++++++++++++------
> >  tools/include/uapi/linux/bpf.h | 10 ++++
> >  4 files changed, 104 insertions(+), 16 deletions(-)
> >
> 
> [...]
> 
> > +/*
> > + * The add_fd_from_fd_array() is executed only if fd_array_cnt is non-zero. In
> > + * this case expect that every file descriptor in the array is either a map or
> > + * a BTF. Everything else is considered to be trash.
> > + */
> > +static int add_fd_from_fd_array(struct bpf_verifier_env *env, int fd)
> > +{
> > +       struct bpf_map *map;
> > +       CLASS(fd, f)(fd);
> > +       int ret;
> > +
> > +       map = __bpf_map_get(f);
> > +       if (!IS_ERR(map)) {
> > +               ret = __add_used_map(env, map);
> > +               if (ret < 0)
> > +                       return ret;
> > +               return 0;
> > +       }
> > +
> > +       /*
> > +        * Unlike "unused" maps which do not appear in the BPF program,
> > +        * BTFs are visible, so no reason to refcnt them now
> 
> What does "BTFs are visible" mean? I find this behavior surprising,
> tbh. Map is added to used_maps, but BTF is *not* added to used_btfs?
> Why?

This functionality is added to catch maps which aren't otherwise
referenced by the program code, so that the verifier can work with
them during verification. The actual application is the "instruction
set" maps for static keys. All other objects are already "visible"
during verification.

> > +        */
> > +       if (!IS_ERR(__btf_get_by_fd(f)))
> > +               return 0;
> > +
> > +       verbose(env, "fd %d is not pointing to valid bpf_map or btf\n", fd);
> > +       return PTR_ERR(map);
> > +}
> > +
> > +static int process_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr, bpfptr_t uattr)
> > +{
> > +       size_t size = sizeof(int);
> > +       int ret;
> > +       int fd;
> > +       u32 i;
> > +
> > +       env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
> > +
> > +       /*
> > +        * The only difference between old (no fd_array_cnt is given) and new
> > +        * APIs is that in the latter case the fd_array is expected to be
> > +        * continuous and is scanned for map fds right away
> > +        */
> > +       if (!attr->fd_array_cnt)
> > +               return 0;
> > +
> > +       for (i = 0; i < attr->fd_array_cnt; i++) {
> > +               if (copy_from_bpfptr_offset(&fd, env->fd_array, i * size, size))
> 
> potential overflow in `i * size`? Do we limit fd_array_cnt anywhere to
> less than INT_MAX/4?

Right. So, probably cap to (UINT_MAX/size)?

> > +                       return -EFAULT;
> > +
> > +               ret = add_fd_from_fd_array(env, fd);
> > +               if (ret)
> > +                       return ret;
> > +       }
> > +
> > +       return 0;
> > +}
> > +
> 
> [...]
Andrii Nakryiko Dec. 4, 2024, 6:08 p.m. UTC | #3
On Wed, Dec 4, 2024 at 4:19 AM Anton Protopopov <aspsk@isovalent.com> wrote:
>
> On 24/12/03 01:25PM, Andrii Nakryiko wrote:
> > On Tue, Dec 3, 2024 at 5:48 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> > >
> > > The fd_array attribute of the BPF_PROG_LOAD syscall may contain a set
> > > of file descriptors: maps or btfs. This field was introduced as a
> > > sparse array. Introduce a new attribute, fd_array_cnt, which, if
> > > present, indicates that the fd_array is a continuous array of the
> > > corresponding length.
> > >
> > > If fd_array_cnt is non-zero, then every map in the fd_array will be
> > > bound to the program, as if it was used by the program. This
> > > functionality is similar to the BPF_PROG_BIND_MAP syscall, but such
> > > maps can be used by the verifier during the program load.
> > >
> > > Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
> > > ---
> > >  include/uapi/linux/bpf.h       | 10 ++++
> > >  kernel/bpf/syscall.c           |  2 +-
> > >  kernel/bpf/verifier.c          | 98 ++++++++++++++++++++++++++++------
> > >  tools/include/uapi/linux/bpf.h | 10 ++++
> > >  4 files changed, 104 insertions(+), 16 deletions(-)
> > >
> >
> > [...]
> >
> > > +/*
> > > + * The add_fd_from_fd_array() is executed only if fd_array_cnt is non-zero. In
> > > + * this case expect that every file descriptor in the array is either a map or
> > > + * a BTF. Everything else is considered to be trash.
> > > + */
> > > +static int add_fd_from_fd_array(struct bpf_verifier_env *env, int fd)
> > > +{
> > > +       struct bpf_map *map;
> > > +       CLASS(fd, f)(fd);
> > > +       int ret;
> > > +
> > > +       map = __bpf_map_get(f);
> > > +       if (!IS_ERR(map)) {
> > > +               ret = __add_used_map(env, map);
> > > +               if (ret < 0)
> > > +                       return ret;
> > > +               return 0;
> > > +       }
> > > +
> > > +       /*
> > > +        * Unlike "unused" maps which do not appear in the BPF program,
> > > +        * BTFs are visible, so no reason to refcnt them now
> >
> > What does "BTFs are visible" mean? I find this behavior surprising,
> > tbh. Map is added to used_maps, but BTF is *not* added to used_btfs?
> > Why?
>
> This functionality is added to catch maps, and work with them during
> verification, which aren't otherwise referenced by program code. The
> actual application is those "instructions set" maps for static keys.
> All other objects are "visible" during verification.

That's your specific intended use case, but the API is semantically
more generic and shouldn't be tailored to your specific interpretation
of how it will/should be used. I think it is a landmine to take a
reference on just BPF maps and not on BTF objects; we won't be able to
retrofit the proper, uniform treatment later without extra flags or
backwards compatibility breakage.

Even though we don't need extra "detached" BTF objects associated with
a BPF program right now, I can anticipate some interesting use cases
where we might want to attach additional BTF objects to BPF programs
(for whatever reason; BTFs are a convenient bag of strings and
graph-based types, so they could be useful for extra
debugging/metadata/whatever information).

So I can see only two ways forward. Either we disable BTFs in fd_array
if fd_array_cnt > 0, which would prevent its usage from light
skeletons, so not great. Or we bump the refcount of both BPF maps and
BTFs in fd_array.

The latter seems saner, and I don't think it is a problem at all: we
already have used_btfs, which functions similarly to used_maps.

>
> > > +        */
> > > +       if (!IS_ERR(__btf_get_by_fd(f)))
> > > +               return 0;
> > > +
> > > +       verbose(env, "fd %d is not pointing to valid bpf_map or btf\n", fd);
> > > +       return PTR_ERR(map);
> > > +}
> > > +
> > > +static int process_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr, bpfptr_t uattr)
> > > +{
> > > +       size_t size = sizeof(int);
> > > +       int ret;
> > > +       int fd;
> > > +       u32 i;
> > > +
> > > +       env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
> > > +
> > > +       /*
> > > +        * The only difference between old (no fd_array_cnt is given) and new
> > > +        * APIs is that in the latter case the fd_array is expected to be
> > > +        * continuous and is scanned for map fds right away
> > > +        */
> > > +       if (!attr->fd_array_cnt)
> > > +               return 0;
> > > +
> > > +       for (i = 0; i < attr->fd_array_cnt; i++) {
> > > +               if (copy_from_bpfptr_offset(&fd, env->fd_array, i * size, size))
> >
> > potential overflow in `i * size`? Do we limit fd_array_cnt anywhere to
> > less than INT_MAX/4?
>
> Right. So, probably cap to (UINT_MAX/size)?

either that or use check_mul_overflow()

>
> > > +                       return -EFAULT;
> > > +
> > > +               ret = add_fd_from_fd_array(env, fd);
> > > +               if (ret)
> > > +                       return ret;
> > > +       }
> > > +
> > > +       return 0;
> > > +}
> > > +
> >
> > [...]
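
For reference, the two fixes discussed in this subthread might look roughly
like this inside process_fd_array() (a sketch under the thread's assumptions;
the cap value and error codes are not from the patch):

          /* Option 1: cap the count up front so that i * size cannot overflow */
          if (attr->fd_array_cnt > U32_MAX / size)
                  return -EINVAL;

          /* Option 2: detect overflow per iteration with check_mul_overflow()
           * from <linux/overflow.h>
           */
          for (i = 0; i < attr->fd_array_cnt; i++) {
                  size_t off;

                  if (check_mul_overflow((size_t)i, size, &off))
                          return -EINVAL;
                  if (copy_from_bpfptr_offset(&fd, env->fd_array, off, size))
                          return -EFAULT;
                  /* ... hand fd to add_fd_from_fd_array() as before ... */
          }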
Anton Protopopov Dec. 5, 2024, 8:41 a.m. UTC | #4
On 24/12/04 10:08AM, Andrii Nakryiko wrote:
> On Wed, Dec 4, 2024 at 4:19 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> >
> > On 24/12/03 01:25PM, Andrii Nakryiko wrote:
> > > On Tue, Dec 3, 2024 at 5:48 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> > > >
> > > > The fd_array attribute of the BPF_PROG_LOAD syscall may contain a set
> > > > of file descriptors: maps or btfs. This field was introduced as a
> > > > sparse array. Introduce a new attribute, fd_array_cnt, which, if
> > > > present, indicates that the fd_array is a continuous array of the
> > > > corresponding length.
> > > >
> > > > If fd_array_cnt is non-zero, then every map in the fd_array will be
> > > > bound to the program, as if it was used by the program. This
> > > > functionality is similar to the BPF_PROG_BIND_MAP syscall, but such
> > > > maps can be used by the verifier during the program load.
> > > >
> > > > Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
> > > > ---
> > > >  include/uapi/linux/bpf.h       | 10 ++++
> > > >  kernel/bpf/syscall.c           |  2 +-
> > > >  kernel/bpf/verifier.c          | 98 ++++++++++++++++++++++++++++------
> > > >  tools/include/uapi/linux/bpf.h | 10 ++++
> > > >  4 files changed, 104 insertions(+), 16 deletions(-)
> > > >
> > >
> > > [...]
> > >
> > > > +/*
> > > > + * The add_fd_from_fd_array() is executed only if fd_array_cnt is non-zero. In
> > > > + * this case expect that every file descriptor in the array is either a map or
> > > > + * a BTF. Everything else is considered to be trash.
> > > > + */
> > > > +static int add_fd_from_fd_array(struct bpf_verifier_env *env, int fd)
> > > > +{
> > > > +       struct bpf_map *map;
> > > > +       CLASS(fd, f)(fd);
> > > > +       int ret;
> > > > +
> > > > +       map = __bpf_map_get(f);
> > > > +       if (!IS_ERR(map)) {
> > > > +               ret = __add_used_map(env, map);
> > > > +               if (ret < 0)
> > > > +                       return ret;
> > > > +               return 0;
> > > > +       }
> > > > +
> > > > +       /*
> > > > +        * Unlike "unused" maps which do not appear in the BPF program,
> > > > +        * BTFs are visible, so no reason to refcnt them now
> > >
> > > What does "BTFs are visible" mean? I find this behavior surprising,
> > > tbh. Map is added to used_maps, but BTF is *not* added to used_btfs?
> > > Why?
> >
> > This functionality is added to catch maps, and work with them during
> > verification, which aren't otherwise referenced by program code. The
> > actual application is those "instructions set" maps for static keys.
> > All other objects are "visible" during verification.
> 
> That's your specific intended use case, but API is semantically more
> generic and shouldn't tailor to your specific interpretation on how it
> will/should be used. I think this is a landmine to add reference to
> just BPF maps and not to BTF objects, we won't be able to retrofit the
> proper and uniform treatment later without extra flags or backwards
> compatibility breakage.
> 
> Even though we don't need extra "detached" BTF objects associated with
> BPF program, right now, I can anticipate some interesting use case
> where we might want to attach additional BTF objects to BPF programs
> (for whatever reasons, BTFs are a convenient bag of strings and
> graph-based types, so could be useful for extra
> debugging/metadata/whatever information).
> 
> So I can see only two ways forward. Either we disable BTFs in fd_array
> if fd_array_cnt>0, which will prevent its usage from light skeleton,
> so not great. Or we bump refcount both BPF maps and BTFs in fd_array.
> 
> 
> The latter seems saner and I don't think is a problem at all, we
> already have used_btfs that function similarly to used_maps.

It makes total sense to treat all BPF objects in fd_array the same
way. With BTFs the problem is that, currently, a BTF fd can end up
either in used_btfs or in kfunc_btf_tab. I will take a look at how
easy it is to merge those two.

> >
> > > > +        */
> > > > +       if (!IS_ERR(__btf_get_by_fd(f)))
> > > > +               return 0;
> > > > +
> > > > +       verbose(env, "fd %d is not pointing to valid bpf_map or btf\n", fd);
> > > > +       return PTR_ERR(map);
> > > > +}
> > > > +
> > > > +static int process_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr, bpfptr_t uattr)
> > > > +{
> > > > +       size_t size = sizeof(int);
> > > > +       int ret;
> > > > +       int fd;
> > > > +       u32 i;
> > > > +
> > > > +       env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
> > > > +
> > > > +       /*
> > > > +        * The only difference between old (no fd_array_cnt is given) and new
> > > > +        * APIs is that in the latter case the fd_array is expected to be
> > > > +        * continuous and is scanned for map fds right away
> > > > +        */
> > > > +       if (!attr->fd_array_cnt)
> > > > +               return 0;
> > > > +
> > > > +       for (i = 0; i < attr->fd_array_cnt; i++) {
> > > > +               if (copy_from_bpfptr_offset(&fd, env->fd_array, i * size, size))
> > >
> > > potential overflow in `i * size`? Do we limit fd_array_cnt anywhere to
> > > less than INT_MAX/4?
> >
> > Right. So, probably cap to (UINT_MAX/size)?
> 
> either that or use check_mul_overflow()

Ok, will fix it, thanks.

> >
> > > > +                       return -EFAULT;
> > > > +
> > > > +               ret = add_fd_from_fd_array(env, fd);
> > > > +               if (ret)
> > > > +                       return ret;
> > > > +       }
> > > > +
> > > > +       return 0;
> > > > +}
> > > > +
> > >
> > > [...]
Anton Protopopov Dec. 10, 2024, 8:58 a.m. UTC | #5
On 24/12/05 08:41AM, Anton Protopopov wrote:
> On 24/12/04 10:08AM, Andrii Nakryiko wrote:
> > On Wed, Dec 4, 2024 at 4:19 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> > >
> > > On 24/12/03 01:25PM, Andrii Nakryiko wrote:
> > > > On Tue, Dec 3, 2024 at 5:48 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> > > > >
> > > > > The fd_array attribute of the BPF_PROG_LOAD syscall may contain a set
> > > > > of file descriptors: maps or btfs. This field was introduced as a
> > > > > sparse array. Introduce a new attribute, fd_array_cnt, which, if
> > > > > present, indicates that the fd_array is a continuous array of the
> > > > > corresponding length.
> > > > >
> > > > > If fd_array_cnt is non-zero, then every map in the fd_array will be
> > > > > bound to the program, as if it was used by the program. This
> > > > > functionality is similar to the BPF_PROG_BIND_MAP syscall, but such
> > > > > maps can be used by the verifier during the program load.
> > > > >
> > > > > Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
> > > > > ---
> > > > >  include/uapi/linux/bpf.h       | 10 ++++
> > > > >  kernel/bpf/syscall.c           |  2 +-
> > > > >  kernel/bpf/verifier.c          | 98 ++++++++++++++++++++++++++++------
> > > > >  tools/include/uapi/linux/bpf.h | 10 ++++
> > > > >  4 files changed, 104 insertions(+), 16 deletions(-)
> > > > >
> > > >
> > > > [...]
> > > >
> > > > > +/*
> > > > > + * The add_fd_from_fd_array() is executed only if fd_array_cnt is non-zero. In
> > > > > + * this case expect that every file descriptor in the array is either a map or
> > > > > + * a BTF. Everything else is considered to be trash.
> > > > > + */
> > > > > +static int add_fd_from_fd_array(struct bpf_verifier_env *env, int fd)
> > > > > +{
> > > > > +       struct bpf_map *map;
> > > > > +       CLASS(fd, f)(fd);
> > > > > +       int ret;
> > > > > +
> > > > > +       map = __bpf_map_get(f);
> > > > > +       if (!IS_ERR(map)) {
> > > > > +               ret = __add_used_map(env, map);
> > > > > +               if (ret < 0)
> > > > > +                       return ret;
> > > > > +               return 0;
> > > > > +       }
> > > > > +
> > > > > +       /*
> > > > > +        * Unlike "unused" maps which do not appear in the BPF program,
> > > > > +        * BTFs are visible, so no reason to refcnt them now
> > > >
> > > > What does "BTFs are visible" mean? I find this behavior surprising,
> > > > tbh. Map is added to used_maps, but BTF is *not* added to used_btfs?
> > > > Why?
> > >
> > > This functionality is added to catch maps, and work with them during
> > > verification, which aren't otherwise referenced by program code. The
> > > actual application is those "instructions set" maps for static keys.
> > > All other objects are "visible" during verification.
> > 
> > That's your specific intended use case, but API is semantically more
> > generic and shouldn't tailor to your specific interpretation on how it
> > will/should be used. I think this is a landmine to add reference to
> > just BPF maps and not to BTF objects, we won't be able to retrofit the
> > proper and uniform treatment later without extra flags or backwards
> > compatibility breakage.
> > 
> > Even though we don't need extra "detached" BTF objects associated with
> > BPF program, right now, I can anticipate some interesting use case
> > where we might want to attach additional BTF objects to BPF programs
> > (for whatever reasons, BTFs are a convenient bag of strings and
> > graph-based types, so could be useful for extra
> > debugging/metadata/whatever information).
> > 
> > So I can see only two ways forward. Either we disable BTFs in fd_array
> > if fd_array_cnt>0, which will prevent its usage from light skeleton,
> > so not great. Or we bump refcount both BPF maps and BTFs in fd_array.
> > 
> > 
> > The latter seems saner and I don't think is a problem at all, we
> > already have used_btfs that function similarly to used_maps.
> 
> This makes total sense to treat all BPF objects in fd_array the same
> way. With BTFs the problem is that, currently, a btf fd can end up
> either in used_btfs or kfunc_btf_tab. I will take a look at how easy
> it is to merge those two.

So, currently during program load BTFs are parsed from file
descriptors and are stored in two places: env->used_btfs and
env->prog->aux->kfunc_btf_tab:

  1) env->used_btfs is populated only when a DW load with the
     (src_reg == BPF_PSEUDO_BTF_ID) flag set is performed

  2) kfunc_btf_tab is populated by __find_kfunc_desc_btf(),
     and the source is attr->fd_array[offset]. The kfunc_btf_tab is
     sorted by offset to allow faster searches

So, to merge them, something like this might be done:

  1) If fd_array_cnt != 0, then on load create a [sorted by offset]
     table "used_btfs", formatted similarly to kfunc_btf_tab in (2)
     above.

  2) On program load change (1) to add a BTF to this new sorted
     used_btfs. As there is no corresponding offset, just use
     offset=-1 (not literally like this, as bsearch() wants unique
     keys, so under offset=-1 an array of BTFs, i.e. the old
     used_btfs, should be stored)

Conceptually, this doesn't change things too much: kfunc BTFs will
still be searchable in O(log n) time, and the "normal" BTFs will
still be searched in used_btfs in linear time.

(The other way is to just allow kfunc BTFs to be loaded from fd_array
if fd_array_cnt != 0, as is done now, but as you've mentioned before,
you had other use cases in mind, so this won't work.)

> > >
> > > > > +        */
> > > > > +       if (!IS_ERR(__btf_get_by_fd(f)))
> > > > > +               return 0;
> > > > > +
> > > > > +       verbose(env, "fd %d is not pointing to valid bpf_map or btf\n", fd);
> > > > > +       return PTR_ERR(map);
> > > > > +}
> > > > > +
> > > > > +static int process_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr, bpfptr_t uattr)
> > > > > +{
> > > > > +       size_t size = sizeof(int);
> > > > > +       int ret;
> > > > > +       int fd;
> > > > > +       u32 i;
> > > > > +
> > > > > +       env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
> > > > > +
> > > > > +       /*
> > > > > +        * The only difference between old (no fd_array_cnt is given) and new
> > > > > +        * APIs is that in the latter case the fd_array is expected to be
> > > > > +        * continuous and is scanned for map fds right away
> > > > > +        */
> > > > > +       if (!attr->fd_array_cnt)
> > > > > +               return 0;
> > > > > +
> > > > > +       for (i = 0; i < attr->fd_array_cnt; i++) {
> > > > > +               if (copy_from_bpfptr_offset(&fd, env->fd_array, i * size, size))
> > > >
> > > > potential overflow in `i * size`? Do we limit fd_array_cnt anywhere to
> > > > less than INT_MAX/4?
> > >
> > > Right. So, probably cap to (UINT_MAX/size)?
> > 
> > either that or use check_mul_overflow()
> 
> Ok, will fix it, thanks.

On second look, there's no overflow here, as (int) * (size_t) is
promoted by C to (size_t), and the argument is also (size_t).

However, maybe it still makes sense to restrict the maximum size of
fd_array to something like (1 << 16). (The number of unique fds in
the end will be ~(MAX_USED_MAPS + MAX_USED_BTFS + MAX_KFUNC_BTFS).)

> > >
> > > > > +                       return -EFAULT;
> > > > > +
> > > > > +               ret = add_fd_from_fd_array(env, fd);
> > > > > +               if (ret)
> > > > > +                       return ret;
> > > > > +       }
> > > > > +
> > > > > +       return 0;
> > > > > +}
> > > > > +
> > > >
> > > > [...]
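
Schematically, the sorted-by-offset lookup that kfunc_btf_tab already does,
and which the plan above would extend, could look like this (a simplified
sketch using the kernel's bsearch() from <linux/bsearch.h>; the struct and
helper names are illustrative, not the verifier's actual ones):

  struct btf_fd_desc {
          struct btf *btf;
          int offset;             /* index into fd_array, or -1 for "no offset" */
  };

  /* descs[] is kept sorted by offset, so a BTF that arrived via
   * fd_array[offset] is found in O(log n) time with bsearch().
   */
  static int btf_fd_desc_cmp(const void *a, const void *b)
  {
          const struct btf_fd_desc *da = a, *db = b;

          return da->offset - db->offset;
  }

  static struct btf *find_btf_by_offset(struct btf_fd_desc *descs, u32 nr_descs,
                                        int offset)
  {
          struct btf_fd_desc key = { .offset = offset }, *desc;

          desc = bsearch(&key, descs, nr_descs, sizeof(key), btf_fd_desc_cmp);
          return desc ? desc->btf : NULL;
  }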
Alexei Starovoitov Dec. 10, 2024, 3:57 p.m. UTC | #6
On Tue, Dec 10, 2024 at 12:56 AM Anton Protopopov <aspsk@isovalent.com> wrote:
>
> >
> > This makes total sense to treat all BPF objects in fd_array the same
> > way. With BTFs the problem is that, currently, a btf fd can end up
> > either in used_btfs or kfunc_btf_tab. I will take a look at how easy
> > it is to merge those two.
>
> So, currently during program load BTFs are parsed from file
> descriptors and are stored in two places: env->used_btfs and
> env->prog->aux->kfunc_btf_tab:
>
>   1) env->used_btfs populated only when a DW load with the
>      (src_reg == BPF_PSEUDO_BTF_ID) flag set is performed
>
>   2) kfunc_btf_tab is populated by __find_kfunc_desc_btf(),
>      and the source is attr->fd_array[offset]. The kfunc_btf_tab is
>      sorted by offset to allow faster search
>
> So, to merge them something like this might be done:
>
>   1) If fd_array_cnt != 0, then on load create a [sorted by offset]
>      table "used_btfs", formatted similar to kfunc_btf_tab in (2)
>      above.
>
>   2) On program load change (1) to add a btf to this new sorted
>      used_btfs. As there is no corresponding offset, just use
>      offset=-1 (not literally like this, as bsearch() wants unique
>      keys, so by offset=-1 an array of btfs, aka, old used_maps,
>      should be stored)
>
> Looks like this, conceptually, doesn't change things too much: kfuncs
> btfs will still be searchable in log(n) time, the "normal" btfs will
> still be searched in used_btfs in linear time.
>
> (The other way is to just allow kfunc btfs to be loaded from fd_array
> if fd_array_cnt != 0, as it is done now, but as you've mentioned
> before, you had other use cases in mind, so this won't work.)

This is getting a bit too complex.
I think Andrii is asking to keep a reference on BTFs if they are in
fd_array. No need to combine kfunc_btf_tab and used_btfs.
I think adding BTFs from fd_array to prog->aux->used_btfs
should do it.
Andrii Nakryiko Dec. 10, 2024, 6:18 p.m. UTC | #7
On Tue, Dec 10, 2024 at 7:57 AM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Tue, Dec 10, 2024 at 12:56 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> >
> > >
> > > This makes total sense to treat all BPF objects in fd_array the same
> > > way. With BTFs the problem is that, currently, a btf fd can end up
> > > either in used_btfs or kfunc_btf_tab. I will take a look at how easy
> > > it is to merge those two.
> >
> > So, currently during program load BTFs are parsed from file
> > descriptors and are stored in two places: env->used_btfs and
> > env->prog->aux->kfunc_btf_tab:
> >
> >   1) env->used_btfs populated only when a DW load with the
> >      (src_reg == BPF_PSEUDO_BTF_ID) flag set is performed
> >
> >   2) kfunc_btf_tab is populated by __find_kfunc_desc_btf(),
> >      and the source is attr->fd_array[offset]. The kfunc_btf_tab is
> >      sorted by offset to allow faster search
> >
> > So, to merge them something like this might be done:
> >
> >   1) If fd_array_cnt != 0, then on load create a [sorted by offset]
> >      table "used_btfs", formatted similar to kfunc_btf_tab in (2)
> >      above.
> >
> >   2) On program load change (1) to add a btf to this new sorted
> >      used_btfs. As there is no corresponding offset, just use
> >      offset=-1 (not literally like this, as bsearch() wants unique
> >      keys, so by offset=-1 an array of btfs, aka, old used_maps,
> >      should be stored)
> >
> > Looks like this, conceptually, doesn't change things too much: kfuncs
> > btfs will still be searchable in log(n) time, the "normal" btfs will
> > still be searched in used_btfs in linear time.
> >
> > (The other way is to just allow kfunc btfs to be loaded from fd_array
> > if fd_array_cnt != 0, as it is done now, but as you've mentioned
> > before, you had other use cases in mind, so this won't work.)
>
> This is getting a bit too complex.
> I think Andrii is asking to keep BTFs if they are in fd_array.
> No need to combine kfunc_btf_tab and used_btfs.
> I think adding BTFs from fd_array to prog->aux->used_btfs
> should do it.

Exactly, no need for major changes; let's just add those BTFs into
used_btfs, that's all.
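
In code, the agreed change to add_fd_from_fd_array() could be as small as
replacing the BTF branch quoted earlier with something like this (a sketch;
__add_used_btf() is a hypothetical helper mirroring __add_used_map(), not a
function from this patch):

          btf = __btf_get_by_fd(f);
          if (!IS_ERR(btf)) {
                  /* Take a reference and remember it in env->used_btfs,
                   * the same way maps are remembered in env->used_maps.
                   */
                  ret = __add_used_btf(env, btf);  /* hypothetical helper */
                  if (ret < 0)
                          return ret;
                  return 0;
          }

          verbose(env, "fd %d is not pointing to valid bpf_map or btf\n", fd);
          return PTR_ERR(btf);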
Andrii Nakryiko Dec. 10, 2024, 6:19 p.m. UTC | #8
On Tue, Dec 10, 2024 at 12:56 AM Anton Protopopov <aspsk@isovalent.com> wrote:
>
> On 24/12/05 08:41AM, Anton Protopopov wrote:
> > On 24/12/04 10:08AM, Andrii Nakryiko wrote:
> > > On Wed, Dec 4, 2024 at 4:19 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> > > >
> > > > On 24/12/03 01:25PM, Andrii Nakryiko wrote:
> > > > > On Tue, Dec 3, 2024 at 5:48 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> > > > > >
> > > > > > The fd_array attribute of the BPF_PROG_LOAD syscall may contain a set
> > > > > > of file descriptors: maps or btfs. This field was introduced as a
> > > > > > sparse array. Introduce a new attribute, fd_array_cnt, which, if
> > > > > > present, indicates that the fd_array is a continuous array of the
> > > > > > corresponding length.
> > > > > >
> > > > > > If fd_array_cnt is non-zero, then every map in the fd_array will be
> > > > > > bound to the program, as if it was used by the program. This
> > > > > > functionality is similar to the BPF_PROG_BIND_MAP syscall, but such
> > > > > > maps can be used by the verifier during the program load.
> > > > > >
> > > > > > Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
> > > > > > ---
> > > > > >  include/uapi/linux/bpf.h       | 10 ++++
> > > > > >  kernel/bpf/syscall.c           |  2 +-
> > > > > >  kernel/bpf/verifier.c          | 98 ++++++++++++++++++++++++++++------
> > > > > >  tools/include/uapi/linux/bpf.h | 10 ++++
> > > > > >  4 files changed, 104 insertions(+), 16 deletions(-)
> > > > > >
> > > > >
> > > > > [...]
> > > > >
> > > > > > +/*
> > > > > > + * The add_fd_from_fd_array() is executed only if fd_array_cnt is non-zero. In
> > > > > > + * this case expect that every file descriptor in the array is either a map or
> > > > > > + * a BTF. Everything else is considered to be trash.
> > > > > > + */
> > > > > > +static int add_fd_from_fd_array(struct bpf_verifier_env *env, int fd)
> > > > > > +{
> > > > > > +       struct bpf_map *map;
> > > > > > +       CLASS(fd, f)(fd);
> > > > > > +       int ret;
> > > > > > +
> > > > > > +       map = __bpf_map_get(f);
> > > > > > +       if (!IS_ERR(map)) {
> > > > > > +               ret = __add_used_map(env, map);
> > > > > > +               if (ret < 0)
> > > > > > +                       return ret;
> > > > > > +               return 0;
> > > > > > +       }
> > > > > > +
> > > > > > +       /*
> > > > > > +        * Unlike "unused" maps which do not appear in the BPF program,
> > > > > > +        * BTFs are visible, so no reason to refcnt them now
> > > > >
> > > > > What does "BTFs are visible" mean? I find this behavior surprising,
> > > > > tbh. Map is added to used_maps, but BTF is *not* added to used_btfs?
> > > > > Why?
> > > >
> > > > This functionality is added to catch maps, and work with them during
> > > > verification, which aren't otherwise referenced by program code. The
> > > > actual application is those "instructions set" maps for static keys.
> > > > All other objects are "visible" during verification.
> > >
> > > That's your specific intended use case, but API is semantically more
> > > generic and shouldn't tailor to your specific interpretation on how it
> > > will/should be used. I think this is a landmine to add reference to
> > > just BPF maps and not to BTF objects, we won't be able to retrofit the
> > > proper and uniform treatment later without extra flags or backwards
> > > compatibility breakage.
> > >
> > > Even though we don't need extra "detached" BTF objects associated with
> > > BPF program, right now, I can anticipate some interesting use case
> > > where we might want to attach additional BTF objects to BPF programs
> > > (for whatever reasons, BTFs are a convenient bag of strings and
> > > graph-based types, so could be useful for extra
> > > debugging/metadata/whatever information).
> > >
> > > So I can see only two ways forward. Either we disable BTFs in fd_array
> > > if fd_array_cnt>0, which will prevent its usage from light skeleton,
> > > so not great. Or we bump refcount both BPF maps and BTFs in fd_array.
> > >
> > >
> > > The latter seems saner and I don't think is a problem at all, we
> > > already have used_btfs that function similarly to used_maps.
> >
> > This makes total sense to treat all BPF objects in fd_array the same
> > way. With BTFs the problem is that, currently, a btf fd can end up
> > either in used_btfs or kfunc_btf_tab. I will take a look at how easy
> > it is to merge those two.
>
> So, currently during program load BTFs are parsed from file
> descriptors and are stored in two places: env->used_btfs and
> env->prog->aux->kfunc_btf_tab:
>
>   1) env->used_btfs populated only when a DW load with the
>      (src_reg == BPF_PSEUDO_BTF_ID) flag set is performed
>
>   2) kfunc_btf_tab is populated by __find_kfunc_desc_btf(),
>      and the source is attr->fd_array[offset]. The kfunc_btf_tab is
>      sorted by offset to allow faster search
>
> So, to merge them something like this might be done:
>
>   1) If fd_array_cnt != 0, then on load create a [sorted by offset]
>      table "used_btfs", formatted similar to kfunc_btf_tab in (2)
>      above.
>
>   2) On program load change (1) to add a btf to this new sorted
>      used_btfs. As there is no corresponding offset, just use
>      offset=-1 (not literally like this, as bsearch() wants unique
>      keys, so by offset=-1 an array of btfs, aka, old used_maps,
>      should be stored)
>
> Looks like this, conceptually, doesn't change things too much: kfuncs
> btfs will still be searchable in log(n) time, the "normal" btfs will
> still be searched in used_btfs in linear time.
>
> (The other way is to just allow kfunc btfs to be loaded from fd_array
> if fd_array_cnt != 0, as it is done now, but as you've mentioned
> before, you had other use cases in mind, so this won't work.)
>
> > > >
> > > > > > +        */
> > > > > > +       if (!IS_ERR(__btf_get_by_fd(f)))
> > > > > > +               return 0;
> > > > > > +
> > > > > > +       verbose(env, "fd %d is not pointing to valid bpf_map or btf\n", fd);
> > > > > > +       return PTR_ERR(map);
> > > > > > +}
> > > > > > +
> > > > > > +static int process_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr, bpfptr_t uattr)
> > > > > > +{
> > > > > > +       size_t size = sizeof(int);
> > > > > > +       int ret;
> > > > > > +       int fd;
> > > > > > +       u32 i;
> > > > > > +
> > > > > > +       env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
> > > > > > +
> > > > > > +       /*
> > > > > > +        * The only difference between old (no fd_array_cnt is given) and new
> > > > > > +        * APIs is that in the latter case the fd_array is expected to be
> > > > > > +        * continuous and is scanned for map fds right away
> > > > > > +        */
> > > > > > +       if (!attr->fd_array_cnt)
> > > > > > +               return 0;
> > > > > > +
> > > > > > +       for (i = 0; i < attr->fd_array_cnt; i++) {
> > > > > > +               if (copy_from_bpfptr_offset(&fd, env->fd_array, i * size, size))
> > > > >
> > > > > potential overflow in `i * size`? Do we limit fd_array_cnt anywhere to
> > > > > less than INT_MAX/4?
> > > >
> > > > Right. So, probably cap to (UINT_MAX/size)?
> > >
> > > either that or use check_mul_overflow()
> >
> > Ok, will fix it, thanks.
>
> On the second look, there's no overflow here, as (int) * (size_t) is
> expanded by C to (size_t), and argument is also (size_t).

What about 32-bit architectures? 64-bit ones are not a problem, of course.

>
> However, maybe this is still makes sense to restrict the maximum size
> of fd_array to something like (1 << 16). (The number of unique fds in
> the end will be ~(MAX_USED_MAPS + MAX_USED_BTFS + MAX_KFUNC_BTFS).)
>
> > > >
> > > > > > +                       return -EFAULT;
> > > > > > +
> > > > > > +               ret = add_fd_from_fd_array(env, fd);
> > > > > > +               if (ret)
> > > > > > +                       return ret;
> > > > > > +       }
> > > > > > +
> > > > > > +       return 0;
> > > > > > +}
> > > > > > +
> > > > >
> > > > > [...]
Anton Protopopov Dec. 12, 2024, 5:17 p.m. UTC | #9
On 24/12/10 10:18AM, Andrii Nakryiko wrote:
> On Tue, Dec 10, 2024 at 7:57 AM Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:
> >
> > On Tue, Dec 10, 2024 at 12:56 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> > >
> > > >
> > > > This makes total sense to treat all BPF objects in fd_array the same
> > > > way. With BTFs the problem is that, currently, a btf fd can end up
> > > > either in used_btfs or kfunc_btf_tab. I will take a look at how easy
> > > > it is to merge those two.
> > >
> > > So, currently during program load BTFs are parsed from file
> > > descriptors and are stored in two places: env->used_btfs and
> > > env->prog->aux->kfunc_btf_tab:
> > >
> > >   1) env->used_btfs populated only when a DW load with the
> > >      (src_reg == BPF_PSEUDO_BTF_ID) flag set is performed
> > >
> > >   2) kfunc_btf_tab is populated by __find_kfunc_desc_btf(),
> > >      and the source is attr->fd_array[offset]. The kfunc_btf_tab is
> > >      sorted by offset to allow faster search
> > >
> > > So, to merge them something like this might be done:
> > >
> > >   1) If fd_array_cnt != 0, then on load create a [sorted by offset]
> > >      table "used_btfs", formatted similar to kfunc_btf_tab in (2)
> > >      above.
> > >
> > >   2) On program load change (1) to add a btf to this new sorted
> > >      used_btfs. As there is no corresponding offset, just use
> > >      offset=-1 (not literally like this, as bsearch() wants unique
> > >      keys, so by offset=-1 an array of btfs, aka, old used_maps,
> > >      should be stored)
> > >
> > > Looks like this, conceptually, doesn't change things too much: kfuncs
> > > btfs will still be searchable in log(n) time, the "normal" btfs will
> > > still be searched in used_btfs in linear time.
> > >
> > > (The other way is to just allow kfunc btfs to be loaded from fd_array
> > > if fd_array_cnt != 0, as it is done now, but as you've mentioned
> > > before, you had other use cases in mind, so this won't work.)
> >
> > This is getting a bit too complex.
> > I think Andrii is asking to keep BTFs if they are in fd_array.
> > No need to combine kfunc_btf_tab and used_btfs.
> > I think adding BTFs from fd_array to prog->aux->used_btfs
> > should do it.
> 
> Exactly, no need to do major changes, let's just add those BTFs into
> used_btfs, that's all.

Added. However, I have a question here: how do we add proper selftests?
The BTFs listed in env->used_btfs are later copied to
prog->aux->used_btfs and are never actually exposed to user space in
any way. So, one test I can think of is:

  * passing a BTF fd in fd_array on prog load
  * closing this BTF fd and checking that its id still exists while the
    program is alive (this requires waiting for an RCU sync to be sure
    that the BTF wasn't destroyed but is still refcounted)

Is this enough?

(I assume exposing used_btfs to user space is also out of scope of this
patch set, right?)
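
A rough shape of that test with libbpf might be the following (a sketch;
bpf_prog_load_opts already has fd_array, while the fd_array_cnt opt and the
surrounding setup are assumptions based on this series):

          /* btf_fd, btf_id, insns and insn_cnt are prepared earlier */
          LIBBPF_OPTS(bpf_prog_load_opts, opts,
                  .fd_array = &btf_fd,
                  .fd_array_cnt = 1,      /* assumed new opt from this series */
          );
          int prog_fd, btf_fd2;

          prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL",
                                  insns, insn_cnt, &opts);
          if (!ASSERT_GE(prog_fd, 0, "prog_load"))
                  return;

          /* The program should now hold a reference on the BTF ... */
          close(btf_fd);

          /* ... so its id must still resolve while the program is alive. */
          btf_fd2 = bpf_btf_get_fd_by_id(btf_id);
          ASSERT_GE(btf_fd2, 0, "btf_get_fd_by_id");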
Anton Protopopov Dec. 12, 2024, 5:26 p.m. UTC | #10
On 24/12/10 10:19AM, Andrii Nakryiko wrote:
> On Tue, Dec 10, 2024 at 12:56 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> >
> > On 24/12/05 08:41AM, Anton Protopopov wrote:
> > > On 24/12/04 10:08AM, Andrii Nakryiko wrote:
> > > > On Wed, Dec 4, 2024 at 4:19 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> > > > >
> > > > > On 24/12/03 01:25PM, Andrii Nakryiko wrote:
> > > > > > On Tue, Dec 3, 2024 at 5:48 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> > > > > > >
> > > > > > > The fd_array attribute of the BPF_PROG_LOAD syscall may contain a set
> > > > > > > of file descriptors: maps or btfs. This field was introduced as a
> > > > > > > sparse array. Introduce a new attribute, fd_array_cnt, which, if
> > > > > > > present, indicates that the fd_array is a continuous array of the
> > > > > > > corresponding length.
> > > > > > >
> > > > > > > If fd_array_cnt is non-zero, then every map in the fd_array will be
> > > > > > > bound to the program, as if it was used by the program. This
> > > > > > > functionality is similar to the BPF_PROG_BIND_MAP syscall, but such
> > > > > > > maps can be used by the verifier during the program load.
> > > > > > >
> > > > > > > Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
> > > > > > > ---
> > > > > > >  include/uapi/linux/bpf.h       | 10 ++++
> > > > > > >  kernel/bpf/syscall.c           |  2 +-
> > > > > > >  kernel/bpf/verifier.c          | 98 ++++++++++++++++++++++++++++------
> > > > > > >  tools/include/uapi/linux/bpf.h | 10 ++++
> > > > > > >  4 files changed, 104 insertions(+), 16 deletions(-)
> > > > > > >
> > > > > >
> > > > > > [...]
> > > > > >
> > > > > > > +/*
> > > > > > > + * The add_fd_from_fd_array() is executed only if fd_array_cnt is non-zero. In
> > > > > > > + * this case expect that every file descriptor in the array is either a map or
> > > > > > > + * a BTF. Everything else is considered to be trash.
> > > > > > > + */
> > > > > > > +static int add_fd_from_fd_array(struct bpf_verifier_env *env, int fd)
> > > > > > > +{
> > > > > > > +       struct bpf_map *map;
> > > > > > > +       CLASS(fd, f)(fd);
> > > > > > > +       int ret;
> > > > > > > +
> > > > > > > +       map = __bpf_map_get(f);
> > > > > > > +       if (!IS_ERR(map)) {
> > > > > > > +               ret = __add_used_map(env, map);
> > > > > > > +               if (ret < 0)
> > > > > > > +                       return ret;
> > > > > > > +               return 0;
> > > > > > > +       }
> > > > > > > +
> > > > > > > +       /*
> > > > > > > +        * Unlike "unused" maps which do not appear in the BPF program,
> > > > > > > +        * BTFs are visible, so no reason to refcnt them now
> > > > > >
> > > > > > What does "BTFs are visible" mean? I find this behavior surprising,
> > > > > > tbh. Map is added to used_maps, but BTF is *not* added to used_btfs?
> > > > > > Why?
> > > > >
> > > > > This functionality is added to catch maps, and work with them during
> > > > > verification, which aren't otherwise referenced by program code. The
> > > > > actual application is those "instructions set" maps for static keys.
> > > > > All other objects are "visible" during verification.
> > > >
> > > > That's your specific intended use case, but API is semantically more
> > > > generic and shouldn't tailor to your specific interpretation on how it
> > > > will/should be used. I think this is a landmine to add reference to
> > > > just BPF maps and not to BTF objects, we won't be able to retrofit the
> > > > proper and uniform treatment later without extra flags or backwards
> > > > compatibility breakage.
> > > >
> > > > Even though we don't need extra "detached" BTF objects associated with
> > > > BPF program, right now, I can anticipate some interesting use case
> > > > where we might want to attach additional BTF objects to BPF programs
> > > > (for whatever reasons, BTFs are a convenient bag of strings and
> > > > graph-based types, so could be useful for extra
> > > > debugging/metadata/whatever information).
> > > >
> > > > So I can see only two ways forward. Either we disable BTFs in fd_array
> > > > if fd_array_cnt>0, which will prevent its usage from light skeleton,
> > > > so not great. Or we bump refcount both BPF maps and BTFs in fd_array.
> > > >
> > > >
> > > > The latter seems saner and I don't think is a problem at all, we
> > > > already have used_btfs that function similarly to used_maps.
> > >
> > > This makes total sense to treat all BPF objects in fd_array the same
> > > way. With BTFs the problem is that, currently, a btf fd can end up
> > > either in used_btfs or kfunc_btf_tab. I will take a look at how easy
> > > it is to merge those two.
> >
> > So, currently during program load BTFs are parsed from file
> > descriptors and are stored in two places: env->used_btfs and
> > env->prog->aux->kfunc_btf_tab:
> >
> >   1) env->used_btfs populated only when a DW load with the
> >      (src_reg == BPF_PSEUDO_BTF_ID) flag set is performed
> >
> >   2) kfunc_btf_tab is populated by __find_kfunc_desc_btf(),
> >      and the source is attr->fd_array[offset]. The kfunc_btf_tab is
> >      sorted by offset to allow faster search
> >
> > So, to merge them something like this might be done:
> >
> >   1) If fd_array_cnt != 0, then on load create a [sorted by offset]
> >      table "used_btfs", formatted similar to kfunc_btf_tab in (2)
> >      above.
> >
> >   2) On program load change (1) to add a btf to this new sorted
> >      used_btfs. As there is no corresponding offset, just use
> >      offset=-1 (not literally like this, as bsearch() wants unique
> >      keys, so by offset=-1 an array of btfs, aka, old used_maps,
> >      should be stored)
> >
> > Looks like this, conceptually, doesn't change things too much: kfuncs
> > btfs will still be searchable in log(n) time, the "normal" btfs will
> > still be searched in used_btfs in linear time.
> >
> > (The other way is to just allow kfunc btfs to be loaded from fd_array
> > if fd_array_cnt != 0, as it is done now, but as you've mentioned
> > before, you had other use cases in mind, so this won't work.)
> >
> > > > >
> > > > > > > +        */
> > > > > > > +       if (!IS_ERR(__btf_get_by_fd(f)))
> > > > > > > +               return 0;
> > > > > > > +
> > > > > > > +       verbose(env, "fd %d is not pointing to valid bpf_map or btf\n", fd);
> > > > > > > +       return PTR_ERR(map);
> > > > > > > +}
> > > > > > > +
> > > > > > > +static int process_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr, bpfptr_t uattr)
> > > > > > > +{
> > > > > > > +       size_t size = sizeof(int);
> > > > > > > +       int ret;
> > > > > > > +       int fd;
> > > > > > > +       u32 i;
> > > > > > > +
> > > > > > > +       env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
> > > > > > > +
> > > > > > > +       /*
> > > > > > > +        * The only difference between old (no fd_array_cnt is given) and new
> > > > > > > +        * APIs is that in the latter case the fd_array is expected to be
> > > > > > > +        * continuous and is scanned for map fds right away
> > > > > > > +        */
> > > > > > > +       if (!attr->fd_array_cnt)
> > > > > > > +               return 0;
> > > > > > > +
> > > > > > > +       for (i = 0; i < attr->fd_array_cnt; i++) {
> > > > > > > +               if (copy_from_bpfptr_offset(&fd, env->fd_array, i * size, size))
> > > > > >
> > > > > > potential overflow in `i * size`? Do we limit fd_array_cnt anywhere to
> > > > > > less than INT_MAX/4?
> > > > >
> > > > > Right. So, probably cap to (UINT_MAX/size)?
> > > >
> > > > either that or use check_mul_overflow()
> > >
> > > Ok, will fix it, thanks.
> >
> > On the second look, there's no overflow here, as (int) * (size_t) is
> > expanded by C to (size_t), and argument is also (size_t).
> 
> What about 32-bit architectures? 64-bit ones are not a problem, of course.

Yes, sure, thanks. I added the (U32_MAX/size) limit.

BTW, resolve_pseudo_ldimm64() also does

        if (copy_from_bpfptr_offset(&fd,
                                    env->fd_array,
                                    insn[0].imm * sizeof(fd),
                                    sizeof(fd)))

I don't see that insn[0].imm is checked anywhere, or am I wrong?

> > However, maybe this is still makes sense to restrict the maximum size
> > of fd_array to something like (1 << 16). (The number of unique fds in
> > the end will be ~(MAX_USED_MAPS + MAX_USED_BTFS + MAX_KFUNC_BTFS).)
> >
> > > > >
> > > > > > > +                       return -EFAULT;
> > > > > > > +
> > > > > > > +               ret = add_fd_from_fd_array(env, fd);
> > > > > > > +               if (ret)
> > > > > > > +                       return ret;
> > > > > > > +       }
> > > > > > > +
> > > > > > > +       return 0;
> > > > > > > +}
> > > > > > > +
> > > > > >
> > > > > > [...]
Andrii Nakryiko Dec. 12, 2024, 5:39 p.m. UTC | #11
On Thu, Dec 12, 2024 at 9:15 AM Anton Protopopov <aspsk@isovalent.com> wrote:
>
> On 24/12/10 10:18AM, Andrii Nakryiko wrote:
> > On Tue, Dec 10, 2024 at 7:57 AM Alexei Starovoitov
> > <alexei.starovoitov@gmail.com> wrote:
> > >
> > > On Tue, Dec 10, 2024 at 12:56 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> > > >
> > > > >
> > > > > This makes total sense to treat all BPF objects in fd_array the same
> > > > > way. With BTFs the problem is that, currently, a btf fd can end up
> > > > > either in used_btfs or kfunc_btf_tab. I will take a look at how easy
> > > > > it is to merge those two.
> > > >
> > > > So, currently during program load BTFs are parsed from file
> > > > descriptors and are stored in two places: env->used_btfs and
> > > > env->prog->aux->kfunc_btf_tab:
> > > >
> > > >   1) env->used_btfs is populated only when a DW load with the
> > > >      (src_reg == BPF_PSEUDO_BTF_ID) flag set is performed
> > > >
> > > >   2) kfunc_btf_tab is populated by __find_kfunc_desc_btf(),
> > > >      and the source is attr->fd_array[offset]. The kfunc_btf_tab is
> > > >      sorted by offset to allow faster search
> > > >
> > > > So, to merge them something like this might be done:
> > > >
> > > >   1) If fd_array_cnt != 0, then on load create a [sorted by offset]
> > > >      table "used_btfs", formatted similar to kfunc_btf_tab in (2)
> > > >      above.
> > > >
> > > >   2) On program load, change (1) to add a btf to this new sorted
> > > >      used_btfs. As there is no corresponding offset, just use
> > > >      offset=-1 (not literally like this, as bsearch() wants unique
> > > >      keys, so under offset=-1 an array of btfs, i.e., the old
> > > >      used_btfs, should be stored)
> > > >
> > > > Conceptually this doesn't seem to change things too much: kfunc
> > > > btfs will still be searchable in log(n) time, and the "normal" btfs
> > > > will still be searched in used_btfs in linear time.
> > > >
> > > > (The other way is to just allow kfunc btfs to be loaded from fd_array
> > > > if fd_array_cnt != 0, as it is done now, but as you've mentioned
> > > > before, you had other use cases in mind, so this won't work.)
> > >
> > > This is getting a bit too complex.
> > > I think Andrii is asking to keep BTFs if they are in fd_array.
> > > No need to combine kfunc_btf_tab and used_btfs.
> > > I think adding BTFs from fd_array to prog->aux->used_btfs
> > > should do it.
> >
> > Exactly, no need to do major changes, let's just add those BTFs into
> > used_btfs, that's all.
>
> Added. However, I have a question here: how to add proper selftests? The btfs
> listed in env->used_btfs are later copied to prog->aux->used_btfs, and are
> never actually exposed to user-space in any way. So, one test I can think of is
>
>   * passing a btf fd in fd_array on prog load
>   * closing this btf fd and checking that its id still exists before closing
>     the program (this requires waiting for an rcu sync to be sure that the
>     btf wasn't destroyed, but is still refcounted)
>
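> In libbpf terms the test would look roughly like this (an untested
> sketch, assuming matching libbpf support for fd_array_cnt from this
> series):
>
>         int fd = bpf_btf_get_fd_by_id(btf_id);
>         LIBBPF_OPTS(bpf_prog_load_opts, opts,
>                 .fd_array = &fd,
>                 .fd_array_cnt = 1,
>         );
>         int prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL",
>                                     insns, insn_cnt, &opts);
>
>         close(fd);
>         /* the prog should keep the btf alive (modulo the rcu sync
>          * caveat above), so re-getting an fd by id should succeed
>          */
>         fd = bpf_btf_get_fd_by_id(btf_id);
>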
> Is this enough?

Yeah, I think so, something minimal and simple should do, thanks.

>
> (I assume exposing used_btfs to user-space is also out of scope of this patch
> set, right?)

right
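
(for completeness, at the raw syscall level the new API boils down to
roughly this sketch, error handling omitted:)

        union bpf_attr attr = {};
        int fds[] = { map_fd, btf_fd };    /* fds the prog should hold */

        attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
        attr.insns = (__u64)(unsigned long)insns;
        attr.insn_cnt = insn_cnt;
        attr.license = (__u64)(unsigned long)"GPL";
        attr.fd_array = (__u64)(unsigned long)fds;
        attr.fd_array_cnt = 2;             /* new in this patch */

        prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));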
diff mbox series

Patch

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4162afc6b5d0..2acf9b336371 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1573,6 +1573,16 @@  union bpf_attr {
 		 * If provided, prog_flags should have BPF_F_TOKEN_FD flag set.
 		 */
 		__s32		prog_token_fd;
+		/* The fd_array_cnt can be used to pass the length of the
+		 * fd_array array. In this case all the [map] file descriptors
+		 * passed in this array will be bound to the program, even if
+		 * the maps are not referenced directly. The functionality is
+		 * similar to the BPF_PROG_BIND_MAP syscall, but maps can be
+		 * used by the verifier during the program load. If provided,
+		 * then the fd_array[0,...,fd_array_cnt-1] is expected to be
+		 * contiguous.
+		 */
+		__u32		fd_array_cnt;
 	};
 
 	struct { /* anonymous struct used by BPF_OBJ_* commands */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 5684e8ce132d..4e88797fdbeb 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2730,7 +2730,7 @@  static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
 }
 
 /* last field in 'union bpf_attr' used by this command */
-#define BPF_PROG_LOAD_LAST_FIELD prog_token_fd
+#define BPF_PROG_LOAD_LAST_FIELD fd_array_cnt
 
 static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
 {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8e034a22aa2a..cda02153d90e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -19181,22 +19181,10 @@  static int check_map_prog_compatibility(struct bpf_verifier_env *env,
 	return 0;
 }
 
-/* Add map behind fd to used maps list, if it's not already there, and return
- * its index.
- * Returns <0 on error, or >= 0 index, on success.
- */
-static int add_used_map_from_fd(struct bpf_verifier_env *env, int fd)
+static int __add_used_map(struct bpf_verifier_env *env, struct bpf_map *map)
 {
-	CLASS(fd, f)(fd);
-	struct bpf_map *map;
 	int i, err;
 
-	map = __bpf_map_get(f);
-	if (IS_ERR(map)) {
-		verbose(env, "fd %d is not pointing to valid bpf_map\n", fd);
-		return PTR_ERR(map);
-	}
-
 	/* check whether we recorded this map already */
 	for (i = 0; i < env->used_map_cnt; i++)
 		if (env->used_maps[i] == map)
@@ -19227,6 +19215,24 @@  static int add_used_map_from_fd(struct bpf_verifier_env *env, int fd)
 	return env->used_map_cnt - 1;
 }
 
+/* Add map behind fd to used maps list, if it's not already there, and return
+ * its index.
+ * Returns <0 on error, or >= 0 index, on success.
+ */
+static int add_used_map(struct bpf_verifier_env *env, int fd)
+{
+	struct bpf_map *map;
+	CLASS(fd, f)(fd);
+
+	map = __bpf_map_get(f);
+	if (IS_ERR(map)) {
+		verbose(env, "fd %d is not pointing to valid bpf_map\n", fd);
+		return PTR_ERR(map);
+	}
+
+	return __add_used_map(env, map);
+}
+
 /* find and rewrite pseudo imm in ld_imm64 instructions:
  *
  * 1. if it accesses map FD, replace it with actual map pointer.
@@ -19318,7 +19324,7 @@  static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
 				break;
 			}
 
-			map_idx = add_used_map_from_fd(env, fd);
+			map_idx = add_used_map(env, fd);
 			if (map_idx < 0)
 				return map_idx;
 			map = env->used_maps[map_idx];
@@ -22526,6 +22532,65 @@  struct btf *bpf_get_btf_vmlinux(void)
 	return btf_vmlinux;
 }
 
+/*
+ * The add_fd_from_fd_array() is executed only if fd_array_cnt is non-zero. In
+ * this case every file descriptor in the array is expected to be either a map
+ * or a BTF. Everything else is considered to be trash.
+ */
+static int add_fd_from_fd_array(struct bpf_verifier_env *env, int fd)
+{
+	struct bpf_map *map;
+	CLASS(fd, f)(fd);
+	int ret;
+
+	map = __bpf_map_get(f);
+	if (!IS_ERR(map)) {
+		ret = __add_used_map(env, map);
+		if (ret < 0)
+			return ret;
+		return 0;
+	}
+
+	/*
+	 * Unlike "unused" maps which do not appear in the BPF program,
+	 * BTFs are visible, so there is no reason to refcnt them now
+	 */
+	if (!IS_ERR(__btf_get_by_fd(f)))
+		return 0;
+
+	verbose(env, "fd %d is not pointing to valid bpf_map or btf\n", fd);
+	return PTR_ERR(map);
+}
+
+static int process_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr, bpfptr_t uattr)
+{
+	size_t size = sizeof(int);
+	int ret;
+	int fd;
+	u32 i;
+
+	env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
+
+	/*
+	 * The only difference between the old (no fd_array_cnt given) and the
+	 * new APIs is that in the latter case the fd_array is expected to be
+	 * contiguous and is scanned for map fds right away
+	 */
+	if (!attr->fd_array_cnt)
+		return 0;
+
+	for (i = 0; i < attr->fd_array_cnt; i++) {
+		if (copy_from_bpfptr_offset(&fd, env->fd_array, i * size, size))
+			return -EFAULT;
+
+		ret = add_fd_from_fd_array(env, fd);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
 {
 	u64 start_time = ktime_get_ns();
@@ -22557,7 +22622,6 @@  int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
 		env->insn_aux_data[i].orig_idx = i;
 	env->prog = *prog;
 	env->ops = bpf_verifier_ops[env->prog->type];
-	env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
 
 	env->allow_ptr_leaks = bpf_allow_ptr_leaks(env->prog->aux->token);
 	env->allow_uninit_stack = bpf_allow_uninit_stack(env->prog->aux->token);
@@ -22580,6 +22644,10 @@  int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
 	if (ret)
 		goto err_unlock;
 
+	ret = process_fd_array(env, attr, uattr);
+	if (ret)
+		goto err_release_maps;
+
 	mark_verifier_state_clean(env);
 
 	if (IS_ERR(btf_vmlinux)) {
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 4162afc6b5d0..2acf9b336371 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1573,6 +1573,16 @@  union bpf_attr {
 		 * If provided, prog_flags should have BPF_F_TOKEN_FD flag set.
 		 */
 		__s32		prog_token_fd;
+		/* The fd_array_cnt can be used to pass the length of the
+		 * fd_array array. In this case all the [map] file descriptors
+		 * passed in this array will be bound to the program, even if
+		 * the maps are not referenced directly. The functionality is
+		 * similar to the BPF_PROG_BIND_MAP syscall, but maps can be
+		 * used by the verifier during the program load. If provided,
+		 * then the fd_array[0,...,fd_array_cnt-1] is expected to be
+		 * contiguous.
+		 */
+		__u32		fd_array_cnt;
 	};
 
 	struct { /* anonymous struct used by BPF_OBJ_* commands */