diff mbox series

[RFC,bpf-next,02/13] libbpf: add btf__new_split_base_ref() creating split BTF with reference base BTF

Message ID 20240322102455.98558-4-alan.maguire@oracle.com (mailing list archive)
State RFC
Delegated to: BPF
Headers show
Series bpf: support resilient split BTF | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/apply fail Patch does not apply to bpf-next-0
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat

Commit Message

Alan Maguire March 22, 2024, 10:24 a.m. UTC
To support more robust split BTF, adding supplemental context for the
base BTF type ids that split BTF refers to is required.  Without such
references, a simple shuffling of base BTF type ids (without any other
significant change) invalidates the split BTF.  Here the attempt is made
to store additional context to make split BTF more robust.

This context comes in the form of "reference" base BTF - this base BTF
constitutes the minimal BTF representation needed to disambiguate split BTF
references to base BTF.  So for example if a struct is referred to from
split -> base BTF, a forward representation of that struct is retained
in reference base BTF, and split BTF refers to it.  Base types are kept
intact in reference base BTF, and reference types like pointers are stored
as-is.  Avoiding struct/union/enum/enum64 expansion is important to keep
reference base BTF representation to a minimum size; however anonymous
struct, union and enum[64] types are represented in full since type details
are needed to disambiguate the reference - the name is not enough in those
cases since there is no name.  In practice these are rare; in sample
cases where reference base BTF was generated for in-tree kernel modules,
only a few were needed in base reference BTF.  These represent the
anonymous struct/unions that are used by the module but were de-duplicated
to use base vmlinux BTF ids instead.
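
For illustration (hypothetical types, shown in bpftool-style dump format):
a named base struct can be reduced to a forward declaration, but an
anonymous union referenced from split BTF has to be copied member by
member, since only its shape identifies it:

[1] INT 'unsigned long long' size=8 bits_offset=0 nr_bits=64 encoding=(none)
[2] UNION '(anon)' size=8 vlen=2
	'tstamp' type_id=1 bits_offset=0
	'skaddr' type_id=1 bits_offset=0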

When successful, a new representation of the split BTF passed in is
returned.  It refers to the reference BTF as its base, and both returned
split and base BTF need to be freed by the caller.

So to take a simple example, with split BTF with a type referring
to "struct sk_buff", we will generate base reference BTF with a
FWD struct sk_buff, and the split BTF will refer to it instead.
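
Concretely (bpftool-style dump; type ids and the module struct are made
up), the reference base BTF and the rewritten split BTF would look
roughly like:

reference base BTF:
[1] FWD 'sk_buff' fwd_kind=struct

split BTF (ids continue after the reference base):
[2] PTR '(anon)' type_id=1
[3] STRUCT 'my_mod_ctx' size=8 vlen=1
	'skb' type_id=2 bits_offset=0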

Tools like pahole can utilize such split BTF to popuate the .BTF section
(split BTF) and an additional .BTF.base_ref section (base reference BTF).
Then when the split BTF is loaded, the base reference BTF can be used
to reconcile split BTF and the current - and possibly changed - base BTF.
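
A rough consumer-side sketch of that relationship (buffer names are
placeholders, the raw section bytes are assumed to have already been read
from the module ELF, and error handling is omitted):

	/* the base reference section is itself well-formed standalone BTF */
	struct btf *base_ref = btf__new(base_ref_data, base_ref_sz);

	/* the .BTF section is ordinary split BTF, parsed against it just
	 * as module split BTF is parsed against vmlinux BTF today
	 */
	struct btf *split = btf__new_split(split_data, split_sz, base_ref);

Reconciliation then rewrites base references in the split BTF from
reference-base ids to the ids used by the running kernel's base BTF.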

So for example if "struct sk_buff" was id 502 when the split BTF was
originally generated,  we can use the reference base BTF to see that
id 502 refers to a "struct sk_buff" and replace instances of id 502
with the current (reconciled) base BTF sk_buff type id.
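
To put rough, purely illustrative numbers on it:

- original vmlinux base BTF: "struct sk_buff" is id 502
- reference base BTF: FWD "sk_buff" is, say, id 7, and the shipped split
  BTF refers to id 7 rather than 502
- running kernel's vmlinux BTF: "struct sk_buff" is now id 517
- at load time, id 7 is matched by name/kind against the running base BTF
  and references to it in the split BTF are rewritten to 517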

Base reference BTF is small; when building a kernel with all modules
using base reference BTF as a test, the average size for module
base reference BTF is 1555 bytes (standard deviation 1563).  The
maximum base reference BTF size across ~2700 modules was 37895 bytes.

Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
---
 tools/lib/bpf/btf.c      | 215 +++++++++++++++++++++++++++++++++++++--
 tools/lib/bpf/btf.h      |  15 +++
 tools/lib/bpf/libbpf.map |   1 +
 3 files changed, 225 insertions(+), 6 deletions(-)

Comments

Andrii Nakryiko March 29, 2024, 10 p.m. UTC | #1
On Fri, Mar 22, 2024 at 3:25 AM Alan Maguire <alan.maguire@oracle.com> wrote:
>
> To support more robust split BTF, adding supplemental context for the
> base BTF type ids that split BTF refers to is required.  Without such
> references, a simple shuffling of base BTF type ids (without any other
> significant change) invalidates the split BTF.  Here the attempt is made
> to store additional context to make split BTF more robust.
>
> This context comes in the form of "reference" base BTF - this base BTF
> constitutes the minimal BTF representation needed to disambiguate split BTF
> references to base BTF.  So for example if a struct is referred to from
> split -> base BTF, a forward representation of that struct is retained

so you decided to not bother with recording expected size? Remember we
were talking about situations where vmlinux struct can be embedded
inside the kernel module's struct. In this case *exact* struct size is
very important and can't be changed without breaking compatibility.
Shouldn't we keep track of this?
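
The embedding case in question is, illustratively (module type name made
up):

struct my_mod_priv {
	struct sk_buff skb;	/* embedded by value: the offset of 'state'
				 * is baked in when the module is built and
				 * changes if sizeof(struct sk_buff) changes
				 */
	int state;
};

...whereas a "struct sk_buff *skb" member wouldn't care about the size.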

> in reference base BTF, and split BTF refers to it.  Base types are kept
> intact in reference base BTF, and reference types like pointers are stored
> as-is.  Avoiding struct/union/enum/enum64 expansion is important to keep
> reference base BTF representation to a minimum size; however anonymous
> struct, union and enum[64] types are represented in full since type details
> are needed to disambiguate the reference - the name is not enough in those
> cases since there is no name.  In practice these are rare; in sample
> cases where reference base BTF was generated for in-tree kernel modules,
> only a few were needed in base reference BTF.  These represent the
> anonymous struct/unions that are used by the module but were de-duplicated
> to use base vmlinux BTF ids instead.
>
> When successful, a new representation of the split BTF passed in is
> returned.  It refers to the reference BTF as its base, and both returned
> split and base BTF need to be freed by the caller.
>
> So to take a simple example, with split BTF with a type referring
> to "struct sk_buff", we will generate base reference BTF with a
> FWD struct sk_buff, and the split BTF will refer to it instead.
>
> Tools like pahole can utilize such split BTF to popuate the .BTF section

typo: populate

> (split BTF) and an additional .BTF.base_ref section (base reference BTF).
> Then when the split BTF is loaded, the base reference BTF can be used
> to reconcile split BTF and the current - and possibly changed - base BTF.
>
> So for example if "struct sk_buff" was id 502 when the split BTF was
> originally generated,  we can use the reference base BTF to see that
> id 502 refers to a "struct sk_buff" and replace instances of id 502
> with the current (reconciled) base BTF sk_buff type id.
>
> Base reference BTF is small; when building a kernel with all modules
> using base reference BTF as a test, the average size for module
> base reference BTF is 1555 bytes (standard deviation 1563).  The
> maximum base reference BTF size across ~2700 modules was 37895 bytes.
>
> Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
> ---
>  tools/lib/bpf/btf.c      | 215 +++++++++++++++++++++++++++++++++++++--
>  tools/lib/bpf/btf.h      |  15 +++
>  tools/lib/bpf/libbpf.map |   1 +
>  3 files changed, 225 insertions(+), 6 deletions(-)
>

[...]

> +static int btf_add_ref_type_ids(__u32 *id, void *ctx)
> +{
> +       struct btf_ref *ref = ctx;
> +       struct btf_type *t = btf_type_by_id(ref->pipe.src, *id);
> +       int ret;
> +
> +       /* split BTF id, not needed */
> +       if (*id > ref->nr_base_types)
> +               return 0;
> +       /* already added ? */
> +       if (ref->ids[*id] <= BTF_MAX_NR_TYPES)
> +               return 0;
> +       ref->ids[*id] = *id;
> +
> +       /* struct/union members not needed, except for anonymous structs
> +        * and unions, which we need since name won't help us determine
> +        * matches; so if a named struct/union, no need to recurse
> +        * into members.
> +        */
> +       if (btf_is_eligible_named_fwd(t))
> +               return 0;

Let's be a bit more robust here. There are three cases for BTF types
(kinds) in "base reference BTF": forward-declared (non-anonymous
structs/unions/enums), copied-as-is (anonymous
struct/union/enum/func_proto, plus INT/FLOAT; maybe something else),
but also there is a third kind that shouldn't be referenced from split
BTF (datasec, funcs, vars, etc). I'd like us to be strict in checking
that we don't accidentally and silently support the latter. It should
mean a misuse of this approach and we should error out in this case.
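
Sketching what that stricter check could look like (helper name and exact
set of copied-as-is kinds purely illustrative, built on the btf_is_*()
helpers in btf.h):

static int btf_ref_kind_policy(const struct btf_type *t)
{
	/* named struct/union/enum/enum64: a FWD is enough */
	if ((btf_is_composite(t) || btf_is_any_enum(t)) && t->name_off)
		return 1;
	/* anonymous composites/enums, func protos, scalars and reference
	 * types are copied as-is
	 */
	if (btf_is_composite(t) || btf_is_any_enum(t) ||
	    btf_is_func_proto(t) || btf_is_int(t) || btf_is_float(t) ||
	    btf_is_ptr(t) || btf_is_array(t) || btf_is_mod(t) ||
	    btf_is_typedef(t))
		return 0;
	/* VAR, DATASEC, FUNC, DECL_TAG etc. should never be the target of
	 * a split->base reference; treat that as misuse and error out
	 */
	return -EINVAL;
}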

> +
> +       /* ensure references in type are added also. */
> +       ret = btf_type_visit_type_ids(t, btf_add_ref_type_ids, ctx);
> +       if (ret < 0)
> +               return ret;
> +       return 0;
> +}
> +
> +/* All split BTF ids will be shifted downwards since there are less base BTF
> + * in reference base BTF, and for those that refer to base BTF, we use the
> + * reference map to map from original base BTF to reference base BTF id.
> + */
> +static int btf_update_ref_type_ids(__u32 *id, void *ctx)
> +{
> +       struct btf_ref *ref = ctx;
> +
> +       if (*id >= ref->nr_base_types)
> +               *id -= ref->diff_id;
> +       else
> +               *id = ref->ids[*id];
> +       return 0;
> +}
> +
> +/* Create updated split BTF with "reference" base BTF; reference base BTF
> + * consists of BTF information required to clarify the types that split
> + * BTF refers to, omitting unneeded details.  Specifically it will contain
> + * base types and forward declarations of structs, unions and enumerated
> + * types, along with associated reference types like pointers, arrays etc.
> + *
> + * The only case where structs, unions or enumerated types are fully represented
> + * is when they are anonymous; in such cases, info about type content is needed
> + * to clarify type references.
> + *
> + * We return newly-created split BTF where the split BTf refers to a newly-created
> + * base reference BTF. Both must be freed separately by the caller.
> + *
> + * When creating the BTF representation for a module and provided with the
> + * base_ref option, pahole will create split BTF using this API, and store
> + * the base reference BTF in the .BTF.base.reference section.
> + */
> +struct btf *btf__new_split_base_ref(struct btf *src_btf)

I find this name very confusing, for some reason. btf__new*/btf__parse
is generally used to either create a new empty instance, or just
parse/index pre-existing BTF information. In this case I feel like
it's actually a different operation, where we create a derived BTF, so
I'd name it distinctively. How do you feel about something with
"distill" terminology as an action performed here. E.g.,
btf__distill_base() or something like that?

The API contract is also suboptimal, given back split BTF where base
BTF is owned (and has to be freed). Let's keep it a bit more explicit,
something like:

int btf__distill_base(struct btf *new_base_btf, struct btf *new_split_btf);

WDYT?

> +{
> +       unsigned int n = btf__type_cnt(src_btf);
> +       const struct btf_type *t;
> +       struct btf_ref ref = {};
> +       struct btf *base_btf = NULL, *split_btf = NULL;
> +       __u32 i, id = 0;
> +       int ret = 0;
> +
> +       /* src BTF must be split BTF. */
> +       if (!btf__base_btf(src_btf)) {
> +               errno = -EINVAL;
> +               return NULL;
> +       }
> +       base_btf = btf__new_empty();

I was confused, it's baseref_btf or something, we already have
"base_btf", which is coming from btf__base_btf(src_btf), so let's
distinguish all those "base" BTFs?

> +       if (!base_btf)
> +               return NULL;
> +       ref.ids = calloc(n, sizeof(*ref.ids));
> +       if (!ref.ids) {
> +               ret = -ENOMEM;
> +               goto err_out;
> +       }
> +       for (i = 1; i < n; i++)
> +               ref.ids[i] = BTF_UNPROCESSED_ID;

by now we generally expect BTF IDs to be a positive integer, so I'd
use int for ids and just use explicit -1, and then a bit more natural
check for >= 0 in btf_add_ref_type_ids

> +       ref.pipe.src = src_btf;
> +       ref.pipe.dst = base_btf;
> +       ref.pipe.str_off_map = hashmap__new(btf_dedup_identity_hash_fn, btf_dedup_equal_fn, NULL);
> +       if (IS_ERR(ref.pipe.str_off_map)) {
> +               ret = -ENOMEM;
> +               goto err_out;
> +       }

[...]

> +                       default:
> +                               pr_warn("unexpected kind [%u] when creating base reference BTF.\n",
> +                                       btf_kind(t));
> +                               goto err_out;
> +                       }
> +                       ret = btf__add_fwd(base_btf, btf__name_by_offset(src_btf, t->name_off),
> +                                          fwd_kind);
> +               } else {
> +                       ret = btf_add_type(&ref.pipe, t);

see comment above, we should detect erroneous uses of referencing
"non-relocatable" kinds like VAR/DATASEC

> +               }
> +               if (ret < 0)
> +                       goto err_out;
> +               ref.ids[i] = ++id;
> +       }
> +       /* now create new split BTF with reference base BTF as its base; we end up with
> +        * split BTF that has base BTF that represents enough about its base references
> +        * to allow it to be reconciled with the base BTF available.
> +        */
> +       split_btf = btf__new_empty_split(base_btf);
> +       if (!split_btf)
> +               goto err_out;
> +
> +       ref.pipe.dst = split_btf;
> +       /* all split BTF ids will be shifted downwards since there are less base BTF ids
> +        * in reference base BTF.
> +        */
> +       ref.diff_id = ref.nr_base_types - btf__type_cnt(base_btf);
> +
> +       /* First add all split types */
> +       for (i = src_btf->start_id; i < n; i++) {
> +               t = btf_type_by_id(src_btf, i);
> +               ret = btf_add_type(&ref.pipe, t);
> +               if (ret < 0)
> +                       goto err_out;
> +       }
> +       /* Now update base/split BTF ids. */
> +       for (i = 1; i < btf__type_cnt(split_btf); i++) {

nit: cache this btf__type_cnt() result, no need to keep calling it all the time

> +               t = btf_type_by_id(split_btf, i);
> +
> +               ret = btf_type_visit_type_ids((struct btf_type *)t,  btf_update_ref_type_ids, &ref);

nit: it's a bit annoying to cast t here, given btf_type_by_id()
returns non-const type as you need it. Just mark t as non-const and
use btf_type_by_id() everywhere?



> +               if (ret < 0)
> +                       goto err_out;
> +       }
> +       free(ref.ids);
> +       hashmap__free(ref.pipe.str_off_map);
> +       return split_btf;

[...]
Alan Maguire April 4, 2024, 3:21 p.m. UTC | #2
On 29/03/2024 22:00, Andrii Nakryiko wrote:
> On Fri, Mar 22, 2024 at 3:25 AM Alan Maguire <alan.maguire@oracle.com> wrote:
>>
>> To support more robust split BTF, adding supplemental context for the
>> base BTF type ids that split BTF refers to is required.  Without such
>> references, a simple shuffling of base BTF type ids (without any other
>> significant change) invalidates the split BTF.  Here the attempt is made
>> to store additional context to make split BTF more robust.
>>
>> This context comes in the form of "reference" base BTF - this base BTF
>> constitutes the minimal BTF representation needed to disambiguate split BTF
>> references to base BTF.  So for example if a struct is referred to from
>> split -> base BTF, a forward representation of that struct is retained
> 
> so you decided to not bother with recording expected size? Remember we
> were talking about situations where vmlinux struct can be embedded
> inside the kernel module's struct. In this case *exact* struct size is
> very important and can't be changed without breaking compatibility.
> Shouldn't we keep track of this?
>

There are two cases here I think. The embedded case - which is rare, but
needs size tracking - and the non-embedded case, where a struct/union
can potentially change size without invalidating the module's
assumptions. In the former, your suggestion about having an empty
(sized) struct instead of a forward would work well I think. The only
question is around the non-embedded case. We could either use the same
approach (but then ignore the size constraints during reconciliation) or
continue to use FWDs. The advantage of continuing to use FWDs is we
wouldn't have to compute struct embeddedness during reconciliation, and
the BTF itself would be a bit more informative about the constraints
(FWD means no constraints, empty sized struct means we care about size
due to embeddedness). What do you think?
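
So, illustratively, the two encodings in the reference base BTF would be
something like (bpftool-style dump; the size is whatever sizeof() was
when the module BTF was generated):

[1] STRUCT 'sk_buff' size=232 vlen=0
	- empty but sized: struct is embedded somewhere in the module, so
	  size must match at reconciliation time
[2] FWD 'net_device' fwd_kind=struct
	- only used via pointer: matching by name/kind is enough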

>> in reference base BTF, and split BTF refers to it.  Base types are kept
>> intact in reference base BTF, and reference types like pointers are stored
>> as-is.  Avoiding struct/union/enum/enum64 expansion is important to keep
>> reference base BTF representation to a minimum size; however anonymous
>> struct, union and enum[64] types are represented in full since type details
>> are needed to disambiguate the reference - the name is not enough in those
>> cases since there is no name.  In practice these are rare; in sample
>> cases where reference base BTF was generated for in-tree kernel modules,
>> only a few were needed in base reference BTF.  These represent the
>> anonymous struct/unions that are used by the module but were de-duplicated
>> to use base vmlinux BTF ids instead.
>>
>> When successful, a new representation of the split BTF passed in is
>> returned.  It refers to the reference BTF as its base, and both returned
>> split and base BTF need to be freed by the caller.
>>
>> So to take a simple example, with split BTF with a type referring
>> to "struct sk_buff", we will generate base reference BTF with a
>> FWD struct sk_buff, and the split BTF will refer to it instead.
>>
>> Tools like pahole can utilize such split BTF to popuate the .BTF section
> 
> typo: populate
>

Will fix, thanks.

>> (split BTF) and an additional .BTF.base_ref section (base reference BTF).
>> Then when the split BTF is loaded, the base reference BTF can be used
>> to reconcile split BTF and the current - and possibly changed - base BTF.
>>
>> So for example if "struct sk_buff" was id 502 when the split BTF was
>> originally generated,  we can use the reference base BTF to see that
>> id 502 refers to a "struct sk_buff" and replace instances of id 502
>> with the current (reconciled) base BTF sk_buff type id.
>>
>> Base reference BTF is small; when building a kernel with all modules
>> using base reference BTF as a test, the average size for module
>> base reference BTF is 1555 bytes (standard deviation 1563).  The
>> maximum base reference BTF size across ~2700 modules was 37895 bytes.
>>
>> Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
>> ---
>>  tools/lib/bpf/btf.c      | 215 +++++++++++++++++++++++++++++++++++++--
>>  tools/lib/bpf/btf.h      |  15 +++
>>  tools/lib/bpf/libbpf.map |   1 +
>>  3 files changed, 225 insertions(+), 6 deletions(-)
>>
> 
> [...]
> 
>> +static int btf_add_ref_type_ids(__u32 *id, void *ctx)
>> +{
>> +       struct btf_ref *ref = ctx;
>> +       struct btf_type *t = btf_type_by_id(ref->pipe.src, *id);
>> +       int ret;
>> +
>> +       /* split BTF id, not needed */
>> +       if (*id > ref->nr_base_types)
>> +               return 0;
>> +       /* already added ? */
>> +       if (ref->ids[*id] <= BTF_MAX_NR_TYPES)
>> +               return 0;
>> +       ref->ids[*id] = *id;
>> +
>> +       /* struct/union members not needed, except for anonymous structs
>> +        * and unions, which we need since name won't help us determine
>> +        * matches; so if a named struct/union, no need to recurse
>> +        * into members.
>> +        */
>> +       if (btf_is_eligible_named_fwd(t))
>> +               return 0;
> 
> Let's be a bit more robust here. There are three cases for BTF types
> (kinds) in "base reference BTF": forward-declared (non-anonymous
> structs/unions/enums), copied-as-is (anonymous
> struct/union/enum/func_proto, plus INT/FLOAT; maybe something else),
> but also there is a third kind that shouldn't be referenced from split
> BTF (datasec, funcs, vars, etc). I'd like us to be strict in checking
> that we don't accidentally and silently support the latter. It should
> mean a misuse of this approach and we should error out in this case.
> 

Sounds good. I'll add that for v2.

>> +
>> +       /* ensure references in type are added also. */
>> +       ret = btf_type_visit_type_ids(t, btf_add_ref_type_ids, ctx);
>> +       if (ret < 0)
>> +               return ret;
>> +       return 0;
>> +}
>> +
>> +/* All split BTF ids will be shifted downwards since there are less base BTF
>> + * in reference base BTF, and for those that refer to base BTF, we use the
>> + * reference map to map from original base BTF to reference base BTF id.
>> + */
>> +static int btf_update_ref_type_ids(__u32 *id, void *ctx)
>> +{
>> +       struct btf_ref *ref = ctx;
>> +
>> +       if (*id >= ref->nr_base_types)
>> +               *id -= ref->diff_id;
>> +       else
>> +               *id = ref->ids[*id];
>> +       return 0;
>> +}
>> +
>> +/* Create updated split BTF with "reference" base BTF; reference base BTF
>> + * consists of BTF information required to clarify the types that split
>> + * BTF refers to, omitting unneeded details.  Specifically it will contain
>> + * base types and forward declarations of structs, unions and enumerated
>> + * types, along with associated reference types like pointers, arrays etc.
>> + *
>> + * The only case where structs, unions or enumerated types are fully represented
>> + * is when they are anonymous; in such cases, info about type content is needed
>> + * to clarify type references.
>> + *
>> + * We return newly-created split BTF where the split BTf refers to a newly-created
>> + * base reference BTF. Both must be freed separately by the caller.
>> + *
>> + * When creating the BTF representation for a module and provided with the
>> + * base_ref option, pahole will create split BTF using this API, and store
>> + * the base reference BTF in the .BTF.base.reference section.
>> + */
>> +struct btf *btf__new_split_base_ref(struct btf *src_btf)
> 
> I find this name very confusing, for some reason. btf__new*/btf__parse
> is generally used to either create a new empty instance, or just
> parse/index pre-existing BTF information. In this case I feel like
> it's actually a different operation, where we create a derived BTF, so
> I'd name it distinctively. How do you feel about something with
> "distill" terminology as an action performed here. E.g.,
> btf__distill_base() or something like that?
> 
> The API contract is also suboptimal, given back split BTF where base
> BTF is owned (and has to be freed). Let's keep it a bit more explicit,
> something like:
> 
> int btf__distill_base(struct btf *new_base_btf, struct btf *new_split_btf);
> 
> WDYT?
> 

Yeah, any suggestions around naming are very welcome! My first choice
was "minimal base" but that seems to prioritize the minimization, which
is really just a side-effect of the desire to add context to the
references in the split BTF. "base reference" feels clumsy to me too.
Ideally we want to find a name that signals that the base BTF is a
companion to the associated split BTF, adding context for it. Describing
the operation as distilling certainly connotes that we're removing
non-essential info; so we'd end up with

int btf__distill_base(const struct btf *src_btf, struct btf **new_base_btf,
                      struct btf **new_split_btf);

...and we could call the section .BTF.distilled.base perhaps.
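
Caller-side (sketch only, against the signature above) pahole would then
do something like:

	struct btf *new_base = NULL, *new_split = NULL;
	int err;

	err = btf__distill_base(module_split_btf, &new_base, &new_split);
	if (err)
		return err;
	/* emit btf__raw_data(new_split, &sz) as .BTF and
	 * btf__raw_data(new_base, &sz) as the distilled base section
	 */
	btf__free(new_split);
	btf__free(new_base);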



>> +{
>> +       unsigned int n = btf__type_cnt(src_btf);
>> +       const struct btf_type *t;
>> +       struct btf_ref ref = {};
>> +       struct btf *base_btf = NULL, *split_btf = NULL;
>> +       __u32 i, id = 0;
>> +       int ret = 0;
>> +
>> +       /* src BTF must be split BTF. */
>> +       if (!btf__base_btf(src_btf)) {
>> +               errno = -EINVAL;
>> +               return NULL;
>> +       }
>> +       base_btf = btf__new_empty();
> 
> I was confused, it's baseref_btf or something, we already have
> "base_btf", which is coming from btf__base_btf(src_btf), so let's
> distinguish all those "base" BTFs?
>

Will do.

>> +       if (!base_btf)
>> +               return NULL;
>> +       ref.ids = calloc(n, sizeof(*ref.ids));
>> +       if (!ref.ids) {
>> +               ret = -ENOMEM;
>> +               goto err_out;
>> +       }
>> +       for (i = 1; i < n; i++)
>> +               ref.ids[i] = BTF_UNPROCESSED_ID;
> 
> by now we generally expect BTF IDs to be a positive integer, so I'd
> use int for ids and just use explicit -1, and then a bit more natural
> check for >= 0 in btf_add_ref_type_ids
> 

Sure.

>> +       ref.pipe.src = src_btf;
>> +       ref.pipe.dst = base_btf;
>> +       ref.pipe.str_off_map = hashmap__new(btf_dedup_identity_hash_fn, btf_dedup_equal_fn, NULL);
>> +       if (IS_ERR(ref.pipe.str_off_map)) {
>> +               ret = -ENOMEM;
>> +               goto err_out;
>> +       }
> 
> [...]
> 
>> +                       default:
>> +                               pr_warn("unexpected kind [%u] when creating base reference BTF.\n",
>> +                                       btf_kind(t));
>> +                               goto err_out;
>> +                       }
>> +                       ret = btf__add_fwd(base_btf, btf__name_by_offset(src_btf, t->name_off),
>> +                                          fwd_kind);
>> +               } else {
>> +                       ret = btf_add_type(&ref.pipe, t);
> 
> see comment above, we should detect erroneous uses of referencing
> "non-relocatable" kinds like VAR/DATASEC
> 
>> +               }
>> +               if (ret < 0)
>> +                       goto err_out;
>> +               ref.ids[i] = ++id;
>> +       }
>> +       /* now create new split BTF with reference base BTF as its base; we end up with
>> +        * split BTF that has base BTF that represents enough about its base references
>> +        * to allow it to be reconciled with the base BTF available.
>> +        */
>> +       split_btf = btf__new_empty_split(base_btf);
>> +       if (!split_btf)
>> +               goto err_out;
>> +
>> +       ref.pipe.dst = split_btf;
>> +       /* all split BTF ids will be shifted downwards since there are less base BTF ids
>> +        * in reference base BTF.
>> +        */
>> +       ref.diff_id = ref.nr_base_types - btf__type_cnt(base_btf);
>> +
>> +       /* First add all split types */
>> +       for (i = src_btf->start_id; i < n; i++) {
>> +               t = btf_type_by_id(src_btf, i);
>> +               ret = btf_add_type(&ref.pipe, t);
>> +               if (ret < 0)
>> +                       goto err_out;
>> +       }
>> +       /* Now update base/split BTF ids. */
>> +       for (i = 1; i < btf__type_cnt(split_btf); i++) {
> 
> nit: cache this btf__type_cnt() result, no need to keep calling it all the time
>

Will do.

>> +               t = btf_type_by_id(split_btf, i);
>> +
>> +               ret = btf_type_visit_type_ids((struct btf_type *)t,  btf_update_ref_type_ids, &ref);
> 
> nit: it's a bit annoying to cast t here, given btf_type_by_id()
> returns non-const type as you need it. Just mark t as non-const and
> use btf_type_by_id() everywhere?
>

Sure.

> 
> 
>> +               if (ret < 0)
>> +                       goto err_out;
>> +       }
>> +       free(ref.ids);
>> +       hashmap__free(ref.pipe.str_off_map);
>> +       return split_btf;
> 
> [...]
Andrii Nakryiko April 4, 2024, 10:49 p.m. UTC | #3
On Thu, Apr 4, 2024 at 8:22 AM Alan Maguire <alan.maguire@oracle.com> wrote:
>
> On 29/03/2024 22:00, Andrii Nakryiko wrote:
> > On Fri, Mar 22, 2024 at 3:25 AM Alan Maguire <alan.maguire@oracle.com> wrote:
> >>
> >> To support more robust split BTF, adding supplemental context for the
> >> base BTF type ids that split BTF refers to is required.  Without such
> >> references, a simple shuffling of base BTF type ids (without any other
> >> significant change) invalidates the split BTF.  Here the attempt is made
> >> to store additional context to make split BTF more robust.
> >>
> >> This context comes in the form of "reference" base BTF - this base BTF
> >> constitutes the minimal BTF representation needed to disambiguate split BTF
> >> references to base BTF.  So for example if a struct is referred to from
> >> split -> base BTF, a forward representation of that struct is retained
> >
> > so you decided to not bother with recording expected size? Remember we
> > were talking about situations where vmlinux struct can be embedded
> > inside the kernel module's struct. In this case *exact* struct size is
> > very important and can't be changed without breaking compatibility.
> > Shouldn't we keep track of this?
> >
>
> There are two cases here I think. The embedded case - which is rare, but
> needs size tracking - and the non-embedded case, where a struct/union
> can potentially change size without invalidating the module's
> assumptions. In the former, your suggestion about having an empty
> (sized) struct instead of a forward would work well I think. The only
> question is around the non-embedded case. We could either use the same
> approach (but then ignore the size constraints during reconciliation) or
> continue to use FWDs. The advantage of continuing to use FWDs is we
> wouldn't have to compute struct embeddedness during reconciliation, and
> the BTF itself would be a bit more informative about the constraints
> (FWD means no constraints, empty sized struct means we care about size
> due to embeddedness). What do you think?

Sure. I was going to propose using size zero, but FWD works just as
fine and avoids the ambiguity of embedding zero-sized structs. The
important part is to check and enforce size where it's important.
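
On the reconciliation side the matching rule would then be roughly (pure
sketch, not real code; 'd' is the distilled base type, 'cand' a candidate
in the running base BTF, and name/kind are assumed to be matched already):

	static bool base_ref_matches(const struct btf_type *d,
				     const struct btf_type *cand)
	{
		if (btf_is_fwd(d))
			return true;	/* no size constraint */
		if (btf_is_composite(d) && btf_vlen(d) == 0)
			return d->size == cand->size; /* embedded: exact size */
		/* anonymous types etc. need a full equivalence check */
		return false;
	}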

>
> >> in reference base BTF, and split BTF refers to it.  Base types are kept
> >> intact in reference base BTF, and reference types like pointers are stored
> >> as-is.  Avoiding struct/union/enum/enum64 expansion is important to keep
> >> reference base BTF representation to a minimum size; however anonymous
> >> struct, union and enum[64] types are represented in full since type details
> >> are needed to disambiguate the reference - the name is not enough in those
> >> cases since there is no name.  In practice these are rare; in sample
> >> cases where reference base BTF was generated for in-tree kernel modules,
> >> only a few were needed in base reference BTF.  These represent the
> >> anonymous struct/unions that are used by the module but were de-duplicated
> >> to use base vmlinux BTF ids instead.
> >>

[...]

> >> +/* Create updated split BTF with "reference" base BTF; reference base BTF
> >> + * consists of BTF information required to clarify the types that split
> >> + * BTF refers to, omitting unneeded details.  Specifically it will contain
> >> + * base types and forward declarations of structs, unions and enumerated
> >> + * types, along with associated reference types like pointers, arrays etc.
> >> + *
> >> + * The only case where structs, unions or enumerated types are fully represented
> >> + * is when they are anonymous; in such cases, info about type content is needed
> >> + * to clarify type references.
> >> + *
> >> + * We return newly-created split BTF where the split BTf refers to a newly-created
> >> + * base reference BTF. Both must be freed separately by the caller.
> >> + *
> >> + * When creating the BTF representation for a module and provided with the
> >> + * base_ref option, pahole will create split BTF using this API, and store
> >> + * the base reference BTF in the .BTF.base.reference section.
> >> + */
> >> +struct btf *btf__new_split_base_ref(struct btf *src_btf)
> >
> > I find this name very confusing, for some reason. btf__new*/btf__parse
> > is generally used to either create a new empty instance, or just
> > parse/index pre-existing BTF information. In this case I feel like
> > it's actually a different operation, where we create a derived BTF, so
> > I'd name it distinctively. How do you feel about something with
> > "distill" terminology as an action performed here. E.g.,
> > btf__distill_base() or something like that?
> >
> > The API contract is also suboptimal, given back split BTF where base
> > BTF is owned (and has to be freed). Let's keep it a bit more explicit,
> > something like:
> >
> > int btf__distill_base(struct btf *new_base_btf, struct btf *new_split_btf);
> >
> > WDYT?
> >
>
> Yeah, any suggestions around naming are very welcome! My first choice
> was "minimal base" but that seems to prioritize the minimization, which
> is really just a side-effect of the desire to add context to the
> references in the split BTF. "base reference" feels clumsy to me too.
> Ideally we want to find a name that signals that the base BTF is a
> companion to the associated split BTF, adding context for it. Describing
> the operation as distilling certainly connotes that we're removing
> non-essential info; so we'd end up with
>
> int btf__distill_base(const struct btf *src_btf, struct btf **new_base_btf,
>                       struct btf **new_split_btf);
>
> ...and we could call the section .BTF.distilled.base perhaps.
>

We can get cute with analogies, but I'm sure people will hate it. :)

What do you get after you remove all the non-essential cruft
(distilling the essence)? A "nugget" of goodness... ;)

I'd keep it short as .BTF.base (and know that it's this "distilled"
base BTF, not a real one), but I don't feel very strongly about this.

>
>
> >> +{
> >> +       unsigned int n = btf__type_cnt(src_btf);
> >> +       const struct btf_type *t;
> >> +       struct btf_ref ref = {};
> >> +       struct btf *base_btf = NULL, *split_btf = NULL;
> >> +       __u32 i, id = 0;
> >> +       int ret = 0;
> >> +
> >> +       /* src BTF must be split BTF. */
> >> +       if (!btf__base_btf(src_btf)) {
> >> +               errno = -EINVAL;
> >> +               return NULL;
> >> +       }
> >> +       base_btf = btf__new_empty();
> >

[...]
diff mbox series

Patch

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index dcff6c29c851..f02477af3d79 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1771,9 +1771,8 @@  static int btf_rewrite_str(__u32 *str_off, void *ctx)
 	return 0;
 }
 
-int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_type *src_type)
+static int btf_add_type(struct btf_pipe *p, const struct btf_type *src_type)
 {
-	struct btf_pipe p = { .src = src_btf, .dst = btf };
 	struct btf_type *t;
 	int sz, err;
 
@@ -1782,20 +1781,27 @@  int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_t
 		return libbpf_err(sz);
 
 	/* deconstruct BTF, if necessary, and invalidate raw_data */
-	if (btf_ensure_modifiable(btf))
+	if (btf_ensure_modifiable(p->dst))
 		return libbpf_err(-ENOMEM);
 
-	t = btf_add_type_mem(btf, sz);
+	t = btf_add_type_mem(p->dst, sz);
 	if (!t)
 		return libbpf_err(-ENOMEM);
 
 	memcpy(t, src_type, sz);
 
-	err = btf_type_visit_str_offs(t, btf_rewrite_str, &p);
+	err = btf_type_visit_str_offs(t, btf_rewrite_str, p);
 	if (err)
 		return libbpf_err(err);
 
-	return btf_commit_type(btf, sz);
+	return btf_commit_type(p->dst, sz);
+}
+
+int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_type *src_type)
+{
+	struct btf_pipe p = { .src = src_btf, .dst = btf };
+
+	return btf_add_type(&p, src_type);
 }
 
 static int btf_rewrite_type_ids(__u32 *type_id, void *ctx)
@@ -5217,3 +5223,200 @@  int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void
 
 	return 0;
 }
+
+struct btf_ref {
+	struct btf_pipe pipe;
+	__u32 *ids;
+	unsigned int nr_base_types;
+	unsigned int diff_id;
+};
+
+static bool btf_is_eligible_named_fwd(const struct btf_type *t)
+{
+	return (btf_is_composite(t) || btf_is_any_enum(t)) && t->name_off != 0;
+}
+
+static int btf_add_ref_type_ids(__u32 *id, void *ctx)
+{
+	struct btf_ref *ref = ctx;
+	struct btf_type *t = btf_type_by_id(ref->pipe.src, *id);
+	int ret;
+
+	/* split BTF id, not needed */
+	if (*id > ref->nr_base_types)
+		return 0;
+	/* already added ? */
+	if (ref->ids[*id] <= BTF_MAX_NR_TYPES)
+		return 0;
+	ref->ids[*id] = *id;
+
+	/* struct/union members not needed, except for anonymous structs
+	 * and unions, which we need since name won't help us determine
+	 * matches; so if a named struct/union, no need to recurse
+	 * into members.
+	 */
+	if (btf_is_eligible_named_fwd(t))
+		return 0;
+
+	/* ensure references in type are added also. */
+	ret = btf_type_visit_type_ids(t, btf_add_ref_type_ids, ctx);
+	if (ret < 0)
+		return ret;
+	return 0;
+}
+
+/* All split BTF ids will be shifted downwards since there are fewer base BTF types
+ * in reference base BTF, and for those that refer to base BTF, we use the
+ * reference map to map from original base BTF to reference base BTF id.
+ */
+static int btf_update_ref_type_ids(__u32 *id, void *ctx)
+{
+	struct btf_ref *ref = ctx;
+
+	if (*id >= ref->nr_base_types)
+		*id -= ref->diff_id;
+	else
+		*id = ref->ids[*id];
+	return 0;
+}
+
+/* Create updated split BTF with "reference" base BTF; reference base BTF
+ * consists of BTF information required to clarify the types that split
+ * BTF refers to, omitting unneeded details.  Specifically it will contain
+ * base types and forward declarations of structs, unions and enumerated
+ * types, along with associated reference types like pointers, arrays etc.
+ *
+ * The only case where structs, unions or enumerated types are fully represented
+ * is when they are anonymous; in such cases, info about type content is needed
+ * to clarify type references.
+ *
+ * We return newly-created split BTF where the split BTF refers to a newly-created
+ * base reference BTF. Both must be freed separately by the caller.
+ *
+ * When creating the BTF representation for a module and provided with the
+ * base_ref option, pahole will create split BTF using this API, and store
+ * the base reference BTF in the .BTF.base.reference section.
+ */
+struct btf *btf__new_split_base_ref(struct btf *src_btf)
+{
+	unsigned int n = btf__type_cnt(src_btf);
+	const struct btf_type *t;
+	struct btf_ref ref = {};
+	struct btf *base_btf = NULL, *split_btf = NULL;
+	__u32 i, id = 0;
+	int ret = 0;
+
+	/* src BTF must be split BTF. */
+	if (!btf__base_btf(src_btf)) {
+		errno = -EINVAL;
+		return NULL;
+	}
+	base_btf = btf__new_empty();
+	if (!base_btf)
+		return NULL;
+	ref.ids = calloc(n, sizeof(*ref.ids));
+	if (!ref.ids) {
+		ret = -ENOMEM;
+		goto err_out;
+	}
+	for (i = 1; i < n; i++)
+		ref.ids[i] = BTF_UNPROCESSED_ID;
+	ref.pipe.src = src_btf;
+	ref.pipe.dst = base_btf;
+	ref.pipe.str_off_map = hashmap__new(btf_dedup_identity_hash_fn, btf_dedup_equal_fn, NULL);
+	if (IS_ERR(ref.pipe.str_off_map)) {
+		ret = -ENOMEM;
+		goto err_out;
+	}
+	ref.nr_base_types = btf__type_cnt(btf__base_btf(src_btf));
+
+	/* Pass over src split BTF; generate the list of base BTF
+	 * type ids it references; these will constitute our base
+	 * reference BTF set.
+	 */
+	for (i = src_btf->start_id; i < n; i++) {
+		t = btf__type_by_id(src_btf, i);
+
+		ret = btf_type_visit_type_ids((struct btf_type *)t,  btf_add_ref_type_ids, &ref);
+		if (ret < 0)
+			goto err_out;
+	}
+	/* Next add types for each of the required references. */
+	for (i = 1; i < src_btf->start_id; i++) {
+		if (ref.ids[i] > BTF_MAX_NR_TYPES)
+			continue;
+		t = btf_type_by_id(src_btf, i);
+		/* for named struct/unions/enums, use a fwd since we can match
+		 * via name without details.
+		 */
+		if (btf_is_eligible_named_fwd(t)) {
+			enum btf_fwd_kind fwd_kind;
+
+			switch (btf_kind(t)) {
+			case BTF_KIND_STRUCT:
+				fwd_kind = BTF_FWD_STRUCT;
+				break;
+			case BTF_KIND_UNION:
+				fwd_kind = BTF_FWD_UNION;
+				break;
+			case BTF_KIND_ENUM:
+				fwd_kind = BTF_FWD_ENUM;
+				break;
+			case BTF_KIND_ENUM64:
+				fwd_kind = BTF_FWD_ENUM64;
+				break;
+			default:
+				pr_warn("unexpected kind [%u] when creating base reference BTF.\n",
+					btf_kind(t));
+				goto err_out;
+			}
+			ret = btf__add_fwd(base_btf, btf__name_by_offset(src_btf, t->name_off),
+					   fwd_kind);
+		} else {
+			ret = btf_add_type(&ref.pipe, t);
+		}
+		if (ret < 0)
+			goto err_out;
+		ref.ids[i] = ++id;
+	}
+	/* now create new split BTF with reference base BTF as its base; we end up with
+	 * split BTF that has base BTF that represents enough about its base references
+	 * to allow it to be reconciled with the base BTF available.
+	 */
+	split_btf = btf__new_empty_split(base_btf);
+	if (!split_btf)
+		goto err_out;
+
+	ref.pipe.dst = split_btf;
+	/* all split BTF ids will be shifted downwards since there are less base BTF ids
+	 * in reference base BTF.
+	 */
+	ref.diff_id = ref.nr_base_types - btf__type_cnt(base_btf);
+
+	/* First add all split types */
+	for (i = src_btf->start_id; i < n; i++) {
+		t = btf_type_by_id(src_btf, i);
+		ret = btf_add_type(&ref.pipe, t);
+		if (ret < 0)
+			goto err_out;
+	}
+	/* Now update base/split BTF ids. */
+	for (i = 1; i < btf__type_cnt(split_btf); i++) {
+		t = btf_type_by_id(split_btf, i);
+
+		ret = btf_type_visit_type_ids((struct btf_type *)t,  btf_update_ref_type_ids, &ref);
+		if (ret < 0)
+			goto err_out;
+	}
+	free(ref.ids);
+	hashmap__free(ref.pipe.str_off_map);
+	return split_btf;
+err_out:
+	free(ref.ids);
+	hashmap__free(ref.pipe.str_off_map);
+	btf__free(split_btf);
+	btf__free(base_btf);
+	if (ret < 0)
+		errno = -ret;
+	return NULL;
+}
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 47d3e00b25c7..f2a853d6fa55 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -107,6 +107,21 @@  LIBBPF_API struct btf *btf__new_empty(void);
  */
 LIBBPF_API struct btf *btf__new_empty_split(struct btf *base_btf);
 
+/**
+ * @brief **btf__new_split_base_ref()** creates a new version of the
+ * split BTF *src_btf* passed in.  Its new base BTF will only contain
+ * the types needed to improve robustness of the split BTF to
+ * small changes in base BTF.  When that split BTF is loaded against
+ * a (possibly changed) base, this reference base BTF will help update
+ * references to that (possibly changed) base BTF.
+ *
+ * Both the new split and its reference base BTF it is built on top of
+ * must be freed by the caller.
+ *
+ * On error, NULL is returned and errno set.
+ */
+LIBBPF_API struct btf *btf__new_split_base_ref(struct btf *src_btf);
+
 LIBBPF_API struct btf *btf__parse(const char *path, struct btf_ext **btf_ext);
 LIBBPF_API struct btf *btf__parse_split(const char *path, struct btf *base_btf);
 LIBBPF_API struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 51732ecb1385..3d53b1781af1 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -414,5 +414,6 @@  LIBBPF_1.4.0 {
 		bpf_raw_tracepoint_open_opts;
 		bpf_token_create;
 		btf__new_split;
+		btf__new_split_base_ref;
 		btf_ext__raw_data;
 } LIBBPF_1.3.0;