
[RFC,bpf-next,09/13] libbpf: split BTF reconciliation

Message ID 20240322102455.98558-11-alan.maguire@oracle.com (mailing list archive)
State RFC
Delegated to: BPF
Series bpf: support resilient split BTF

Checks

Context Check Description
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/apply fail Patch does not apply to bpf-next-0
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat

Commit Message

Alan Maguire March 22, 2024, 10:24 a.m. UTC
Map base reference BTF type ids referenced in split BTF and their
references to the base BTF passed in, and if the mapping succeeds,
reparent the split BTF to the base BTF.

Reconciliation rules are

- base types must match exactly
- enum[64] types should match all value name/value pairs, but the
  to-be-reconciled enum[64] can also define additional name/value pairs
- named fwds match to the correspondingly-named struct/union/enum/enum64
- anon struct/unions must have field names/offsets specified in base
  reference BTF matched by those in base BTF we are matching with

Reconciliation cannot recurse, since it will also be used in-kernel and
we do not want to blow up the kernel stack when carrying out type
compatibility checks.  Hence we use a stack for reference type
reconciliation rather than recursive function calls.
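
For illustration, a hypothetical usage sketch of the new API (file names
are made up and error handling is simplified):

#include <errno.h>
#include <bpf/btf.h>
#include <bpf/libbpf.h>

static int reconcile_module_btf(void)
{
	struct btf *vmlinux_btf, *base_ref, *split;
	int err;

	vmlinux_btf = btf__load_vmlinux_btf();		  /* running kernel's base BTF */
	base_ref = btf__parse("module.base.btf", NULL);	  /* distilled base reference BTF */
	split = btf__parse_split("module.btf", base_ref); /* module split BTF on top of it */
	if (!vmlinux_btf || !base_ref || !split)
		return -EINVAL;

	/* map base reference ids to vmlinux ids; reparent split BTF to vmlinux BTF */
	err = btf__reconcile(split, vmlinux_btf);

	btf__free(split);
	btf__free(base_ref);
	btf__free(vmlinux_btf);
	return err;
}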

Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
---
 tools/lib/bpf/Build             |   2 +-
 tools/lib/bpf/btf.c             |  58 ++++
 tools/lib/bpf/btf.h             |   8 +
 tools/lib/bpf/btf_reconcile.c   | 590 ++++++++++++++++++++++++++++++++
 tools/lib/bpf/libbpf.map        |   1 +
 tools/lib/bpf/libbpf_internal.h |   2 +
 6 files changed, 660 insertions(+), 1 deletion(-)
 create mode 100644 tools/lib/bpf/btf_reconcile.c

Comments

Andrii Nakryiko March 29, 2024, 10:01 p.m. UTC | #1
On Fri, Mar 22, 2024 at 3:26 AM Alan Maguire <alan.maguire@oracle.com> wrote:
>
> Map base reference BTF type ids referenced in split BTF and their
> references to the base BTF passed in, and if the mapping succeeds,
> reparent the split BTF to the base BTF.
>
> Reconciliation rules are
>
> - base types must match exactly
> - enum[64] types should match all value name/value pairs, but the
>   to-be-reconciled enum[64] can also define additional name/value pairs
> - named fwds match to the correspondingly-named struct/union/enum/enum64

yeah, but what about their size if they are embedded? Using FWD was a
nice trick, but it's not flexible enough for recording (optionally)
size... Probably emitting an empty (but named) struct/union/enum would
be a bit better (and actually make split BTF using base ref more valid
even without pre-processing).
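
Roughly something like this (a sketch only, with a made-up helper, using
the existing btf__add_struct()/btf__add_union() APIs):

/* Instead of a FWD, record an empty but *named* struct/union that carries
 * the size; vlen stays 0, so no members are recorded, only name + size.
 */
static int add_sized_placeholder(struct btf *base_ref_btf, const char *name,
				 __u32 byte_sz, bool is_union)
{
	return is_union ? btf__add_union(base_ref_btf, name, byte_sz)
			: btf__add_struct(base_ref_btf, name, byte_sz);
}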

> - anon struct/unions must have field names/offsets specified in base
>   reference BTF matched by those in base BTF we are matching with
>
> Reconciliation can not recurse, since it will be used in-kernel also and
> we do not want to blow up the kernel stack when carrying out type
> compatibility checks.  Hence we use a stack for reference type
> reconciliation rather then recursive function calls.
>
> Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
> ---
>  tools/lib/bpf/Build             |   2 +-
>  tools/lib/bpf/btf.c             |  58 ++++
>  tools/lib/bpf/btf.h             |   8 +
>  tools/lib/bpf/btf_reconcile.c   | 590 ++++++++++++++++++++++++++++++++

how wrong would it be to call this process "relocate" rather than "reconcile"?

>  tools/lib/bpf/libbpf.map        |   1 +
>  tools/lib/bpf/libbpf_internal.h |   2 +
>  6 files changed, 660 insertions(+), 1 deletion(-)
>  create mode 100644 tools/lib/bpf/btf_reconcile.c
>

[...]

> +/* Find next type after *id in base BTF that matches kind of type t passed in
> + * and name (if it is specified).  Match fwd kinds to appropriate kind also.
> + */
> +static int btf_reconcile_find_next(struct btf_reconcile *r, const struct btf_type *t,
> +                                  __u32 *id, const struct btf_type **tp)

I haven't grokked the whole patch logic just yet, doing a first pass,
so I might be asking stupid questions, sorry.

But it looks like we have these linear searches here to find matching
types, is that right? Wouldn't it be better to build an index first to
speed up search?

> +{
> +       const struct btf_type *nt;
> +       int kind, tkind = btf_kind(t);
> +       int tkflag = btf_kflag(t);
> +       __u32 i;
> +

[...]

> +static int btf_reconcile_int(struct btf_reconcile *r, const char *name,
> +                            const struct btf_type *t, const struct btf_type *bt)
> +{
> +       __u32 *info = (__u32 *)(t + 1);
> +       __u32 *binfo = (__u32 *)(bt + 1);
> +
> +       if (t->size != bt->size) {
> +               pr_warn("INT types '%s' disagree on size; reference base BTF says %d; base BTF says %d\n",
> +                       name, t->size, bt->size);
> +               return -EINVAL;
> +       }
> +       if (BTF_INT_ENCODING(*info) != BTF_INT_ENCODING(*binfo)) {
> +               pr_warn("INT types '%s' disagree on encoding; reference base BTF says '(%s/%s/%s); base BTF says '(%s/%s/%s)'\n",
> +                       name,
> +                       BTF_INT_ENCODING(*info) & BTF_INT_SIGNED ? "signed" : "unsigned",
> +                       BTF_INT_ENCODING(*info) & BTF_INT_CHAR ? "char" : "nonchar",
> +                       BTF_INT_ENCODING(*info) & BTF_INT_BOOL ? "bool" : "nonbool",
> +                       BTF_INT_ENCODING(*binfo) & BTF_INT_SIGNED ? "signed" : "unsigned",
> +                       BTF_INT_ENCODING(*binfo) & BTF_INT_CHAR ? "char" : "nonchar",
> +                       BTF_INT_ENCODING(*binfo) & BTF_INT_BOOL ? "bool" : "nonbool");

there is btf_int_encoding() helper, please use it

> +               return -EINVAL;
> +       }
> +       if (BTF_INT_BITS(*info) != BTF_INT_BITS(*binfo)) {

btf_int_bits()

> +               pr_warn("INT types '%s' disagree on bit size; reference base BTF says %d; base BTF says %d\n",
> +                       name, BTF_INT_BITS(*info), BTF_INT_BITS(*binfo));
> +               return -EINVAL;
> +       }
> +       return 0;
> +}
> +
> +static int btf_reconcile_float(struct btf_reconcile *r, const char *name,
> +                              const struct btf_type *t, const struct btf_type *bt)
> +{
> +
> +       if (t->size != bt->size) {
> +               pr_warn("float types '%s' disagree on size; reference base BTF says %d; base BTF says %d\n",
> +                       name, t->size, bt->size);
> +               return -EINVAL;
> +       }
> +       return 0;
> +}
> +
> +/* Ensure each enum value in type t has equivalent in base BTF and that values (if any) match. */
> +static int btf_reconcile_enum(struct btf_reconcile *r, const char *name,
> +                             const struct btf_type *t, const struct btf_type *bt)
> +{

should we care about compatibility between ENUM and ENUM64, they can
both represent the same values of the same size?

> +       struct btf_enum *v = (struct btf_enum *)(t + 1);
> +       struct btf_enum *bv = (struct btf_enum *)(bt + 1);
> +       bool found, match;
> +       int i, j;
> +
> +       for (i = 0; i < btf_vlen(t); i++, v++) {
> +               found = match = false;
> +
> +               if (!v->name_off)
> +                       continue;
> +               for (j = 0; j < btf_vlen(bt); j++, bv++) {
> +                       if (!bv->name_off)
> +                               continue;
> +                       if (strcmp(btf__name_by_offset(r->base_ref_btf, v->name_off),
> +                                  btf__name_by_offset(r->base_btf, bv->name_off)) != 0)

nit: please use local vars to shorten these long if conditions, it
will be easier to read and follow

> +                               continue;
> +                       found = true;
> +                       match = (v->val == bv->val);
> +                       break;
> +               }

[...]

> +       while ((err = btf_reconcile_find_next(r, t, &base_id, &bt)) != -ENOENT) {
> +               bt = btf_type_by_id(r->base_btf, base_id);
> +               switch (btf_kind(t)) {
> +               case BTF_KIND_INT:
> +                       err = btf_reconcile_int(r, name, t, bt);
> +                       break;
> +               case BTF_KIND_ENUM:
> +                       err = btf_reconcile_enum(r, name, t, bt);
> +                       break;
> +               case BTF_KIND_FLOAT:
> +                       err = btf_reconcile_float(r, name, t, bt);
> +                       break;
> +               case BTF_KIND_ENUM64:
> +                       err = btf_reconcile_enum64(r, name, t, bt);
> +                       break;
> +               case BTF_KIND_FWD:

well, FWD can be for enum vs struct, gotta check that

> +                       err = 0;
> +                       break;
> +               default:
> +                       return 0;
> +               }
> +               if (!err) {
> +                       r->map[id] = base_id;
> +                       return 0;
> +               }
> +       }
> +       return err;
> +}

[...]

> +                               if (err) {
> +                                       pr_warn("could not find base BTF type for base reference type[%u]\n",
> +                                               id);
> +                                       return err;
> +                               }
> +                       } else {
> +                               if (btf_reconcile_push(r, id) < 0 ||
> +                                   btf_reconcile_push(r, t->type) < 0)
> +                                       return -ENOSPC;

I'm missing something, please help me understand. I don't get why we
need a recursive algorithm at all.

In my mind, we have this small "base ref" set of types referenced from
module's BTF (split BTF part), right? So all we should need is to map
every type from base ref set to vmlinux BTF.

What I don't yet fully get is why CONST/VOLATILE or PTR need to
postpone reconciliation via a queue. By the time we get to types in
split BTF all base ref types should be mapped, so all you need is to
remap t->type to resolved vmlinux BTF, no?

I suspect the answer might have something to do with those anonymous
structs/unions which you copy verbatim into base ref BTF?

But on the latter topic, I wonder if we at all need this? Why not keep
all those anon struct/union/enum in module's part of BTF? If they are
unnamed, I doubt they will ever be referenced from kfuncs or anything
like that, so their BTF ID isn't that important.

If base BTF is all named types, it would simplify the reconciliation
process significantly, I think.

But again, I only skimmed the overall algorithm, sorry for my
laziness, but I figured it would be good to discuss the above first
anyways.

> +                       }
> +                       break;

[...]
Alan Maguire April 5, 2024, 10:06 a.m. UTC | #2
On 29/03/2024 22:01, Andrii Nakryiko wrote:
> On Fri, Mar 22, 2024 at 3:26 AM Alan Maguire <alan.maguire@oracle.com> wrote:
>>
>> Map base reference BTF type ids referenced in split BTF and their
>> references to the base BTF passed in, and if the mapping succeeds,
>> reparent the split BTF to the base BTF.
>>
>> Reconciliation rules are
>>
>> - base types must match exactly
>> - enum[64] types should match all value name/value pairs, but the
>>   to-be-reconciled enum[64] can also define additional name/value pairs
>> - named fwds match to the correspondingly-named struct/union/enum/enum64
> 
> yeah, but what about their size if they are embedded? Using FWD was a
> nice trick, but it's not flexible enough for recording (optionally)
> size... Probably emitting an empty (but named) struct/union/enum would
> be a bit better (and actually make split BTF using base ref more valid
> even without pre-processing).
> 
>> - anon struct/unions must have field names/offsets specified in base
>>   reference BTF matched by those in base BTF we are matching with
>>
>> Reconciliation can not recurse, since it will be used in-kernel also and
>> we do not want to blow up the kernel stack when carrying out type
>> compatibility checks.  Hence we use a stack for reference type
>> reconciliation rather then recursive function calls.
>>
>> Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
>> ---
>>  tools/lib/bpf/Build             |   2 +-
>>  tools/lib/bpf/btf.c             |  58 ++++
>>  tools/lib/bpf/btf.h             |   8 +
>>  tools/lib/bpf/btf_reconcile.c   | 590 ++++++++++++++++++++++++++++++++
> 
> how wrong would it be to call this process "relocate" rather than "reconcile"?
>

seems fine to me.

>>  tools/lib/bpf/libbpf.map        |   1 +
>>  tools/lib/bpf/libbpf_internal.h |   2 +
>>  6 files changed, 660 insertions(+), 1 deletion(-)
>>  create mode 100644 tools/lib/bpf/btf_reconcile.c
>>
> 
> [...]
> 
>> +/* Find next type after *id in base BTF that matches kind of type t passed in
>> + * and name (if it is specified).  Match fwd kinds to appropriate kind also.
>> + */
>> +static int btf_reconcile_find_next(struct btf_reconcile *r, const struct btf_type *t,
>> +                                  __u32 *id, const struct btf_type **tp)
> 
> I haven't grokked the whole patch logic just yet, doing a first pass,
> so I might be asking stupid questions, sorry.
> 
> But it looks like we have these linear searches here to find matching
> types, is that right? Wouldn't it be better to build an index first to
> speed up search?
> 

It would, but the aim here was to keep things simple with an eye to
sharing the code with the kernel. A lot of the libbpf hash stuff would
be handy but then we'd have to have something on the kernel side. Given
that the size of the base BTF is so small, the linear searches aren't
much of a cost.

>> +{
>> +       const struct btf_type *nt;
>> +       int kind, tkind = btf_kind(t);
>> +       int tkflag = btf_kflag(t);
>> +       __u32 i;
>> +
> 
> [...]
> 
>> +static int btf_reconcile_int(struct btf_reconcile *r, const char *name,
>> +                            const struct btf_type *t, const struct btf_type *bt)
>> +{
>> +       __u32 *info = (__u32 *)(t + 1);
>> +       __u32 *binfo = (__u32 *)(bt + 1);
>> +
>> +       if (t->size != bt->size) {
>> +               pr_warn("INT types '%s' disagree on size; reference base BTF says %d; base BTF says %d\n",
>> +                       name, t->size, bt->size);
>> +               return -EINVAL;
>> +       }
>> +       if (BTF_INT_ENCODING(*info) != BTF_INT_ENCODING(*binfo)) {
>> +               pr_warn("INT types '%s' disagree on encoding; reference base BTF says '(%s/%s/%s); base BTF says '(%s/%s/%s)'\n",
>> +                       name,
>> +                       BTF_INT_ENCODING(*info) & BTF_INT_SIGNED ? "signed" : "unsigned",
>> +                       BTF_INT_ENCODING(*info) & BTF_INT_CHAR ? "char" : "nonchar",
>> +                       BTF_INT_ENCODING(*info) & BTF_INT_BOOL ? "bool" : "nonbool",
>> +                       BTF_INT_ENCODING(*binfo) & BTF_INT_SIGNED ? "signed" : "unsigned",
>> +                       BTF_INT_ENCODING(*binfo) & BTF_INT_CHAR ? "char" : "nonchar",
>> +                       BTF_INT_ENCODING(*binfo) & BTF_INT_BOOL ? "bool" : "nonbool");
> 
> there is btf_int_encoding() helper, please use it
>
will do, and for others below too.

>> +               return -EINVAL;
>> +       }
>> +       if (BTF_INT_BITS(*info) != BTF_INT_BITS(*binfo)) {
> 
> btf_int_bits()
> 
>> +               pr_warn("INT types '%s' disagree on bit size; reference base BTF says %d; base BTF says %d\n",
>> +                       name, BTF_INT_BITS(*info), BTF_INT_BITS(*binfo));
>> +               return -EINVAL;
>> +       }
>> +       return 0;
>> +}
>> +
>> +static int btf_reconcile_float(struct btf_reconcile *r, const char *name,
>> +                              const struct btf_type *t, const struct btf_type *bt)
>> +{
>> +
>> +       if (t->size != bt->size) {
>> +               pr_warn("float types '%s' disagree on size; reference base BTF says %d; base BTF says %d\n",
>> +                       name, t->size, bt->size);
>> +               return -EINVAL;
>> +       }
>> +       return 0;
>> +}
>> +
>> +/* Ensure each enum value in type t has equivalent in base BTF and that values (if any) match. */
>> +static int btf_reconcile_enum(struct btf_reconcile *r, const char *name,
>> +                             const struct btf_type *t, const struct btf_type *bt)
>> +{
> 
> should we care about compatibility between ENUM and ENUM64, they can
> both represent the same values of the same size?
> 

so do you mean if one representation uses an enum, another an enum64?
Yep that might well be the case, I'll add that too.

>> +       struct btf_enum *v = (struct btf_enum *)(t + 1);
>> +       struct btf_enum *bv = (struct btf_enum *)(bt + 1);
>> +       bool found, match;
>> +       int i, j;
>> +
>> +       for (i = 0; i < btf_vlen(t); i++, v++) {
>> +               found = match = false;
>> +
>> +               if (!v->name_off)
>> +                       continue;
>> +               for (j = 0; j < btf_vlen(bt); j++, bv++) {
>> +                       if (!bv->name_off)
>> +                               continue;
>> +                       if (strcmp(btf__name_by_offset(r->base_ref_btf, v->name_off),
>> +                                  btf__name_by_offset(r->base_btf, bv->name_off)) != 0)
> 
> nit: please use local vars to shorten these long if conditions, it
> will be easier to read and follow
>

will do.


>> +                               continue;
>> +                       found = true;
>> +                       match = (v->val == bv->val);
>> +                       break;
>> +               }
> 
> [...]
> 
>> +       while ((err = btf_reconcile_find_next(r, t, &base_id, &bt)) != -ENOENT) {
>> +               bt = btf_type_by_id(r->base_btf, base_id);
>> +               switch (btf_kind(t)) {
>> +               case BTF_KIND_INT:
>> +                       err = btf_reconcile_int(r, name, t, bt);
>> +                       break;
>> +               case BTF_KIND_ENUM:
>> +                       err = btf_reconcile_enum(r, name, t, bt);
>> +                       break;
>> +               case BTF_KIND_FLOAT:
>> +                       err = btf_reconcile_float(r, name, t, bt);
>> +                       break;
>> +               case BTF_KIND_ENUM64:
>> +                       err = btf_reconcile_enum64(r, name, t, bt);
>> +                       break;
>> +               case BTF_KIND_FWD:
> 
> well, FWD can be for enum vs struct, gotta check that
> 
>> +                       err = 0;
>> +                       break;
>> +               default:
>> +                       return 0;
>> +               }
>> +               if (!err) {
>> +                       r->map[id] = base_id;
>> +                       return 0;
>> +               }
>> +       }
>> +       return err;
>> +}
> 
> [...]
> 
>> +                               if (err) {
>> +                                       pr_warn("could not find base BTF type for base reference type[%u]\n",
>> +                                               id);
>> +                                       return err;
>> +                               }
>> +                       } else {
>> +                               if (btf_reconcile_push(r, id) < 0 ||
>> +                                   btf_reconcile_push(r, t->type) < 0)
>> +                                       return -ENOSPC;
> 
> I'm missing something, please help me understand. I don't get why we
> need a recursive algorithm at all.
> 
> In my mind, we have this small "base ref" set of types referenced from
> module's BTF (split BTF part), right? So all we should need is to map
> every type from base ref set to vmlinux BTF.
> 
> What I don't yet fully get is why CONST/VOLATILE or PTR need to
> postpone reconciliation via a queue. By the time we get to types in
> split BTF all base ref types should be mapped, so all you need is to
> remap t->type to resolved vmlinux BTF, no?
> 

It's possible to have multiple layers of reference in the distilled base
BTF though; for example here's a case from a module's distilled base BTF:

[41] PTR '(anon)' type_id=42
[42] CONST '(anon)' type_id=0

To resolve type id 41 we need to resolve type id 42, and since type id 0
already has a mapping, at that point we can look for CONSTs that refer
to type id 0 and then once we've established that mapping we can find
PTRs that have a t->type that refers to the const. So as described below
we keep pushing type ids onto the stack until we find one with a t->type
mapping to base BTF; once we hit that we can start looking in base BTF
for types that have the mapped t->type value.

> I suspect the answer might have something to do with those anonymous
> structs/unions which you copy verbatim into base ref BTF?
>

They do add a few more types, but we can get base BTF references that
don't come from that source too. The above case PTR CONST void for
example wasn't referred to via any other distilled base types.

> But on the latter topic, I wonder if we at all need this? Why not keep
> all those anon struct/union/enum in module's part of BTF? If they are
> unnamed, I doubt they will ever be referenced from kfuncs or anything
> like that, so their BTF ID isn't that important.
> 

Changing the module BTF would make things a bit more complicated.
Currently we just update type id references and string offsets. The anon
structs we end up with tend to be very small; from the same distilled
base BTF used above, here are the struct/union instances:

[35] STRUCT '(anon)' size=4 vlen=1
        'counter' type_id=6 bits_offset=0
[62] STRUCT '(anon)' size=8 vlen=1
        'raw_lock' type_id=48 bits_offset=0
[113] UNION '(anon)' size=8 vlen=2
        'kernel' type_id=13 bits_offset=0
        'user' type_id=13 bits_offset=0
[114] STRUCT '(anon)' size=16 vlen=2
        '(anon)' type_id=113 bits_offset=0
        'is_kernel' type_id=11 bits_offset=64 bitfield_size=1
[119] STRUCT '(anon)' size=8 vlen=1
        'net' type_id=85 bits_offset=0

These only added one type that wasn't referenced elsewhere - typedef
arch_rwlock_t.

> If base BTF is all named types, it would simplify the reconciliation
> process significantly, I think.
> 
> But again, I only skimmed the overall algorithm, sorry for my
> laziness, but I figured it would be good to discuss the above first
> anyways.
>

I'll try and walk through an example of how the algorithm proceeds; that
might help make the approach concrete and we can see if it can be
simplified.

Consider split BTF that uses the base BTF type "int *", i.e. a PTR to an
"int". It could do so in an anonymous struct/union, but also as an array
member type or a FUNC_PROTO or VAR. For such a reference type, we first
encounter the outer PTR. If its t->type has a mapping to base BTF "int"
(it will), we look through the base BTF for reference types that match
the kind (here a PTR) _and_ have the required t->type (int). Once we
find the reference type in base BTF, we can add the mapping for PTR to
int from distilled->base BTF id. So the reference type with one layer of
reference is pretty straightforward.

In the case where we don't have a mapping for a t->type - let's say PTR
to PTR to int (int **) - we push the type id for PTR to PTR to int onto
the stack along with the type id for t->type (PTR to int) after. So we
will first pop PTR to int and go through the above-described type
resolution. Next we will pop the PTR to PTR to int and because we've
resolved PTR to int now, its t->type will have a mapping and we can go
through the search process to find a PTR that refers to a PTR to int
in base BTF, and we then add that mapping too.

For an array we push the member and index types, a func proto the return
type and parameter types, etc.

So once multiple layers of reference are part of the picture I _think_
we need something like this approach.
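
To make that concrete, here is a condensed sketch of the worklist loop
(the real code is btf_reconcile_ref_type() in the patch; find_in_base()
and the push/pop helpers are simplified stand-ins):

	/* Stack-driven resolution of reference types in base reference BTF. */
	while (id != BTF_UNPROCESSED_ID) {
		t = btf_type_by_id(base_ref_btf, id);
		if (map[t->type] <= BTF_MAX_NR_TYPES) {
			/* dependency resolved: search base BTF for a type of
			 * the same kind/name whose t->type == map[t->type]
			 */
			map[id] = find_in_base(t, map[t->type]);
		} else {
			push(id);	/* revisit once the dependency is mapped */
			push(t->type);	/* resolve the referenced type first */
		}
		id = pop();
	}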

Alan

>> +                       }
>> +                       break;
> 
> [...]
Andrii Nakryiko April 5, 2024, 7:58 p.m. UTC | #3
On Fri, Apr 5, 2024 at 3:06 AM Alan Maguire <alan.maguire@oracle.com> wrote:
>
> On 29/03/2024 22:01, Andrii Nakryiko wrote:
> > On Fri, Mar 22, 2024 at 3:26 AM Alan Maguire <alan.maguire@oracle.com> wrote:
> >>
> >> Map base reference BTF type ids referenced in split BTF and their
> >> references to the base BTF passed in, and if the mapping succeeds,
> >> reparent the split BTF to the base BTF.
> >>
> >> Reconciliation rules are
> >>
> >> - base types must match exactly
> >> - enum[64] types should match all value name/value pairs, but the
> >>   to-be-reconciled enum[64] can also define additional name/value pairs
> >> - named fwds match to the correspondingly-named struct/union/enum/enum64
> >
> > yeah, but what about their size if they are embedded? Using FWD was a
> > nice trick, but it's not flexible enough for recording (optionally)
> > size... Probably emitting an empty (but named) struct/union/enum would
> > be a bit better (and actually make split BTF using base ref more valid
> > even without pre-processing).
> >
> >> - anon struct/unions must have field names/offsets specified in base
> >>   reference BTF matched by those in base BTF we are matching with
> >>
> >> Reconciliation can not recurse, since it will be used in-kernel also and
> >> we do not want to blow up the kernel stack when carrying out type
> >> compatibility checks.  Hence we use a stack for reference type
> >> reconciliation rather then recursive function calls.
> >>
> >> Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
> >> ---
> >>  tools/lib/bpf/Build             |   2 +-
> >>  tools/lib/bpf/btf.c             |  58 ++++
> >>  tools/lib/bpf/btf.h             |   8 +
> >>  tools/lib/bpf/btf_reconcile.c   | 590 ++++++++++++++++++++++++++++++++
> >
> > how wrong would it be to call this process "relocate" rather than "reconcile"?
> >
>
> seems fine to me.
>
> >>  tools/lib/bpf/libbpf.map        |   1 +
> >>  tools/lib/bpf/libbpf_internal.h |   2 +
> >>  6 files changed, 660 insertions(+), 1 deletion(-)
> >>  create mode 100644 tools/lib/bpf/btf_reconcile.c
> >>
> >
> > [...]
> >
> >> +/* Find next type after *id in base BTF that matches kind of type t passed in
> >> + * and name (if it is specified).  Match fwd kinds to appropriate kind also.
> >> + */
> >> +static int btf_reconcile_find_next(struct btf_reconcile *r, const struct btf_type *t,
> >> +                                  __u32 *id, const struct btf_type **tp)
> >
> > I haven't grokked the whole patch logic just yet, doing a first pass,
> > so I might be asking stupid questions, sorry.
> >
> > But it looks like we have these linear searches here to find matching
> > types, is that right? Wouldn't it be better to build an index first to
> > speed up search?
> >
>
> It would, but the aim here was to keep things simple with an eye to
> sharing the code with the kernel. A lot of the libbpf hash stuff would
> be handy but then we'd have to have something on the kernel side. Given
> that the size of the base BTF is so small, the linear searches aren't
> much of a cost.

I didn't mean hashmap, I was thinking sort+binary search, but ok, we
can do that later as an optimization.
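
Something along these lines, as a rough sketch (entry layout and helper
usage are made up):

#include <stdlib.h>
#include <string.h>
#include <linux/types.h>

/* index over base BTF, built once up front and sorted by (kind, name) */
struct base_idx_ent {
	__u32 id;
	int kind;
	const char *name;
};

static int base_idx_cmp(const void *a, const void *b)
{
	const struct base_idx_ent *x = a, *y = b;

	if (x->kind != y->kind)
		return x->kind < y->kind ? -1 : 1;
	return strcmp(x->name, y->name);
}

/* build: qsort(ents, nr_ents, sizeof(*ents), base_idx_cmp);
 * lookup: bsearch() on (kind, name), then apply the per-kind checks
 * (size, encoding, members, ...) to the candidates with that name.
 */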

[...]

> >> +
> >> +/* Ensure each enum value in type t has equivalent in base BTF and that values (if any) match. */
> >> +static int btf_reconcile_enum(struct btf_reconcile *r, const char *name,
> >> +                             const struct btf_type *t, const struct btf_type *bt)
> >> +{
> >
> > should we care about compatibility between ENUM and ENUM64, they can
> > both represent the same values of the same size?
> >
>
> so do you mean if one representation uses an enum, another an enum64?
> Yep that might well be the case, I'll add that too.

yep, they are two different kinds, but they overlap in what they can
represent, so we try to support them interchangeably, if possible

[...]

> >> +                               if (err) {
> >> +                                       pr_warn("could not find base BTF type for base reference type[%u]\n",
> >> +                                               id);
> >> +                                       return err;
> >> +                               }
> >> +                       } else {
> >> +                               if (btf_reconcile_push(r, id) < 0 ||
> >> +                                   btf_reconcile_push(r, t->type) < 0)
> >> +                                       return -ENOSPC;
> >
> > I'm missing something, please help me understand. I don't get why we
> > need a recursive algorithm at all.
> >
> > In my mind, we have this small "base ref" set of types referenced from
> > module's BTF (split BTF part), right? So all we should need is to map
> > every type from base ref set to vmlinux BTF.
> >
> > What I don't yet fully get is why CONST/VOLATILE or PTR need to
> > postpone reconciliation via a queue. By the time we get to types in
> > split BTF all base ref types should be mapped, so all you need is to
> > remap t->type to resolved vmlinux BTF, no?
> >
>
> It's possible to have multiple layers of reference in the distilled base
> BTF though; for example here's a case from a module's distilled base BTF:
>
> [41] PTR '(anon)' type_id=42
> [42] CONST '(anon)' type_id=0
>
> To resolve type id 41 we need to resolve type id 42, and since type id 0
> already has a mapping, at that point we can look for CONSTs that refer
> to type id 0 and then once we've established that mapping we can find
> PTRs that have a t->type that refers to the const. So as described below
> we keep pushing type ids onto the stack until we find one with a t->type
> mapping to base BTF; once we hit that we can start looking in base BTF
> for types that have the mapped t->type value.
>

see below

> > I suspect the answer might have something to do with those anonymous
> > structs/unions which you copy verbatim into base ref BTF?
> >
>
> They do add a few more types, but we can get base BTF references that
> don't come from that source too. The above case PTR CONST void for
> example wasn't referred to via any other distilled base types.

I think it's too problematic to allow unnamed types in base reference
BTF. If we keep the rule that only named
structs/unions/typedefs/int/float kinds can be in base reference, then
everything becomes much simpler and faster, without breaking any of
kfunc/PTR_TO_BTF_ID usage.

The biggest problem is probably TYPEDEF pointing to anonymous
struct/union/func proto, so we might want to still record the shape of
expected underlying type, maybe, but we still go off of just a name in
the first place. Maybe initially we should just say that any ambiguity
would be rejected and keep only TYPEDEF with name?

Let me know if this is unrealistic, though.

>
> > But on the latter topic, I wonder if we at all need this? Why not keep
> > all those anon struct/union/enum in module's part of BTF? If they are
> > unnamed, I doubt they will ever be referenced from kfuncs or anything
> > like that, so their BTF ID isn't that important.
> >
>
> Changing the module BTF would make things a bit more complicated.

you mean appending those anonymous types from base BTF to module BTF?
It shouldn't be too hard, we just do it recursively and keep track of
ID remapping (so we don't add the same type twice). Or is there
something more?
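
Roughly (a sketch only; the id-remapping bookkeeping is elided and
copy_anon_type() is a made-up wrapper around the existing btf__add_type()):

#include <errno.h>
#include <bpf/btf.h>

/* Append a copy of an anonymous type from the distilled base into the
 * module's split BTF.  Member/reference type ids in the copy would still
 * need remapping, which is not shown here.
 */
static int copy_anon_type(struct btf *split_btf, const struct btf *base_ref_btf,
			  __u32 id)
{
	const struct btf_type *t = btf__type_by_id(base_ref_btf, id);

	if (!t)
		return -EINVAL;
	return btf__add_type(split_btf, base_ref_btf, t);
}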


> Currently we just update type id references and string offsets. The anon
> structs we end up with tend to be very small; from the same distilled
> base BTF used above here are the instances of struct/union:
>
> [35] STRUCT '(anon)' size=4 vlen=1
>         'counter' type_id=6 bits_offset=0
> [62] STRUCT '(anon)' size=8 vlen=1
>         'raw_lock' type_id=48 bits_offset=0
> [113] UNION '(anon)' size=8 vlen=2
>         'kernel' type_id=13 bits_offset=0
>         'user' type_id=13 bits_offset=0
> [114] STRUCT '(anon)' size=16 vlen=2
>         '(anon)' type_id=113 bits_offset=0
>         'is_kernel' type_id=11 bits_offset=64 bitfield_size=1
> [119] STRUCT '(anon)' size=8 vlen=1
>         'net' type_id=85 bits_offset=0
>
> These only added one type that wasn't referenced elsewhere - typedef
> arch_rwlock_t.
>
> > If base BTF is all named types, it would simplify the reconciliation
> > process significantly, I think.
> >
> > But again, I only skimmed the overall algorithm, sorry for my
> > laziness, but I figured it would be good to discuss the above first
> > anyways.
> >
>
> I'll try and walk through an example of how the algorithm proceeds; that
> might help make the approach concrete and we can see if it can be
> simplified.
>
> Consider split BTF that uses the base BTF type "int *", i.e. a PTR to an
> "int". It could do so in an anonymous struct/union, but also as an array
> member type or a FUNC_PROTO or VAR. For such a reference type, we first
> encounter the outer PTR. If its t->type has a mapping to base BTF "int"
> (it will), we look through the base BTF for reference types that match
> the kind (here a PTR) _and_ have the required t->type (int). Once we
> find the reference type in base BTF, we can add the mapping for PTR to
> int from distilled->base BTF id. So the reference type with one layer of
> reference is pretty straightforward.
>
> In the case where we don't have a mapping for a t->type - let's say PTR
> to PTR to int (int **) - we push the type id for PTR to PTR to int onto
> the stack along with the type id for t->type (PTR to int) after. So we
> will first pop PTR to int and go through the above-described type
> resolution. Next we will pop the PTR to PTR to int and because we've
> resolved PTR to int now, it's t->type will have a mapping and we can go
> through the search process to find a PTR that refers to a PTR to int
> in base BTF, and we then add that mapping too.

sure, pretty standard dfs with memoization, makes sense

what makes me a bit uncomfortable is that if you have PTR -> STRUCT,
once you've found a match for STRUCT, you'll go do a linear search for any
PTR to that STRUCT. Which starts to sound like BTF deduplication
(though admittedly matching STRUCT by name removes half of BTF dedup
complexity) and the way you are doing it right now with linear search
over vmlinux BTF is going to be slow (which is why I was proposing an
index).

Ok, anyways, it's probably going to work. If we can simplify further
with keeping just named types, it would be great. If not, so be it, I
suppose.

>
> For an array we push the member and index types, a func proto the return
> type and parameter types, etc.
>
> So once multiple layers of reference are part of the picture I _think_
> we need something like this approach.
>
> Alan
>
> >> +                       }
> >> +                       break;
> >
> > [...]

Patch

diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index b6619199a706..c05cc524ca30 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1,4 +1,4 @@ 
 libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
 	    netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \
 	    btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \
-	    usdt.o zip.o elf.o features.o
+	    usdt.o zip.o elf.o features.o btf_reconcile.o
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index d43c3eba27e0..09a11954cad8 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -5432,3 +5432,61 @@  struct btf *btf__new_split_base_ref(struct btf *src_btf)
 		errno = -ret;
 	return NULL;
 }
+
+struct btf_rewrite_strs {
+	struct btf *btf;
+	const struct btf *old_base_btf;
+	int str_start;
+	int str_diff;
+};
+
+static int btf_rewrite_strs(__u32 *str_off, void *ctx)
+{
+	struct btf_rewrite_strs *r = ctx;
+	const char *s;
+	int off;
+
+	if (!*str_off)
+		return 0;
+	if (*str_off >= r->str_start) {
+		*str_off += r->str_diff;
+	} else {
+		s = btf__str_by_offset(r->old_base_btf, *str_off);
+		if (!s)
+			return -ENOENT;
+		off = btf__add_str(r->btf, s);
+		if (off < 0)
+			return off;
+		*str_off = off;
+	}
+	return 0;
+}
+
+int btf_set_base_btf(struct btf *btf, struct btf *base_btf)
+{
+	struct btf_rewrite_strs r = {};
+	struct btf_type *t;
+	int i, err;
+
+	r.old_base_btf = btf__base_btf(btf);
+	if (!r.old_base_btf)
+		return -EINVAL;
+	r.btf = btf;
+	r.str_start = r.old_base_btf->hdr->str_len;
+	r.str_diff = base_btf->hdr->str_len - r.old_base_btf->hdr->str_len;
+	btf->base_btf = base_btf;
+	btf->start_id = btf__type_cnt(base_btf);
+	btf->start_str_off = base_btf->hdr->str_len;
+	for (i = 0; i < btf->nr_types; i++) {
+		t = (struct btf_type *)btf__type_by_id(btf, i + btf->start_id);
+		err = btf_type_visit_str_offs((struct btf_type *)t, btf_rewrite_strs, &r);
+		if (err)
+			break;
+	}
+	return err;
+}
+
+int btf__reconcile(struct btf *btf, const struct btf *base_btf)
+{
+	return btf_reconcile(btf, base_btf, NULL);
+}
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index c63043520149..4cda17cbcde9 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -279,6 +279,14 @@  struct btf_dedup_opts {
 
 LIBBPF_API int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts);
 
+/**
+ * @brief **btf__reconcile()** will check the split BTF *btf* for references
+ * to base BTF kinds, and verify those references are compatible with
+ * *base_btf*; if they are, *btf* is adjusted such that it is re-parented to
+ * *base_btf* and type ids and strings are adjusted to accommodate this.
+ */
+LIBBPF_API int btf__reconcile(struct btf *btf, const struct btf *base_btf);
+
 struct btf_dump;
 
 struct btf_dump_opts {
diff --git a/tools/lib/bpf/btf_reconcile.c b/tools/lib/bpf/btf_reconcile.c
new file mode 100644
index 000000000000..84787328a1de
--- /dev/null
+++ b/tools/lib/bpf/btf_reconcile.c
@@ -0,0 +1,590 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024, Oracle and/or its affiliates. */
+
+#include "btf.h"
+#include "bpf.h"
+#include "libbpf.h"
+#include "libbpf_internal.h"
+
+struct btf;
+
+#define BTF_MAX_NR_TYPES 0x7fffffffU
+#define BTF_UNPROCESSED_ID ((__u32)-1)
+
+struct btf_reconcile {
+	struct btf *btf;
+	const struct btf *base_btf;
+	const struct btf *base_ref_btf;
+	unsigned int nr_base_types;
+	__u32 *map;
+	__u32 *stack;
+	unsigned int stack_size;
+	unsigned int stack_limit;
+};
+
+/* Find next type after *id in base BTF that matches kind of type t passed in
+ * and name (if it is specified).  Match fwd kinds to appropriate kind also.
+ */
+static int btf_reconcile_find_next(struct btf_reconcile *r, const struct btf_type *t,
+				   __u32 *id, const struct btf_type **tp)
+{
+	const struct btf_type *nt;
+	int kind, tkind = btf_kind(t);
+	int tkflag = btf_kflag(t);
+	__u32 i;
+
+	for (i = *id + 1; i < r->nr_base_types; i++) {
+		nt = btf__type_by_id(r->base_btf, i);
+		kind = btf_kind(nt);
+		if (tkind != BTF_KIND_FWD) {
+			if (kind != tkind)
+				continue;
+		} else  {
+			switch (kind) {
+			case BTF_KIND_FWD:
+				continue;
+			case BTF_KIND_STRUCT:
+				if (tkflag)
+					continue;
+				break;
+			case BTF_KIND_UNION:
+				if (!tkflag)
+					continue;
+				break;
+			default:
+				break;
+			}
+		}
+		/* either names must match or both be anon. */
+		if (t->name_off && nt->name_off) {
+			if (strcmp(btf__name_by_offset(r->btf, t->name_off),
+				   btf__name_by_offset(r->base_btf, nt->name_off)))
+				continue;
+		} else if (t->name_off != nt->name_off) {
+			continue;
+		}
+		*tp = nt;
+		*id = i;
+		return 0;
+	}
+	return -ENOENT;
+}
+
+static int btf_reconcile_int(struct btf_reconcile *r, const char *name,
+			     const struct btf_type *t, const struct btf_type *bt)
+{
+	__u32 *info = (__u32 *)(t + 1);
+	__u32 *binfo = (__u32 *)(bt + 1);
+
+	if (t->size != bt->size) {
+		pr_warn("INT types '%s' disagree on size; reference base BTF says %d; base BTF says %d\n",
+			name, t->size, bt->size);
+		return -EINVAL;
+	}
+	if (BTF_INT_ENCODING(*info) != BTF_INT_ENCODING(*binfo)) {
+		pr_warn("INT types '%s' disagree on encoding; reference base BTF says '(%s/%s/%s); base BTF says '(%s/%s/%s)'\n",
+			name,
+			BTF_INT_ENCODING(*info) & BTF_INT_SIGNED ? "signed" : "unsigned",
+			BTF_INT_ENCODING(*info) & BTF_INT_CHAR ? "char" : "nonchar",
+			BTF_INT_ENCODING(*info) & BTF_INT_BOOL ? "bool" : "nonbool",
+			BTF_INT_ENCODING(*binfo) & BTF_INT_SIGNED ? "signed" : "unsigned",
+			BTF_INT_ENCODING(*binfo) & BTF_INT_CHAR ? "char" : "nonchar",
+			BTF_INT_ENCODING(*binfo) & BTF_INT_BOOL ? "bool" : "nonbool");
+		return -EINVAL;
+	}
+	if (BTF_INT_BITS(*info) != BTF_INT_BITS(*binfo)) {
+		pr_warn("INT types '%s' disagree on bit size; reference base BTF says %d; base BTF says %d\n",
+			name, BTF_INT_BITS(*info), BTF_INT_BITS(*binfo));
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int btf_reconcile_float(struct btf_reconcile *r, const char *name,
+			       const struct btf_type *t, const struct btf_type *bt)
+{
+
+	if (t->size != bt->size) {
+		pr_warn("float types '%s' disagree on size; reference base BTF says %d; base BTF says %d\n",
+			name, t->size, bt->size);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/* Ensure each enum value in type t has equivalent in base BTF and that values (if any) match. */
+static int btf_reconcile_enum(struct btf_reconcile *r, const char *name,
+			      const struct btf_type *t, const struct btf_type *bt)
+{
+	struct btf_enum *v = (struct btf_enum *)(t + 1);
+	struct btf_enum *bv = (struct btf_enum *)(bt + 1);
+	bool found, match;
+	int i, j;
+
+	for (i = 0; i < btf_vlen(t); i++, v++) {
+		found = match = false;
+
+		if (!v->name_off)
+			continue;
+		for (j = 0; j < btf_vlen(bt); j++, bv++) {
+			if (!bv->name_off)
+				continue;
+			if (strcmp(btf__name_by_offset(r->base_ref_btf, v->name_off),
+				   btf__name_by_offset(r->base_btf, bv->name_off)) != 0)
+				continue;
+			found = true;
+			match = (v->val == bv->val);
+			break;
+		}
+		if (!found) {
+			if (t->name_off)
+				pr_warn("ENUM types '%s' disagree; base reference BTF has enum value '%s' (%d), base BTF does not have that value.\n",
+					name,
+					btf__name_by_offset(r->base_ref_btf, v->name_off),
+					v->val);
+			return -EINVAL;
+		}
+		if (!match) {
+			if (t->name_off)
+				pr_warn("ENUM types '%s' disagree on enum value '%s'; reference base BTF specifies value %d; base BTF specifies value %d\n",
+					name,
+					btf__name_by_offset(r->base_ref_btf, v->name_off),
+					v->val, bv->val);
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
+/* ensure each enum64 value in type t has equivalent in base BTF and that values match */
+static int btf_reconcile_enum64(struct btf_reconcile *r, const char *name,
+			      const struct btf_type *t, const struct btf_type *bt)
+{
+	struct btf_enum64 *v = (struct btf_enum64 *)(t + 1);
+	struct btf_enum64 *bv = (struct btf_enum64 *)(bt + 1);
+	bool found, match;
+	int i, j;
+
+	for (i = 0; i < btf_vlen(t); i++, v++) {
+		found = match = false;
+
+		if (!v->name_off)
+			continue;
+		for (j = 0; j < btf_vlen(bt); j++, bv++) {
+			if (!bv->name_off)
+				continue;
+			if (strcmp(btf__name_by_offset(r->base_ref_btf, v->name_off),
+				   btf__name_by_offset(r->base_btf, bv->name_off)) != 0)
+				continue;
+			found = true;
+			match = (btf_enum64_value(v) == btf_enum64_value(bv));
+			break;
+		}
+		if (!found) {
+			if (t->name_off)
+				pr_warn("ENUM64 types '%s' disagree; reference base BTF has enum64 value '%s' (%lld), base BTF does not have that value.\n",
+					name, btf__name_by_offset(r->base_ref_btf, v->name_off),
+					btf_enum64_value(v));
+			return -EINVAL;
+		}
+		if (!match) {
+			if (t->name_off)
+				pr_warn("ENUM64 types '%s' disagree on enum value '%s'; reference base BTF specifies value %lld; base BTF specifies value %lld\n",
+					name, btf__name_by_offset(r->base_ref_btf, v->name_off),
+					btf_enum64_value(v), btf_enum64_value(bv));
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
+/* reconcile base types (int, float, enum, enum64 and fwd) */
+static int btf_reconcile_base_type(struct btf_reconcile *r, __u32 id)
+{
+	const struct btf_type *t = btf_type_by_id(r->base_ref_btf, id);
+	const char *name = btf__name_by_offset(r->base_ref_btf, t->name_off);
+	const struct btf_type *bt = NULL;
+	__u32 base_id = 0;
+	int err = 0;
+
+	switch (btf_kind(t)) {
+	case BTF_KIND_INT:
+	case BTF_KIND_ENUM:
+	case BTF_KIND_FLOAT:
+	case BTF_KIND_ENUM64:
+	case BTF_KIND_FWD:
+		break;
+	default:
+		return 0;
+	}
+
+	if (r->map[id] <= BTF_MAX_NR_TYPES)
+		return 0;
+
+	while ((err = btf_reconcile_find_next(r, t, &base_id, &bt)) != -ENOENT) {
+		bt = btf_type_by_id(r->base_btf, base_id);
+		switch (btf_kind(t)) {
+		case BTF_KIND_INT:
+			err = btf_reconcile_int(r, name, t, bt);
+			break;
+		case BTF_KIND_ENUM:
+			err = btf_reconcile_enum(r, name, t, bt);
+			break;
+		case BTF_KIND_FLOAT:
+			err = btf_reconcile_float(r, name, t, bt);
+			break;
+		case BTF_KIND_ENUM64:
+			err = btf_reconcile_enum64(r, name, t, bt);
+			break;
+		case BTF_KIND_FWD:
+			err = 0;
+			break;
+		default:
+			return 0;
+		}
+		if (!err) {
+			r->map[id] = base_id;
+			return 0;
+		}
+	}
+	return err;
+}
+
+/* all reference BTF members must be in base BTF equivalent. */
+static int btf_reconcile_check_member(struct btf_reconcile *r, const char *name,
+				      struct btf_member *m, const struct btf_type *bt,
+				      bool verbose)
+{
+	struct btf_member *bm = (struct btf_member *)(bt + 1);
+	const char *kindstr = btf_kind(bt) == BTF_KIND_STRUCT ? "STRUCT" : "UNION";
+	const char *mname, *bmname;
+	int i, bvlen = btf_vlen(bt);
+
+	mname = btf__name_by_offset(r->base_ref_btf, m->name_off);
+	for (i = 0; i < bvlen; i++, bm++) {
+		bmname = btf__name_by_offset(r->base_btf, bm->name_off);
+
+		if (!m->name_off || !bm->name_off) {
+			if (m->name_off != bm->name_off)
+				continue;
+			if (bm->offset != m->offset)
+				continue;
+		} else {
+			if (strcmp(mname, bmname) != 0)
+				continue;
+			if (bm->offset != m->offset) {
+				if (verbose) {
+					pr_warn("%s '%s' member '%s' disagrees about offset; %d in base reference BTF versus %d in base BTF\n",
+						kindstr, name, mname, bm->offset, m->offset);
+					return -EINVAL;
+				}
+			}
+		}
+		return 0;
+	}
+	if (verbose)
+		pr_warn("%s '%s' missing member '%s' found in base reference BTF\n",
+			kindstr, name, mname);
+	return -EINVAL;
+}
+
+static int btf_reconcile_struct_type(struct btf_reconcile *r, __u32 id)
+{
+	const struct btf_type *t = btf_type_by_id(r->base_ref_btf, id);
+	const char *name = btf__name_by_offset(r->base_ref_btf, t->name_off);
+	const struct btf_type *bt = NULL;
+	struct btf_member *m;
+	const char *kindstr;
+	int i, vlen, err = 0;
+	__u32 base_id = 0;
+
+	switch (btf_kind(t)) {
+	case BTF_KIND_STRUCT:
+		kindstr = "STRUCT";
+		break;
+	case BTF_KIND_UNION:
+		kindstr = "UNION";
+		break;
+	default:
+		return 0;
+	}
+
+	if (r->map[id] <= BTF_MAX_NR_TYPES)
+		return 0;
+
+	vlen = btf_vlen(t);
+
+	while ((err = btf_reconcile_find_next(r, t, &base_id, &bt)) != -ENOENT) {
+		/* must be at least as big */
+		if (bt->size < t->size) {
+			if (t->name_off) {
+				pr_warn("%s '%s' disagrees about size with reference base BTF (%d); base BTF is smaller (%d)\n",
+					kindstr, name, t->size, bt->size);
+				return -EINVAL;
+			}
+			continue;
+		}
+		/* must have at least as many elements */
+		if (btf_vlen(bt) < vlen) {
+			if (t->name_off) {
+				pr_warn("%s '%s' disagrees about number of members with reference base BTF (%d); base BTF has less (%d)\n",
+					kindstr, name, vlen, btf_vlen(bt));
+				return -EINVAL;
+			}
+			continue;
+		}
+		m = (struct btf_member *)(t + 1);
+		for (i = 0; i < vlen; i++, m++) {
+			if (btf_reconcile_check_member(r, name, m, bt, t->name_off != 0)) {
+				if (t->name_off)
+					return -EINVAL;
+				err = -EINVAL;
+				break;
+			}
+		}
+		if (!err) {
+			r->map[id] = base_id;
+			return 0;
+		}
+	}
+	return err;
+}
+
+/* Use a stack rather than recursion to manage dependent reference types.
+ * When a reference type with dependents is encountered, the approach we
+ * take depends on whether the dependents have been resolved to base
+ * BTF references via the map[].  If they all have, we can simply search
+ * for the base BTF type that has those references.  If the references
+ * are not resolved, we need to push the type and its dependents onto
+ * the stack for later resolution.  We first pop the dependents, and
+ * once these have been resolved we pop the reference type with dependents
+ * now resolved.
+ */
+static int btf_reconcile_push(struct btf_reconcile *r, __u32 id)
+{
+	if (r->stack_size >= r->stack_limit)
+		return -ENOSPC;
+	r->stack[r->stack_size++] = id;
+	return 0;
+}
+
+static __u32 btf_reconcile_pop(struct btf_reconcile *r)
+{
+	if (r->stack_size > 0)
+		return r->stack[--r->stack_size];
+	return BTF_UNPROCESSED_ID;
+}
+
+static int btf_reconcile_ref_type(struct btf_reconcile *r, __u32 id)
+{
+	const struct btf_type *t;
+	const struct btf_type *bt;
+	__u32 base_id;
+	int err = 0;
+
+	do {
+		if (r->map[id] <= BTF_MAX_NR_TYPES)
+			continue;
+		t = btf_type_by_id(r->base_ref_btf, id);
+		switch (btf_kind(t)) {
+		case BTF_KIND_CONST:
+		case BTF_KIND_VOLATILE:
+		case BTF_KIND_RESTRICT:
+		case BTF_KIND_PTR:
+		case BTF_KIND_TYPEDEF:
+		case BTF_KIND_FUNC:
+		case BTF_KIND_TYPE_TAG:
+		case BTF_KIND_DECL_TAG:
+			if (r->map[t->type] <= BTF_MAX_NR_TYPES) {
+				bt = NULL;
+				base_id = 0;
+				while ((err = btf_reconcile_find_next(r, t, &base_id, &bt))
+				       != -ENOENT) {
+					if (btf_kind(t) == BTF_KIND_DECL_TAG) {
+						if (btf_decl_tag(t) != btf_decl_tag(bt))
+							continue;
+					}
+					if (bt->type != r->map[t->type])
+						continue;
+					r->map[id] = base_id;
+					break;
+				}
+				if (err) {
+					pr_warn("could not find base BTF type for base reference type[%u]\n",
+						id);
+					return err;
+				}
+			} else {
+				if (btf_reconcile_push(r, id) < 0 ||
+				    btf_reconcile_push(r, t->type) < 0)
+					return -ENOSPC;
+			}
+			break;
+		case BTF_KIND_ARRAY: {
+			struct btf_array *ba, *a = btf_array(t);
+
+			if (r->map[a->type] <= BTF_MAX_NR_TYPES &&
+			    r->map[a->index_type] <= BTF_MAX_NR_TYPES) {
+				bt = NULL;
+				base_id = 0;
+				while ((err = btf_reconcile_find_next(r, t, &base_id, &bt))
+				       != -ENOENT) {
+					ba = btf_array(bt);
+					if (r->map[a->type] != ba->type ||
+					    r->map[a->index_type] != ba->index_type)
+						continue;
+					r->map[id] = base_id;
+					break;
+				}
+				if (err) {
+					pr_warn("could not matching find base BTF ARRAY for base reference ARRAY[%u]\n",
+						id);
+					return err;
+				}
+			} else {
+				if (btf_reconcile_push(r, id) < 0 ||
+				    btf_reconcile_push(r, a->type) < 0 ||
+				    btf_reconcile_push(r, a->index_type) < 0)
+					return -ENOSPC;
+			}
+			break;
+		}
+		case BTF_KIND_FUNC_PROTO: {
+			struct btf_param *p = btf_params(t);
+			int i, vlen = btf_vlen(t);
+
+			for (i = 0; i < vlen; i++, p++) {
+				if (r->map[p->type] > BTF_MAX_NR_TYPES)
+					break;
+			}
+			if (i == vlen && r->map[t->type] <= BTF_MAX_NR_TYPES) {
+				while ((err = btf_reconcile_find_next(r, t, &base_id, &bt))
+				       != -ENOENT) {
+					struct btf_param *bp = btf_params(bt);
+					int bvlen = btf_vlen(bt);
+					int j;
+
+					if (bvlen != vlen)
+						continue;
+					if (r->map[t->type] != bt->type)
+						continue;
+					for (j = 0, p = btf_params(t); j < bvlen; j++, bp++, p++) {
+						if (r->map[p->type] != bp->type)
+							break;
+					}
+					if (j < bvlen)
+						continue;
+					r->map[id] = base_id;
+					break;
+				}
+				if (err) {
+					pr_warn("could not find matching base BTF FUNC_PROTO for base reference FUNC_PROTO[%u]\n",
+						id);
+					return err;
+				}
+			} else {
+				if (btf_reconcile_push(r, id) < 0 ||
+				    btf_reconcile_push(r, t->type) < 0)
+					return -ENOSPC;
+				for (i = 0, p = btf_params(t); i < btf_vlen(t); i++, p++) {
+					if (btf_reconcile_push(r, p->type) < 0)
+						return -ENOSPC;
+				}
+			}
+			break;
+		}
+		default:
+			return -EINVAL;
+		}
+	} while ((id = btf_reconcile_pop(r)) <= BTF_MAX_NR_TYPES);
+
+	return 0;
+}
+
+static int btf_reconcile_rewrite_type_id(__u32 *id, void *ctx)
+{
+	struct btf_reconcile *r = ctx;
+
+	*id = r->map[*id];
+	return 0;
+}
+
+/* If successful, output of reconciliation is updated BTF with base BTF pointing
+ * at base_btf, and type ids, strings adjusted accordingly
+ */
+int btf_reconcile(struct btf *btf, const struct btf *base_btf, __u32 **map_ids)
+{
+	const struct btf *base_ref_btf = btf__base_btf(btf);
+	unsigned int nr_split_types, nr_base_ref_types;
+	unsigned int nr_types = btf__type_cnt(btf);
+	struct btf_reconcile r = {};
+	const struct btf_type *t;
+	int diff_id, err = 0;
+	__u32 id, i;
+
+	if (!base_btf || base_ref_btf == base_btf)
+		return 0;
+
+	nr_base_ref_types = btf__type_cnt(base_ref_btf);
+	r.nr_base_types = btf__type_cnt(base_btf);
+	nr_split_types = nr_types - nr_base_ref_types;
+	r.map = calloc(nr_types, sizeof(*r.map));
+	r.stack_limit = nr_base_ref_types;
+	r.stack = calloc(r.stack_limit, sizeof(*r.stack));
+	if (!r.map || !r.stack) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+	diff_id = r.nr_base_types - nr_base_ref_types;
+	for (id = 1; id < nr_base_ref_types; id++)
+		r.map[id] = BTF_UNPROCESSED_ID;
+	for (id = nr_base_ref_types; id < nr_types; id++)
+		r.map[id] = id + diff_id;
+
+	r.btf = btf;
+	r.base_ref_btf = base_ref_btf;
+	r.base_btf = base_btf;
+
+	/* Build a map from base references to actual base BTF ids; it is used
+	 * to track the state of comparisons.  First map base types and fwds,
+	 * next structs/unions, and finally reference types (const, restrict,
+	 * ptr, array, func, func_proto etc).
+	 */
+	for (id = 1; id < nr_base_ref_types; id++) {
+		err = btf_reconcile_base_type(&r, id);
+		if (err)
+			goto err_out;
+	}
+	for (id = 1; id < nr_base_ref_types; id++) {
+		err = btf_reconcile_struct_type(&r, id);
+		if (err)
+			goto err_out;
+	}
+	for (id = 1; id < nr_base_ref_types; id++) {
+		err = btf_reconcile_ref_type(&r, id);
+		if (err)
+			goto err_out;
+	}
+	/* Next, rewrite type ids in split BTF, replacing split ids with updated
+	 * ids based on number of types in base BTF, and base ids with
+	 * reconciled ids from base_btf.
+	 */
+	for (i = 0, id = nr_base_ref_types; i < nr_split_types; i++, id++) {
+		t = btf__type_by_id(btf, id);
+		err = btf_type_visit_type_ids((struct btf_type *)t,
+					      btf_reconcile_rewrite_type_id, &r);
+		if (err)
+			goto err_out;
+	}
+	/* Finally reset base BTF to base_btf; as part of this operation, string
+	 * offsets are also updated, and we are done.
+	 */
+	err = btf_set_base_btf(r.btf, (struct btf *)r.base_btf);
+err_out:
+	if (!err && map_ids)
+		*map_ids = r.map;
+	else
+		free(r.map);
+	free(r.stack);
+	return err;
+}
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index e9a7cb9c3c5b..ef61985204ab 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -416,5 +416,6 @@  LIBBPF_1.4.0 {
 		btf__new_split;
 		btf__new_split_base_ref;
 		btf__parse_opts;
+		btf__reconcile;
 		btf_ext__raw_data;
 } LIBBPF_1.3.0;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 864b36177424..097d4d1e4bf7 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -234,6 +234,8 @@  struct btf_type;
 struct btf_type *btf_type_by_id(const struct btf *btf, __u32 type_id);
 const char *btf_kind_str(const struct btf_type *t);
 const struct btf_type *skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id);
+int btf_set_base_btf(struct btf *btf, struct btf *base_btf);
+int btf_reconcile(struct btf *btf, const struct btf *base_btf, __u32 **map_ids);
 
 static inline enum btf_func_linkage btf_func_linkage(const struct btf_type *t)
 {