diff mbox series

[bpf-next,1/2] libbpf: accommodate DWARF/compiler bug with duplicated structs

Message ID 20211117194114.347675-1-andrii@kernel.org (mailing list archive)
State Accepted
Commit efdd3eb8015e7447095f02a26eaabd164cd18004
Delegated to: BPF
Headers show
Series [bpf-next,1/2] libbpf: accommodate DWARF/compiler bug with duplicated structs | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for bpf-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Single patches do not need cover letters
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 6 maintainers not CCed: kafai@fb.com songliubraving@fb.com john.fastabend@gmail.com yhs@fb.com netdev@vger.kernel.org kpsingh@kernel.org
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch warning WARNING: 'Accomodate' may be misspelled - perhaps 'Accommodate'? WARNING: line length of 82 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next fail VM_Test

Commit Message

Andrii Nakryiko Nov. 17, 2021, 7:41 p.m. UTC
According to [0], compilers sometimes might produce duplicate DWARF
definitions for exactly the same struct/union within the same
compilation unit (CU). We've had similar issues with identical arrays
and handled them with a similar workaround in 6b6e6b1d09aa ("libbpf:
Accomodate DWARF/compiler bug with duplicated identical arrays"). Do the
same for struct/union by ensuring that two structs/unions are exactly
the same, down to the integer values of field referenced type IDs.

Solving this more generically (allowing referenced types to be
equivalent, but using different type IDs, all within a single CU)
requires a huge complexity increase to handle many-to-many mappings
between canonidal and candidate type graphs. Before we invest in that,
let's see if this approach handles all the instances of this issue in
practice. Thankfully it's pretty rare, it seems.

  [0] https://lore.kernel.org/bpf/YXr2NFlJTAhHdZqq@krava/

Reported-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
---
 tools/lib/bpf/btf.c | 45 +++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 41 insertions(+), 4 deletions(-)

Comments

Andrii Nakryiko Nov. 17, 2021, 7:42 p.m. UTC | #1
On Wed, Nov 17, 2021 at 11:41 AM Andrii Nakryiko <andrii@kernel.org> wrote:
>
> According to [0], compilers sometimes might produce duplicate DWARF
> definitions for exactly the same struct/union within the same
> compilation unit (CU). We've had similar issues with identical arrays
> and handled them with a similar workaround in 6b6e6b1d09aa ("libbpf:
> Accomodate DWARF/compiler bug with duplicated identical arrays"). Do the
> same for struct/union by ensuring that two structs/unions are exactly
> the same, down to the integer values of field referenced type IDs.

Jiri, can you please try this in your setup and see if that handles
all situations or there are more complicated ones still. We'll need a
test for more complicated ones in that case :( Thanks.

>
> Solving this more generically (allowing referenced types to be
> equivalent, but using different type IDs, all within a single CU)
> requires a huge complexity increase to handle many-to-many mappings
> between canonidal and candidate type graphs. Before we invest in that,
> let's see if this approach handles all the instances of this issue in
> practice. Thankfully it's pretty rare, it seems.
>
>   [0] https://lore.kernel.org/bpf/YXr2NFlJTAhHdZqq@krava/
>
> Reported-by: Jiri Olsa <jolsa@kernel.org>
> Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> ---
>  tools/lib/bpf/btf.c | 45 +++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 41 insertions(+), 4 deletions(-)
>
> diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
> index b6be579e0dc6..e97217a77196 100644
> --- a/tools/lib/bpf/btf.c
> +++ b/tools/lib/bpf/btf.c
> @@ -3477,8 +3477,8 @@ static long btf_hash_struct(struct btf_type *t)
>  }
>
>  /*
> - * Check structural compatibility of two FUNC_PROTOs, ignoring referenced type
> - * IDs. This check is performed during type graph equivalence check and
> + * Check structural compatibility of two STRUCTs/UNIONs, ignoring referenced
> + * type IDs. This check is performed during type graph equivalence check and
>   * referenced types equivalence is checked separately.
>   */
>  static bool btf_shallow_equal_struct(struct btf_type *t1, struct btf_type *t2)
> @@ -3851,6 +3851,31 @@ static int btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2)
>         return btf_equal_array(t1, t2);
>  }
>
> +/* Check if given two types are identical STRUCT/UNION definitions */
> +static bool btf_dedup_identical_structs(struct btf_dedup *d, __u32 id1, __u32 id2)
> +{
> +       const struct btf_member *m1, *m2;
> +       struct btf_type *t1, *t2;
> +       int n, i;
> +
> +       t1 = btf_type_by_id(d->btf, id1);
> +       t2 = btf_type_by_id(d->btf, id2);
> +
> +       if (!btf_is_composite(t1) || btf_kind(t1) != btf_kind(t2))
> +               return false;
> +
> +       if (!btf_shallow_equal_struct(t1, t2))
> +               return false;
> +
> +       m1 = btf_members(t1);
> +       m2 = btf_members(t2);
> +       for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) {
> +               if (m1->type != m2->type)
> +                       return false;
> +       }
> +       return true;
> +}
> +
>  /*
>   * Check equivalence of BTF type graph formed by candidate struct/union (we'll
>   * call it "candidate graph" in this description for brevity) to a type graph
> @@ -3962,6 +3987,8 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
>
>         hypot_type_id = d->hypot_map[canon_id];
>         if (hypot_type_id <= BTF_MAX_NR_TYPES) {
> +               if (hypot_type_id == cand_id)
> +                       return 1;
>                 /* In some cases compiler will generate different DWARF types
>                  * for *identical* array type definitions and use them for
>                  * different fields within the *same* struct. This breaks type
> @@ -3970,8 +3997,18 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
>                  * types within a single CU. So work around that by explicitly
>                  * allowing identical array types here.
>                  */
> -               return hypot_type_id == cand_id ||
> -                      btf_dedup_identical_arrays(d, hypot_type_id, cand_id);
> +               if (btf_dedup_identical_arrays(d, hypot_type_id, cand_id))
> +                       return 1;
> +               /* It turns out that similar situation can happen with
> +                * struct/union sometimes, sigh... Handle the case where
> +                * structs/unions are exactly the same, down to the referenced
> +                * type IDs. Anything more complicated (e.g., if referenced
> +                * types are different, but equivalent) is *way more*
> +                * complicated and requires a many-to-many equivalence mapping.
> +                */
> +               if (btf_dedup_identical_structs(d, hypot_type_id, cand_id))
> +                       return 1;
> +               return 0;
>         }
>
>         if (btf_dedup_hypot_map_add(d, canon_id, cand_id))
> --
> 2.30.2
>
Jiri Olsa Nov. 18, 2021, 2:27 p.m. UTC | #2
On Wed, Nov 17, 2021 at 11:42:58AM -0800, Andrii Nakryiko wrote:
> On Wed, Nov 17, 2021 at 11:41 AM Andrii Nakryiko <andrii@kernel.org> wrote:
> >
> > According to [0], compilers sometimes might produce duplicate DWARF
> > definitions for exactly the same struct/union within the same
> > compilation unit (CU). We've had similar issues with identical arrays
> > and handled them with a similar workaround in 6b6e6b1d09aa ("libbpf:
> > Accomodate DWARF/compiler bug with duplicated identical arrays"). Do the
> > same for struct/union by ensuring that two structs/unions are exactly
> > the same, down to the integer values of field referenced type IDs.
> 
> Jiri, can you please try this in your setup and see if that handles
> all situations or there are more complicated ones still. We'll need a
> test for more complicated ones in that case :( Thanks.

it seems to help largely, but I still see few modules (67 out of 780)
that keep 'struct module' for some reason.. their struct module looks
completely the same as is in vmlinux

I uploaded one of the module with vmlinux in here:
  http://people.redhat.com/~jolsa/kmodbtf/2/

I will do some debugging and find out why it did not work in this module
and try to come up with another test

thanks,
jirka

> 
> >
> > Solving this more generically (allowing referenced types to be
> > equivalent, but using different type IDs, all within a single CU)
> > requires a huge complexity increase to handle many-to-many mappings
> > between canonidal and candidate type graphs. Before we invest in that,
> > let's see if this approach handles all the instances of this issue in
> > practice. Thankfully it's pretty rare, it seems.
> >
> >   [0] https://lore.kernel.org/bpf/YXr2NFlJTAhHdZqq@krava/
> >
> > Reported-by: Jiri Olsa <jolsa@kernel.org>
> > Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> > ---
> >  tools/lib/bpf/btf.c | 45 +++++++++++++++++++++++++++++++++++++++++----
> >  1 file changed, 41 insertions(+), 4 deletions(-)
> >
> > diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
> > index b6be579e0dc6..e97217a77196 100644
> > --- a/tools/lib/bpf/btf.c
> > +++ b/tools/lib/bpf/btf.c
> > @@ -3477,8 +3477,8 @@ static long btf_hash_struct(struct btf_type *t)
> >  }
> >
> >  /*
> > - * Check structural compatibility of two FUNC_PROTOs, ignoring referenced type
> > - * IDs. This check is performed during type graph equivalence check and
> > + * Check structural compatibility of two STRUCTs/UNIONs, ignoring referenced
> > + * type IDs. This check is performed during type graph equivalence check and
> >   * referenced types equivalence is checked separately.
> >   */
> >  static bool btf_shallow_equal_struct(struct btf_type *t1, struct btf_type *t2)
> > @@ -3851,6 +3851,31 @@ static int btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2)
> >         return btf_equal_array(t1, t2);
> >  }
> >
> > +/* Check if given two types are identical STRUCT/UNION definitions */
> > +static bool btf_dedup_identical_structs(struct btf_dedup *d, __u32 id1, __u32 id2)
> > +{
> > +       const struct btf_member *m1, *m2;
> > +       struct btf_type *t1, *t2;
> > +       int n, i;
> > +
> > +       t1 = btf_type_by_id(d->btf, id1);
> > +       t2 = btf_type_by_id(d->btf, id2);
> > +
> > +       if (!btf_is_composite(t1) || btf_kind(t1) != btf_kind(t2))
> > +               return false;
> > +
> > +       if (!btf_shallow_equal_struct(t1, t2))
> > +               return false;
> > +
> > +       m1 = btf_members(t1);
> > +       m2 = btf_members(t2);
> > +       for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) {
> > +               if (m1->type != m2->type)
> > +                       return false;
> > +       }
> > +       return true;
> > +}
> > +
> >  /*
> >   * Check equivalence of BTF type graph formed by candidate struct/union (we'll
> >   * call it "candidate graph" in this description for brevity) to a type graph
> > @@ -3962,6 +3987,8 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
> >
> >         hypot_type_id = d->hypot_map[canon_id];
> >         if (hypot_type_id <= BTF_MAX_NR_TYPES) {
> > +               if (hypot_type_id == cand_id)
> > +                       return 1;
> >                 /* In some cases compiler will generate different DWARF types
> >                  * for *identical* array type definitions and use them for
> >                  * different fields within the *same* struct. This breaks type
> > @@ -3970,8 +3997,18 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
> >                  * types within a single CU. So work around that by explicitly
> >                  * allowing identical array types here.
> >                  */
> > -               return hypot_type_id == cand_id ||
> > -                      btf_dedup_identical_arrays(d, hypot_type_id, cand_id);
> > +               if (btf_dedup_identical_arrays(d, hypot_type_id, cand_id))
> > +                       return 1;
> > +               /* It turns out that similar situation can happen with
> > +                * struct/union sometimes, sigh... Handle the case where
> > +                * structs/unions are exactly the same, down to the referenced
> > +                * type IDs. Anything more complicated (e.g., if referenced
> > +                * types are different, but equivalent) is *way more*
> > +                * complicated and requires a many-to-many equivalence mapping.
> > +                */
> > +               if (btf_dedup_identical_structs(d, hypot_type_id, cand_id))
> > +                       return 1;
> > +               return 0;
> >         }
> >
> >         if (btf_dedup_hypot_map_add(d, canon_id, cand_id))
> > --
> > 2.30.2
> >
>
Andrii Nakryiko Nov. 18, 2021, 10:49 p.m. UTC | #3
On Thu, Nov 18, 2021 at 6:27 AM Jiri Olsa <jolsa@redhat.com> wrote:
>
> On Wed, Nov 17, 2021 at 11:42:58AM -0800, Andrii Nakryiko wrote:
> > On Wed, Nov 17, 2021 at 11:41 AM Andrii Nakryiko <andrii@kernel.org> wrote:
> > >
> > > According to [0], compilers sometimes might produce duplicate DWARF
> > > definitions for exactly the same struct/union within the same
> > > compilation unit (CU). We've had similar issues with identical arrays
> > > and handled them with a similar workaround in 6b6e6b1d09aa ("libbpf:
> > > Accomodate DWARF/compiler bug with duplicated identical arrays"). Do the
> > > same for struct/union by ensuring that two structs/unions are exactly
> > > the same, down to the integer values of field referenced type IDs.
> >
> > Jiri, can you please try this in your setup and see if that handles
> > all situations or there are more complicated ones still. We'll need a
> > test for more complicated ones in that case :( Thanks.
>
> it seems to help largely, but I still see few modules (67 out of 780)
> that keep 'struct module' for some reason.. their struct module looks
> completely the same as is in vmlinux

Curious, what's the size of all the module BTFs now? And yes, please
try to narrow down what is causing the bloat this time. I think this
change can go in anyways, it's conceptually the same as a workaround
for duplicate arrays produced by the compiler.

>
> I uploaded one of the module with vmlinux in here:
>   http://people.redhat.com/~jolsa/kmodbtf/2/
>
> I will do some debugging and find out why it did not work in this module
> and try to come up with another test
>
> thanks,
> jirka
>
> >
> > >
> > > Solving this more generically (allowing referenced types to be
> > > equivalent, but using different type IDs, all within a single CU)
> > > requires a huge complexity increase to handle many-to-many mappings
> > > between canonidal and candidate type graphs. Before we invest in that,
> > > let's see if this approach handles all the instances of this issue in
> > > practice. Thankfully it's pretty rare, it seems.
> > >
> > >   [0] https://lore.kernel.org/bpf/YXr2NFlJTAhHdZqq@krava/
> > >
> > > Reported-by: Jiri Olsa <jolsa@kernel.org>
> > > Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> > > ---
> > >  tools/lib/bpf/btf.c | 45 +++++++++++++++++++++++++++++++++++++++++----
> > >  1 file changed, 41 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
> > > index b6be579e0dc6..e97217a77196 100644
> > > --- a/tools/lib/bpf/btf.c
> > > +++ b/tools/lib/bpf/btf.c
> > > @@ -3477,8 +3477,8 @@ static long btf_hash_struct(struct btf_type *t)
> > >  }
> > >
> > >  /*
> > > - * Check structural compatibility of two FUNC_PROTOs, ignoring referenced type
> > > - * IDs. This check is performed during type graph equivalence check and
> > > + * Check structural compatibility of two STRUCTs/UNIONs, ignoring referenced
> > > + * type IDs. This check is performed during type graph equivalence check and
> > >   * referenced types equivalence is checked separately.
> > >   */
> > >  static bool btf_shallow_equal_struct(struct btf_type *t1, struct btf_type *t2)
> > > @@ -3851,6 +3851,31 @@ static int btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2)
> > >         return btf_equal_array(t1, t2);
> > >  }
> > >
> > > +/* Check if given two types are identical STRUCT/UNION definitions */
> > > +static bool btf_dedup_identical_structs(struct btf_dedup *d, __u32 id1, __u32 id2)
> > > +{
> > > +       const struct btf_member *m1, *m2;
> > > +       struct btf_type *t1, *t2;
> > > +       int n, i;
> > > +
> > > +       t1 = btf_type_by_id(d->btf, id1);
> > > +       t2 = btf_type_by_id(d->btf, id2);
> > > +
> > > +       if (!btf_is_composite(t1) || btf_kind(t1) != btf_kind(t2))
> > > +               return false;
> > > +
> > > +       if (!btf_shallow_equal_struct(t1, t2))
> > > +               return false;
> > > +
> > > +       m1 = btf_members(t1);
> > > +       m2 = btf_members(t2);
> > > +       for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) {
> > > +               if (m1->type != m2->type)
> > > +                       return false;
> > > +       }
> > > +       return true;
> > > +}
> > > +
> > >  /*
> > >   * Check equivalence of BTF type graph formed by candidate struct/union (we'll
> > >   * call it "candidate graph" in this description for brevity) to a type graph
> > > @@ -3962,6 +3987,8 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
> > >
> > >         hypot_type_id = d->hypot_map[canon_id];
> > >         if (hypot_type_id <= BTF_MAX_NR_TYPES) {
> > > +               if (hypot_type_id == cand_id)
> > > +                       return 1;
> > >                 /* In some cases compiler will generate different DWARF types
> > >                  * for *identical* array type definitions and use them for
> > >                  * different fields within the *same* struct. This breaks type
> > > @@ -3970,8 +3997,18 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
> > >                  * types within a single CU. So work around that by explicitly
> > >                  * allowing identical array types here.
> > >                  */
> > > -               return hypot_type_id == cand_id ||
> > > -                      btf_dedup_identical_arrays(d, hypot_type_id, cand_id);
> > > +               if (btf_dedup_identical_arrays(d, hypot_type_id, cand_id))
> > > +                       return 1;
> > > +               /* It turns out that similar situation can happen with
> > > +                * struct/union sometimes, sigh... Handle the case where
> > > +                * structs/unions are exactly the same, down to the referenced
> > > +                * type IDs. Anything more complicated (e.g., if referenced
> > > +                * types are different, but equivalent) is *way more*
> > > +                * complicated and requires a many-to-many equivalence mapping.
> > > +                */
> > > +               if (btf_dedup_identical_structs(d, hypot_type_id, cand_id))
> > > +                       return 1;
> > > +               return 0;
> > >         }
> > >
> > >         if (btf_dedup_hypot_map_add(d, canon_id, cand_id))
> > > --
> > > 2.30.2
> > >
> >
>
patchwork-bot+netdevbpf@kernel.org Nov. 19, 2021, 4:10 p.m. UTC | #4
Hello:

This series was applied to bpf/bpf-next.git (master)
by Daniel Borkmann <daniel@iogearbox.net>:

On Wed, 17 Nov 2021 11:41:13 -0800 you wrote:
> According to [0], compilers sometimes might produce duplicate DWARF
> definitions for exactly the same struct/union within the same
> compilation unit (CU). We've had similar issues with identical arrays
> and handled them with a similar workaround in 6b6e6b1d09aa ("libbpf:
> Accomodate DWARF/compiler bug with duplicated identical arrays"). Do the
> same for struct/union by ensuring that two structs/unions are exactly
> the same, down to the integer values of field referenced type IDs.
> 
> [...]

Here is the summary with links:
  - [bpf-next,1/2] libbpf: accommodate DWARF/compiler bug with duplicated structs
    https://git.kernel.org/bpf/bpf-next/c/efdd3eb8015e
  - [bpf-next,2/2] selftests/bpf: add btf_dedup case with duplicated structs within CU
    https://git.kernel.org/bpf/bpf-next/c/9a49afe6f5a5

You are awesome, thank you!
Jiri Olsa Nov. 24, 2021, 11:38 a.m. UTC | #5
On Thu, Nov 18, 2021 at 02:49:35PM -0800, Andrii Nakryiko wrote:
> On Thu, Nov 18, 2021 at 6:27 AM Jiri Olsa <jolsa@redhat.com> wrote:
> >
> > On Wed, Nov 17, 2021 at 11:42:58AM -0800, Andrii Nakryiko wrote:
> > > On Wed, Nov 17, 2021 at 11:41 AM Andrii Nakryiko <andrii@kernel.org> wrote:
> > > >
> > > > According to [0], compilers sometimes might produce duplicate DWARF
> > > > definitions for exactly the same struct/union within the same
> > > > compilation unit (CU). We've had similar issues with identical arrays
> > > > and handled them with a similar workaround in 6b6e6b1d09aa ("libbpf:
> > > > Accomodate DWARF/compiler bug with duplicated identical arrays"). Do the
> > > > same for struct/union by ensuring that two structs/unions are exactly
> > > > the same, down to the integer values of field referenced type IDs.
> > >
> > > Jiri, can you please try this in your setup and see if that handles
> > > all situations or there are more complicated ones still. We'll need a
> > > test for more complicated ones in that case :( Thanks.
> >
> > it seems to help largely, but I still see few modules (67 out of 780)
> > that keep 'struct module' for some reason.. their struct module looks
> > completely the same as is in vmlinux
> 
> Curious, what's the size of all the module BTFs now?

sorry for delay, I was waiting for s390x server

so with 'current' fedora kernel rawhide I'm getting slightly different
total size number than before, so something has changed after the merge
window..

however the increase with BTF enabled in modules is now from 16M to 18M,
so the BTF data adds just about 2M, which I think we can live with

> And yes, please
> try to narrow down what is causing the bloat this time. I think this

I'm on it

> change can go in anyways, it's conceptually the same as a workaround
> for duplicate arrays produced by the compiler.

yes, thanks
jirka

> 
> >
> > I uploaded one of the module with vmlinux in here:
> >   http://people.redhat.com/~jolsa/kmodbtf/2/
> >
> > I will do some debugging and find out why it did not work in this module
> > and try to come up with another test
> >
> > thanks,
> > jirka
> >
> > >
> > > >
> > > > Solving this more generically (allowing referenced types to be
> > > > equivalent, but using different type IDs, all within a single CU)
> > > > requires a huge complexity increase to handle many-to-many mappings
> > > > between canonidal and candidate type graphs. Before we invest in that,
> > > > let's see if this approach handles all the instances of this issue in
> > > > practice. Thankfully it's pretty rare, it seems.
> > > >
> > > >   [0] https://lore.kernel.org/bpf/YXr2NFlJTAhHdZqq@krava/
> > > >
> > > > Reported-by: Jiri Olsa <jolsa@kernel.org>
> > > > Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> > > > ---
> > > >  tools/lib/bpf/btf.c | 45 +++++++++++++++++++++++++++++++++++++++++----
> > > >  1 file changed, 41 insertions(+), 4 deletions(-)
> > > >
> > > > diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
> > > > index b6be579e0dc6..e97217a77196 100644
> > > > --- a/tools/lib/bpf/btf.c
> > > > +++ b/tools/lib/bpf/btf.c
> > > > @@ -3477,8 +3477,8 @@ static long btf_hash_struct(struct btf_type *t)
> > > >  }
> > > >
> > > >  /*
> > > > - * Check structural compatibility of two FUNC_PROTOs, ignoring referenced type
> > > > - * IDs. This check is performed during type graph equivalence check and
> > > > + * Check structural compatibility of two STRUCTs/UNIONs, ignoring referenced
> > > > + * type IDs. This check is performed during type graph equivalence check and
> > > >   * referenced types equivalence is checked separately.
> > > >   */
> > > >  static bool btf_shallow_equal_struct(struct btf_type *t1, struct btf_type *t2)
> > > > @@ -3851,6 +3851,31 @@ static int btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2)
> > > >         return btf_equal_array(t1, t2);
> > > >  }
> > > >
> > > > +/* Check if given two types are identical STRUCT/UNION definitions */
> > > > +static bool btf_dedup_identical_structs(struct btf_dedup *d, __u32 id1, __u32 id2)
> > > > +{
> > > > +       const struct btf_member *m1, *m2;
> > > > +       struct btf_type *t1, *t2;
> > > > +       int n, i;
> > > > +
> > > > +       t1 = btf_type_by_id(d->btf, id1);
> > > > +       t2 = btf_type_by_id(d->btf, id2);
> > > > +
> > > > +       if (!btf_is_composite(t1) || btf_kind(t1) != btf_kind(t2))
> > > > +               return false;
> > > > +
> > > > +       if (!btf_shallow_equal_struct(t1, t2))
> > > > +               return false;
> > > > +
> > > > +       m1 = btf_members(t1);
> > > > +       m2 = btf_members(t2);
> > > > +       for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) {
> > > > +               if (m1->type != m2->type)
> > > > +                       return false;
> > > > +       }
> > > > +       return true;
> > > > +}
> > > > +
> > > >  /*
> > > >   * Check equivalence of BTF type graph formed by candidate struct/union (we'll
> > > >   * call it "candidate graph" in this description for brevity) to a type graph
> > > > @@ -3962,6 +3987,8 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
> > > >
> > > >         hypot_type_id = d->hypot_map[canon_id];
> > > >         if (hypot_type_id <= BTF_MAX_NR_TYPES) {
> > > > +               if (hypot_type_id == cand_id)
> > > > +                       return 1;
> > > >                 /* In some cases compiler will generate different DWARF types
> > > >                  * for *identical* array type definitions and use them for
> > > >                  * different fields within the *same* struct. This breaks type
> > > > @@ -3970,8 +3997,18 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
> > > >                  * types within a single CU. So work around that by explicitly
> > > >                  * allowing identical array types here.
> > > >                  */
> > > > -               return hypot_type_id == cand_id ||
> > > > -                      btf_dedup_identical_arrays(d, hypot_type_id, cand_id);
> > > > +               if (btf_dedup_identical_arrays(d, hypot_type_id, cand_id))
> > > > +                       return 1;
> > > > +               /* It turns out that similar situation can happen with
> > > > +                * struct/union sometimes, sigh... Handle the case where
> > > > +                * structs/unions are exactly the same, down to the referenced
> > > > +                * type IDs. Anything more complicated (e.g., if referenced
> > > > +                * types are different, but equivalent) is *way more*
> > > > +                * complicated and requires a many-to-many equivalence mapping.
> > > > +                */
> > > > +               if (btf_dedup_identical_structs(d, hypot_type_id, cand_id))
> > > > +                       return 1;
> > > > +               return 0;
> > > >         }
> > > >
> > > >         if (btf_dedup_hypot_map_add(d, canon_id, cand_id))
> > > > --
> > > > 2.30.2
> > > >
> > >
> >
>
Jiri Olsa Nov. 24, 2021, 3:02 p.m. UTC | #6
On Wed, Nov 24, 2021 at 12:39:00PM +0100, Jiri Olsa wrote:
> On Thu, Nov 18, 2021 at 02:49:35PM -0800, Andrii Nakryiko wrote:
> > On Thu, Nov 18, 2021 at 6:27 AM Jiri Olsa <jolsa@redhat.com> wrote:
> > >
> > > On Wed, Nov 17, 2021 at 11:42:58AM -0800, Andrii Nakryiko wrote:
> > > > On Wed, Nov 17, 2021 at 11:41 AM Andrii Nakryiko <andrii@kernel.org> wrote:
> > > > >
> > > > > According to [0], compilers sometimes might produce duplicate DWARF
> > > > > definitions for exactly the same struct/union within the same
> > > > > compilation unit (CU). We've had similar issues with identical arrays
> > > > > and handled them with a similar workaround in 6b6e6b1d09aa ("libbpf:
> > > > > Accomodate DWARF/compiler bug with duplicated identical arrays"). Do the
> > > > > same for struct/union by ensuring that two structs/unions are exactly
> > > > > the same, down to the integer values of field referenced type IDs.
> > > >
> > > > Jiri, can you please try this in your setup and see if that handles
> > > > all situations or there are more complicated ones still. We'll need a
> > > > test for more complicated ones in that case :( Thanks.
> > >
> > > it seems to help largely, but I still see few modules (67 out of 780)
> > > that keep 'struct module' for some reason.. their struct module looks
> > > completely the same as is in vmlinux
> > 
> > Curious, what's the size of all the module BTFs now?
> 
> sorry for delay, I was waiting for s390x server
> 
> so with 'current' fedora kernel rawhide I'm getting slightly different
> total size number than before, so something has changed after the merge
> window..
> 
> however the increase with BTF enabled in modules is now from 16M to 18M,
> so the BTF data adds just about 2M, which I think we can live with
> 
> > And yes, please
> > try to narrow down what is causing the bloat this time. I think this
> 
> I'm on it

I'm seeing vmlinux BTF having just FWD record for sctp_mib struct,
while the kernel module has the full definition

kernel:

        [2798] STRUCT 'netns_sctp' size=296 vlen=46
                'sctp_statistics' type_id=2800 bits_offset=0

        [2799] FWD 'sctp_mib' fwd_kind=struct
        [2800] PTR '(anon)' type_id=2799


module before dedup:

        [78928] STRUCT 'netns_sctp' size=296 vlen=46
                'sctp_statistics' type_id=78930 bits_offset=0

        [78929] STRUCT 'sctp_mib' size=272 vlen=1
                'mibs' type_id=80518 bits_offset=0
        [78930] PTR '(anon)' type_id=78929


this field is referenced from within 'struct module' so it won't
match its kernel version and as a result extra 'struct module'
stays in the module's BTF

I'll need to check debuginfo/pahole if that FWD is correct, but
I guess it's normal that some structs might end up unwinded only
in modules and not necessarily in vmlinux

jirka
Andrii Nakryiko Nov. 24, 2021, 7:20 p.m. UTC | #7
On Wed, Nov 24, 2021 at 7:02 AM Jiri Olsa <jolsa@redhat.com> wrote:
>
> On Wed, Nov 24, 2021 at 12:39:00PM +0100, Jiri Olsa wrote:
> > On Thu, Nov 18, 2021 at 02:49:35PM -0800, Andrii Nakryiko wrote:
> > > On Thu, Nov 18, 2021 at 6:27 AM Jiri Olsa <jolsa@redhat.com> wrote:
> > > >
> > > > On Wed, Nov 17, 2021 at 11:42:58AM -0800, Andrii Nakryiko wrote:
> > > > > On Wed, Nov 17, 2021 at 11:41 AM Andrii Nakryiko <andrii@kernel.org> wrote:
> > > > > >
> > > > > > According to [0], compilers sometimes might produce duplicate DWARF
> > > > > > definitions for exactly the same struct/union within the same
> > > > > > compilation unit (CU). We've had similar issues with identical arrays
> > > > > > and handled them with a similar workaround in 6b6e6b1d09aa ("libbpf:
> > > > > > Accomodate DWARF/compiler bug with duplicated identical arrays"). Do the
> > > > > > same for struct/union by ensuring that two structs/unions are exactly
> > > > > > the same, down to the integer values of field referenced type IDs.
> > > > >
> > > > > Jiri, can you please try this in your setup and see if that handles
> > > > > all situations or there are more complicated ones still. We'll need a
> > > > > test for more complicated ones in that case :( Thanks.
> > > >
> > > > it seems to help largely, but I still see few modules (67 out of 780)
> > > > that keep 'struct module' for some reason.. their struct module looks
> > > > completely the same as is in vmlinux
> > >
> > > Curious, what's the size of all the module BTFs now?
> >
> > sorry for delay, I was waiting for s390x server
> >
> > so with 'current' fedora kernel rawhide I'm getting slightly different
> > total size number than before, so something has changed after the merge
> > window..
> >
> > however the increase with BTF enabled in modules is now from 16M to 18M,
> > so the BTF data adds just about 2M, which I think we can live with
> >

16MB for vmlinux BTF is quite a lot. Is it a allmodyesconfig or something?

> > > And yes, please
> > > try to narrow down what is causing the bloat this time. I think this
> >
> > I'm on it
>
> I'm seeing vmlinux BTF having just FWD record for sctp_mib struct,
> while the kernel module has the full definition
>
> kernel:
>
>         [2798] STRUCT 'netns_sctp' size=296 vlen=46
>                 'sctp_statistics' type_id=2800 bits_offset=0
>
>         [2799] FWD 'sctp_mib' fwd_kind=struct
>         [2800] PTR '(anon)' type_id=2799
>
>
> module before dedup:
>
>         [78928] STRUCT 'netns_sctp' size=296 vlen=46
>                 'sctp_statistics' type_id=78930 bits_offset=0
>
>         [78929] STRUCT 'sctp_mib' size=272 vlen=1
>                 'mibs' type_id=80518 bits_offset=0
>         [78930] PTR '(anon)' type_id=78929
>
>
> this field is referenced from within 'struct module' so it won't
> match its kernel version and as a result extra 'struct module'
> stays in the module's BTF
>

Yeah, not much we could do about that short of just blindly matching
FWD to STRUCT/UNION/ENUM by name, which sounds bad to me, I avoided
doing that in BTF dedup algorithm. We only resolve FWD to
STRUCT/UNION/ENUM when we have some containing struct with a field
that points to FWD and (in another instance of the containing struct)
to STRUCT/UNION/ENUM. That way we have sort of a proof that we are
resolving the right FWD. While in this case it would be a blind guess
based on name alone.

> I'll need to check debuginfo/pahole if that FWD is correct, but
> I guess it's normal that some structs might end up unwinded only
> in modules and not necessarily in vmlinux

It can happen, yes. If that's a very common case, ideally we should
make sure that modules keep FWD or that vmlinux BTF does have a full
struct instead of FWD.


>
> jirka
>
Jiri Olsa Nov. 24, 2021, 8:21 p.m. UTC | #8
On Wed, Nov 24, 2021 at 11:20:42AM -0800, Andrii Nakryiko wrote:
> On Wed, Nov 24, 2021 at 7:02 AM Jiri Olsa <jolsa@redhat.com> wrote:
> >
> > On Wed, Nov 24, 2021 at 12:39:00PM +0100, Jiri Olsa wrote:
> > > On Thu, Nov 18, 2021 at 02:49:35PM -0800, Andrii Nakryiko wrote:
> > > > On Thu, Nov 18, 2021 at 6:27 AM Jiri Olsa <jolsa@redhat.com> wrote:
> > > > >
> > > > > On Wed, Nov 17, 2021 at 11:42:58AM -0800, Andrii Nakryiko wrote:
> > > > > > On Wed, Nov 17, 2021 at 11:41 AM Andrii Nakryiko <andrii@kernel.org> wrote:
> > > > > > >
> > > > > > > According to [0], compilers sometimes might produce duplicate DWARF
> > > > > > > definitions for exactly the same struct/union within the same
> > > > > > > compilation unit (CU). We've had similar issues with identical arrays
> > > > > > > and handled them with a similar workaround in 6b6e6b1d09aa ("libbpf:
> > > > > > > Accomodate DWARF/compiler bug with duplicated identical arrays"). Do the
> > > > > > > same for struct/union by ensuring that two structs/unions are exactly
> > > > > > > the same, down to the integer values of field referenced type IDs.
> > > > > >
> > > > > > Jiri, can you please try this in your setup and see if that handles
> > > > > > all situations or there are more complicated ones still. We'll need a
> > > > > > test for more complicated ones in that case :( Thanks.
> > > > >
> > > > > it seems to help largely, but I still see few modules (67 out of 780)
> > > > > that keep 'struct module' for some reason.. their struct module looks
> > > > > completely the same as is in vmlinux
> > > >
> > > > Curious, what's the size of all the module BTFs now?
> > >
> > > sorry for delay, I was waiting for s390x server
> > >
> > > so with 'current' fedora kernel rawhide I'm getting slightly different
> > > total size number than before, so something has changed after the merge
> > > window..
> > >
> > > however the increase with BTF enabled in modules is now from 16M to 18M,
> > > so the BTF data adds just about 2M, which I think we can live with
> > >
> 
> 16MB for vmlinux BTF is quite a lot. Is it a allmodyesconfig or something?

looks like my english betrayed me again.. sry ;-)

size of all modules without BTF is 16M,
size of all modules with BTF is 18M,

so it's overall 2M increase

also note that each module is compressed with xz

jirka

> 
> > > > And yes, please
> > > > try to narrow down what is causing the bloat this time. I think this
> > >
> > > I'm on it
> >
> > I'm seeing vmlinux BTF having just FWD record for sctp_mib struct,
> > while the kernel module has the full definition
> >
> > kernel:
> >
> >         [2798] STRUCT 'netns_sctp' size=296 vlen=46
> >                 'sctp_statistics' type_id=2800 bits_offset=0
> >
> >         [2799] FWD 'sctp_mib' fwd_kind=struct
> >         [2800] PTR '(anon)' type_id=2799
> >
> >
> > module before dedup:
> >
> >         [78928] STRUCT 'netns_sctp' size=296 vlen=46
> >                 'sctp_statistics' type_id=78930 bits_offset=0
> >
> >         [78929] STRUCT 'sctp_mib' size=272 vlen=1
> >                 'mibs' type_id=80518 bits_offset=0
> >         [78930] PTR '(anon)' type_id=78929
> >
> >
> > this field is referenced from within 'struct module' so it won't
> > match its kernel version and as a result extra 'struct module'
> > stays in the module's BTF
> >
> 
> Yeah, not much we could do about that short of just blindly matching
> FWD to STRUCT/UNION/ENUM by name, which sounds bad to me, I avoided
> doing that in BTF dedup algorithm. We only resolve FWD to
> STRUCT/UNION/ENUM when we have some containing struct with a field
> that points to FWD and (in another instance of the containing struct)
> to STRUCT/UNION/ENUM. That way we have sort of a proof that we are
> resolving the right FWD. While in this case it would be a blind guess
> based on name alone.
> 
> > I'll need to check debuginfo/pahole if that FWD is correct, but
> > I guess it's normal that some structs might end up unwinded only
> > in modules and not necessarily in vmlinux
> 
> It can happen, yes. If that's a very common case, ideally we should
> make sure that modules keep FWD or that vmlinux BTF does have a full
> struct instead of FWD.
> 
> 
> >
> > jirka
> >
>
Andrii Nakryiko Nov. 24, 2021, 8:42 p.m. UTC | #9
On Wed, Nov 24, 2021 at 12:21 PM Jiri Olsa <jolsa@redhat.com> wrote:
>
> On Wed, Nov 24, 2021 at 11:20:42AM -0800, Andrii Nakryiko wrote:
> > On Wed, Nov 24, 2021 at 7:02 AM Jiri Olsa <jolsa@redhat.com> wrote:
> > >
> > > On Wed, Nov 24, 2021 at 12:39:00PM +0100, Jiri Olsa wrote:
> > > > On Thu, Nov 18, 2021 at 02:49:35PM -0800, Andrii Nakryiko wrote:
> > > > > On Thu, Nov 18, 2021 at 6:27 AM Jiri Olsa <jolsa@redhat.com> wrote:
> > > > > >
> > > > > > On Wed, Nov 17, 2021 at 11:42:58AM -0800, Andrii Nakryiko wrote:
> > > > > > > On Wed, Nov 17, 2021 at 11:41 AM Andrii Nakryiko <andrii@kernel.org> wrote:
> > > > > > > >
> > > > > > > > According to [0], compilers sometimes might produce duplicate DWARF
> > > > > > > > definitions for exactly the same struct/union within the same
> > > > > > > > compilation unit (CU). We've had similar issues with identical arrays
> > > > > > > > and handled them with a similar workaround in 6b6e6b1d09aa ("libbpf:
> > > > > > > > Accomodate DWARF/compiler bug with duplicated identical arrays"). Do the
> > > > > > > > same for struct/union by ensuring that two structs/unions are exactly
> > > > > > > > the same, down to the integer values of field referenced type IDs.
> > > > > > >
> > > > > > > Jiri, can you please try this in your setup and see if that handles
> > > > > > > all situations or there are more complicated ones still. We'll need a
> > > > > > > test for more complicated ones in that case :( Thanks.
> > > > > >
> > > > > > it seems to help largely, but I still see few modules (67 out of 780)
> > > > > > that keep 'struct module' for some reason.. their struct module looks
> > > > > > completely the same as is in vmlinux
> > > > >
> > > > > Curious, what's the size of all the module BTFs now?
> > > >
> > > > sorry for delay, I was waiting for s390x server
> > > >
> > > > so with 'current' fedora kernel rawhide I'm getting slightly different
> > > > total size number than before, so something has changed after the merge
> > > > window..
> > > >
> > > > however the increase with BTF enabled in modules is now from 16M to 18M,
> > > > so the BTF data adds just about 2M, which I think we can live with
> > > >
> >
> > 16MB for vmlinux BTF is quite a lot. Is it a allmodyesconfig or something?
>
> looks like my english betrayed me again.. sry ;-)
>
> size of all modules without BTF is 16M,
> size of all modules with BTF is 18M,
>
> so it's overall 2M increase
>
> also note that each module is compressed with xz

Oh, so those 16MB are binaries of modules, not just BTF. Would be nice
to do just .BTF ELF section comparisons, but up to you, just my
curiosity.

>
> jirka
>
> >
> > > > > And yes, please
> > > > > try to narrow down what is causing the bloat this time. I think this
> > > >
> > > > I'm on it
> > >
> > > I'm seeing vmlinux BTF having just FWD record for sctp_mib struct,
> > > while the kernel module has the full definition
> > >
> > > kernel:
> > >
> > >         [2798] STRUCT 'netns_sctp' size=296 vlen=46
> > >                 'sctp_statistics' type_id=2800 bits_offset=0
> > >
> > >         [2799] FWD 'sctp_mib' fwd_kind=struct
> > >         [2800] PTR '(anon)' type_id=2799
> > >
> > >
> > > module before dedup:
> > >
> > >         [78928] STRUCT 'netns_sctp' size=296 vlen=46
> > >                 'sctp_statistics' type_id=78930 bits_offset=0
> > >
> > >         [78929] STRUCT 'sctp_mib' size=272 vlen=1
> > >                 'mibs' type_id=80518 bits_offset=0
> > >         [78930] PTR '(anon)' type_id=78929
> > >
> > >
> > > this field is referenced from within 'struct module' so it won't
> > > match its kernel version and as a result extra 'struct module'
> > > stays in the module's BTF
> > >
> >
> > Yeah, not much we could do about that short of just blindly matching
> > FWD to STRUCT/UNION/ENUM by name, which sounds bad to me, I avoided
> > doing that in BTF dedup algorithm. We only resolve FWD to
> > STRUCT/UNION/ENUM when we have some containing struct with a field
> > that points to FWD and (in another instance of the containing struct)
> > to STRUCT/UNION/ENUM. That way we have sort of a proof that we are
> > resolving the right FWD. While in this case it would be a blind guess
> > based on name alone.
> >
> > > I'll need to check debuginfo/pahole if that FWD is correct, but
> > > I guess it's normal that some structs might end up unwinded only
> > > in modules and not necessarily in vmlinux
> >
> > It can happen, yes. If that's a very common case, ideally we should
> > make sure that modules keep FWD or that vmlinux BTF does have a full
> > struct instead of FWD.
> >
> >
> > >
> > > jirka
> > >
> >
>
diff mbox series

Patch

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index b6be579e0dc6..e97217a77196 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -3477,8 +3477,8 @@  static long btf_hash_struct(struct btf_type *t)
 }
 
 /*
- * Check structural compatibility of two FUNC_PROTOs, ignoring referenced type
- * IDs. This check is performed during type graph equivalence check and
+ * Check structural compatibility of two STRUCTs/UNIONs, ignoring referenced
+ * type IDs. This check is performed during type graph equivalence check and
  * referenced types equivalence is checked separately.
  */
 static bool btf_shallow_equal_struct(struct btf_type *t1, struct btf_type *t2)
@@ -3851,6 +3851,31 @@  static int btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2)
 	return btf_equal_array(t1, t2);
 }
 
+/* Check if given two types are identical STRUCT/UNION definitions */
+static bool btf_dedup_identical_structs(struct btf_dedup *d, __u32 id1, __u32 id2)
+{
+	const struct btf_member *m1, *m2;
+	struct btf_type *t1, *t2;
+	int n, i;
+
+	t1 = btf_type_by_id(d->btf, id1);
+	t2 = btf_type_by_id(d->btf, id2);
+
+	if (!btf_is_composite(t1) || btf_kind(t1) != btf_kind(t2))
+		return false;
+
+	if (!btf_shallow_equal_struct(t1, t2))
+		return false;
+
+	m1 = btf_members(t1);
+	m2 = btf_members(t2);
+	for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) {
+		if (m1->type != m2->type)
+			return false;
+	}
+	return true;
+}
+
 /*
  * Check equivalence of BTF type graph formed by candidate struct/union (we'll
  * call it "candidate graph" in this description for brevity) to a type graph
@@ -3962,6 +3987,8 @@  static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
 
 	hypot_type_id = d->hypot_map[canon_id];
 	if (hypot_type_id <= BTF_MAX_NR_TYPES) {
+		if (hypot_type_id == cand_id)
+			return 1;
 		/* In some cases compiler will generate different DWARF types
 		 * for *identical* array type definitions and use them for
 		 * different fields within the *same* struct. This breaks type
@@ -3970,8 +3997,18 @@  static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
 		 * types within a single CU. So work around that by explicitly
 		 * allowing identical array types here.
 		 */
-		return hypot_type_id == cand_id ||
-		       btf_dedup_identical_arrays(d, hypot_type_id, cand_id);
+		if (btf_dedup_identical_arrays(d, hypot_type_id, cand_id))
+			return 1;
+		/* It turns out that similar situation can happen with
+		 * struct/union sometimes, sigh... Handle the case where
+		 * structs/unions are exactly the same, down to the referenced
+		 * type IDs. Anything more complicated (e.g., if referenced
+		 * types are different, but equivalent) is *way more*
+		 * complicated and requires a many-to-many equivalence mapping.
+		 */
+		if (btf_dedup_identical_structs(d, hypot_type_id, cand_id))
+			return 1;
+		return 0;
 	}
 
 	if (btf_dedup_hypot_map_add(d, canon_id, cand_id))