[bpf-next] bpftool: improve btf c dump sorting stability

Message ID	20240905201206.648932-1-mykyta.yatsenko5@gmail.com (mailing list archive)
State	Superseded
Delegated to:	BPF
Headers	show Received: from mail-ed1-f47.google.com (mail-ed1-f47.google.com [209.85.208.47]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 9A1C01917DA for <bpf@vger.kernel.org>; Thu, 5 Sep 2024 20:12:53 +0000 (UTC) From: Mykyta Yatsenko <mykyta.yatsenko5@gmail.com> To: bpf@vger.kernel.org, ast@kernel.org, andrii@kernel.org, daniel@iogearbox.net, kafai@meta.com, kernel-team@meta.com Cc: Mykyta Yatsenko <yatsenko@meta.com> Subject: [PATCH bpf-next] bpftool: improve btf c dump sorting stability Date: Thu, 5 Sep 2024 21:12:06 +0100 Message-ID: <20240905201206.648932-1-mykyta.yatsenko5@gmail.com> Precedence: bulk MIME-Version: 1.0 Content-Transfer-Encoding: 8bit
Series	[bpf-next] bpftool: improve btf c dump sorting stability \| expand [bpf-next] bpftool: improve btf c dump sorting stability

Message ID

20240905201206.648932-1-mykyta.yatsenko5@gmail.com (mailing list archive)

State

Superseded

Delegated to:

BPF

Headers

From: Mykyta Yatsenko <mykyta.yatsenko5@gmail.com>
To: bpf@vger.kernel.org,
	ast@kernel.org,
	andrii@kernel.org,
	daniel@iogearbox.net,
	kafai@meta.com,
	kernel-team@meta.com
Cc: Mykyta Yatsenko <yatsenko@meta.com>
Subject: [PATCH bpf-next] bpftool: improve btf c dump sorting stability
Date: Thu,  5 Sep 2024 21:12:06 +0100
Message-ID: <20240905201206.648932-1-mykyta.yatsenko5@gmail.com>
Precedence: bulk
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit

Series

[bpf-next] bpftool: improve btf c dump sorting stability | expand

Context	Check	Description
bpf/vmtest-bpf-next-VM_Test-1	success	Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2	success	Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-3	success	Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-0	success	Logs for Lint
bpf/vmtest-bpf-next-VM_Test-5	success	Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4	success	Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10	success	Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-12	success	Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-9	success	Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-16	success	Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-15	success	Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-19	success	Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-17	success	Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-11	success	Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-18	success	Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-6	success	Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-25	success	Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28	success	Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-27	success	Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33	success	Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-35	success	Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-34	success	Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40	success	Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-41	success	Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-8	success	Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7	success	Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-20	success	Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22	success	Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24	success	Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23	success	Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26	success	Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-29	success	Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-21	success	Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-31	success	Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-30	success	Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32	success	Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-36	success	Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-37	success	Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38	success	Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39	fail	Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-14	success	Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-PR	fail	PR summary
bpf/vmtest-bpf-next-VM_Test-13	success	Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
netdev/tree_selection	success	Clearly marked for bpf-next
netdev/apply	fail	Patch does not apply to bpf-next-0

Context

Check

Description

bpf/vmtest-bpf-next-VM_Test-1

success

Logs for ShellCheck

bpf/vmtest-bpf-next-VM_Test-2

success

Logs for Unittests

bpf/vmtest-bpf-next-VM_Test-3

success

Logs for Validate matrix.py

bpf/vmtest-bpf-next-VM_Test-0

success

Logs for Lint

bpf/vmtest-bpf-next-VM_Test-5

success

Logs for aarch64-gcc / build-release

bpf/vmtest-bpf-next-VM_Test-4

success

Logs for aarch64-gcc / build / build for aarch64 with gcc

bpf/vmtest-bpf-next-VM_Test-10

success

Logs for aarch64-gcc / veristat

bpf/vmtest-bpf-next-VM_Test-12

success

Logs for s390x-gcc / build-release

bpf/vmtest-bpf-next-VM_Test-9

success

Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc

bpf/vmtest-bpf-next-VM_Test-16

success

Logs for s390x-gcc / veristat

bpf/vmtest-bpf-next-VM_Test-15

success

Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc

bpf/vmtest-bpf-next-VM_Test-19

success

Logs for x86_64-gcc / build-release

bpf/vmtest-bpf-next-VM_Test-17

success

Logs for set-matrix

bpf/vmtest-bpf-next-VM_Test-11

success

Logs for s390x-gcc / build / build for s390x with gcc

bpf/vmtest-bpf-next-VM_Test-18

success

Logs for x86_64-gcc / build / build for x86_64 with gcc

bpf/vmtest-bpf-next-VM_Test-6

success

Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc

bpf/vmtest-bpf-next-VM_Test-25

success

Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc

bpf/vmtest-bpf-next-VM_Test-28

success

Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2

bpf/vmtest-bpf-next-VM_Test-27

success

Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17

bpf/vmtest-bpf-next-VM_Test-33

success

Logs for x86_64-llvm-17 / veristat

bpf/vmtest-bpf-next-VM_Test-35

success

Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2

bpf/vmtest-bpf-next-VM_Test-34

success

Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18

bpf/vmtest-bpf-next-VM_Test-40

success

Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18

bpf/vmtest-bpf-next-VM_Test-41

success

Logs for x86_64-llvm-18 / veristat

bpf/vmtest-bpf-next-VM_Test-8

success

Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc

bpf/vmtest-bpf-next-VM_Test-7

success

Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc

bpf/vmtest-bpf-next-VM_Test-20

success

Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc

bpf/vmtest-bpf-next-VM_Test-22

success

Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc

bpf/vmtest-bpf-next-VM_Test-24

success

Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc

bpf/vmtest-bpf-next-VM_Test-23

success

Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc

bpf/vmtest-bpf-next-VM_Test-26

success

Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc

bpf/vmtest-bpf-next-VM_Test-29

success

Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17

bpf/vmtest-bpf-next-VM_Test-21

success

Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc

bpf/vmtest-bpf-next-VM_Test-31

success

Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17

bpf/vmtest-bpf-next-VM_Test-30

success

Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17

bpf/vmtest-bpf-next-VM_Test-32

success

Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17

bpf/vmtest-bpf-next-VM_Test-36

success

Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18

bpf/vmtest-bpf-next-VM_Test-37

success

Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18

bpf/vmtest-bpf-next-VM_Test-38

success

Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18

bpf/vmtest-bpf-next-VM_Test-39

fail

Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18

bpf/vmtest-bpf-next-VM_Test-14

success

Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc

bpf/vmtest-bpf-next-PR

fail

PR summary

bpf/vmtest-bpf-next-VM_Test-13

success

Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc

netdev/tree_selection

success

Clearly marked for bpf-next

netdev/apply

fail

Patch does not apply to bpf-next-0

Commit Message

Mykyta Yatsenko Sept. 5, 2024, 8:12 p.m. UTC

From: Mykyta Yatsenko <yatsenko@meta.com>

Existing algorithm for BTF C dump sorting uses only types and names of
the structs and unions for ordering. As dump contains structs with the
same names but different contents, relative to each other ordering of
those structs will be accidental.
This patch addresses this problem by introducing a new sorting field
that contains hash of the struct/union field names and types to
disambiguate comparison of the non-unique named structs.

Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
---
 tools/bpf/bpftool/btf.c | 93 ++++++++++++++++++++++++++++++++++-------
 1 file changed, 79 insertions(+), 14 deletions(-)

Comments

Andrii Nakryiko Sept. 5, 2024, 8:31 p.m. UTC | #1

On Thu, Sep 5, 2024 at 1:12 PM Mykyta Yatsenko
<mykyta.yatsenko5@gmail.com> wrote:
>
> From: Mykyta Yatsenko <yatsenko@meta.com>
>
> Existing algorithm for BTF C dump sorting uses only types and names of
> the structs and unions for ordering. As dump contains structs with the
> same names but different contents, relative to each other ordering of
> those structs will be accidental.
> This patch addresses this problem by introducing a new sorting field
> that contains hash of the struct/union field names and types to
> disambiguate comparison of the non-unique named structs.
>

Did you check how stable generated vmlinux.h now is when just
rebuilding kernel with no actual changes? Does it still have some
variation?

LGTM, but let's keep the btf_type_sort_name() as is? See below.

> Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
> ---
>  tools/bpf/bpftool/btf.c | 93 ++++++++++++++++++++++++++++++++++-------
>  1 file changed, 79 insertions(+), 14 deletions(-)
>
> diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
> index 3b57ba095ab6..0e7151bfc3d5 100644
> --- a/tools/bpf/bpftool/btf.c
> +++ b/tools/bpf/bpftool/btf.c
> @@ -50,6 +50,7 @@ struct sort_datum {
>         int type_rank;
>         const char *sort_name;
>         const char *own_name;
> +       __u64 disambig_hash;
>  };
>
>  static const char *btf_int_enc_str(__u8 encoding)
> @@ -557,17 +558,6 @@ static const char *btf_type_sort_name(const struct btf *btf, __u32 index, bool f
>         const struct btf_type *t = btf__type_by_id(btf, index);
>
>         switch (btf_kind(t)) {
> -       case BTF_KIND_ENUM:
> -       case BTF_KIND_ENUM64: {
> -               int name_off = t->name_off;
> -
> -               if (!from_ref && !name_off && btf_vlen(t))
> -                       name_off = btf_kind(t) == BTF_KIND_ENUM64 ?
> -                               btf_enum64(t)->name_off :
> -                               btf_enum(t)->name_off;
> -
> -               return btf__name_by_offset(btf, name_off);
> -       }

Why remove this? anonymous enums will usually still have some
meaningful prefix for each enumerator value, and so sorting based on
those prefixes (effective) still seems useful for more logical
ordering

pw-bot: cr

>         case BTF_KIND_ARRAY:
>                 return btf_type_sort_name(btf, btf_array(t)->type, true);
>         case BTF_KIND_TYPE_TAG:
> @@ -584,20 +574,94 @@ static const char *btf_type_sort_name(const struct btf *btf, __u32 index, bool f
>         return NULL;
>  }
>
> +static __u64 hasher(__u64 hash, __u64 val)
> +{
> +       return hash * 31 + val;
> +}
> +
> +static __u64 btf_name_hasher(__u64 hash, const struct btf *btf, __u32 name_off)
> +{
> +       if (!name_off)
> +               return hash;
> +
> +       return hasher(hash, str_hash(btf__name_by_offset(btf, name_off)));
> +}
> +
> +static __u64 btf_type_disambig_hash(const struct btf *btf, __u32 index, bool include_members)

nit: s/index/id/ ?

> +{
> +       const struct btf_type *t = btf__type_by_id(btf, index);
> +       int i;
> +       size_t hash = 0;
> +
> +       hash = btf_name_hasher(hash, btf, t->name_off);
> +
> +       switch (btf_kind(t)) {
> +       case BTF_KIND_ENUM:
> +       case BTF_KIND_ENUM64:
> +               for (i = 0; i < btf_vlen(t); i++) {
> +                       __u32 name_off = btf_is_enum(t) ?
> +                               btf_enum(t)[i].name_off :
> +                               btf_enum64(t)[i].name_off;
> +
> +                       hash = btf_name_hasher(hash, btf, name_off);
> +               }
> +               break;
> +       case BTF_KIND_STRUCT:
> +       case BTF_KIND_UNION:
> +               if (!include_members)
> +                       break;
> +               for (i = 0; i < btf_vlen(t); i++) {
> +                       const struct btf_member *m = btf_members(t) + i;
> +
> +                       hash = btf_name_hasher(hash, btf, m->name_off);
> +                       /* resolve field type's name and hash it as well */
> +                       hash = hasher(hash, btf_type_disambig_hash(btf, m->type, false));
> +               }
> +               break;
> +       case BTF_KIND_TYPE_TAG:
> +       case BTF_KIND_CONST:
> +       case BTF_KIND_PTR:
> +       case BTF_KIND_VOLATILE:
> +       case BTF_KIND_RESTRICT:
> +       case BTF_KIND_TYPEDEF:
> +       case BTF_KIND_DECL_TAG:
> +               hash = hasher(hash, btf_type_disambig_hash(btf, t->type, include_members));
> +               break;
> +       case BTF_KIND_ARRAY: {
> +               struct btf_array *arr = btf_array(t);
> +
> +               hash = hasher(hash, arr->nelems);
> +               hash = hasher(hash, btf_type_disambig_hash(btf, arr->type, include_members));
> +               break;
> +       }
> +       default:
> +               break;
> +       }
> +       return hash;
> +}
> +
>  static int btf_type_compare(const void *left, const void *right)
>  {
>         const struct sort_datum *d1 = (const struct sort_datum *)left;
>         const struct sort_datum *d2 = (const struct sort_datum *)right;
>         int r;
>
> -       if (d1->type_rank != d2->type_rank)
> -               return d1->type_rank < d2->type_rank ? -1 : 1;
> +       r = d1->type_rank - d2->type_rank;
> +       if (r)
> +               return r;
>
>         r = strcmp(d1->sort_name, d2->sort_name);
>         if (r)
>                 return r;
>
> -       return strcmp(d1->own_name, d2->own_name);
> +       r = strcmp(d1->own_name, d2->own_name);
> +       if (r)
> +               return r;

when I was playing with this code I had stong desire to do something
like below to cut down on visual noise

r = d1->type_rank - d2->type_rank;
r = r ?: strcmp(d1->sort_name, d2->sort_name);
r = r ?: strcmp(d1->own_name, d2->own_name);
if (r)
    return r;

WDYT?

> +
> +       if (d1->disambig_hash != d2->disambig_hash)
> +               return d1->disambig_hash < d2->disambig_hash ? -1 : 1;
> +
> +       return d1->index - d2->index;
>  }
>
>  static struct sort_datum *sort_btf_c(const struct btf *btf)
> @@ -618,6 +682,7 @@ static struct sort_datum *sort_btf_c(const struct btf *btf)
>                 d->type_rank = btf_type_rank(btf, i, false);
>                 d->sort_name = btf_type_sort_name(btf, i, false);
>                 d->own_name = btf__name_by_offset(btf, t->name_off);
> +               d->disambig_hash = btf_type_disambig_hash(btf, i, true);
>         }
>
>         qsort(datums, n, sizeof(struct sort_datum), btf_type_compare);
> --
> 2.46.0
>

Mykyta Yatsenko Sept. 5, 2024, 10:33 p.m. UTC | #2

On 05/09/2024 21:31, Andrii Nakryiko wrote:
> On Thu, Sep 5, 2024 at 1:12 PM Mykyta Yatsenko
> <mykyta.yatsenko5@gmail.com> wrote:
>> From: Mykyta Yatsenko <yatsenko@meta.com>
>>
>> Existing algorithm for BTF C dump sorting uses only types and names of
>> the structs and unions for ordering. As dump contains structs with the
>> same names but different contents, relative to each other ordering of
>> those structs will be accidental.
>> This patch addresses this problem by introducing a new sorting field
>> that contains hash of the struct/union field names and types to
>> disambiguate comparison of the non-unique named structs.
>>
> Did you check how stable generated vmlinux.h now is when just
> rebuilding kernel with no actual changes? Does it still have some
> variation?
It's stable with this change, no variation at all (I tested up to about 5
times in a row rebuilding kernel and generating vmlinux.h), though, I 
would not
be surprised if some edge case for same-named structs is not covered.
It may get triggered in future by some change in kernel structures.
> LGTM, but let's keep the btf_type_sort_name() as is? See below.
>
>> Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
>> ---
>>   tools/bpf/bpftool/btf.c | 93 ++++++++++++++++++++++++++++++++++-------
>>   1 file changed, 79 insertions(+), 14 deletions(-)
>>
>> diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
>> index 3b57ba095ab6..0e7151bfc3d5 100644
>> --- a/tools/bpf/bpftool/btf.c
>> +++ b/tools/bpf/bpftool/btf.c
>> @@ -50,6 +50,7 @@ struct sort_datum {
>>          int type_rank;
>>          const char *sort_name;
>>          const char *own_name;
>> +       __u64 disambig_hash;
>>   };
>>
>>   static const char *btf_int_enc_str(__u8 encoding)
>> @@ -557,17 +558,6 @@ static const char *btf_type_sort_name(const struct btf *btf, __u32 index, bool f
>>          const struct btf_type *t = btf__type_by_id(btf, index);
>>
>>          switch (btf_kind(t)) {
>> -       case BTF_KIND_ENUM:
>> -       case BTF_KIND_ENUM64: {
>> -               int name_off = t->name_off;
>> -
>> -               if (!from_ref && !name_off && btf_vlen(t))
>> -                       name_off = btf_kind(t) == BTF_KIND_ENUM64 ?
>> -                               btf_enum64(t)->name_off :
>> -                               btf_enum(t)->name_off;
>> -
>> -               return btf__name_by_offset(btf, name_off);
>> -       }
> Why remove this? anonymous enums will usually still have some
> meaningful prefix for each enumerator value, and so sorting based on
> those prefixes (effective) still seems useful for more logical
> ordering
Removed this just to simplify code a little bit. With the hash
function we use for disambiguation same-named enums, enums with
less number of elements would be pushed higher which is decent sorting 
as well.
I see why we would prefer to cluster enums with similar prefixes 
together. I'll
revert this removal in v2.
>
> pw-bot: cr
>
>>          case BTF_KIND_ARRAY:
>>                  return btf_type_sort_name(btf, btf_array(t)->type, true);
>>          case BTF_KIND_TYPE_TAG:
>> @@ -584,20 +574,94 @@ static const char *btf_type_sort_name(const struct btf *btf, __u32 index, bool f
>>          return NULL;
>>   }
>>
>> +static __u64 hasher(__u64 hash, __u64 val)
>> +{
>> +       return hash * 31 + val;
>> +}
>> +
>> +static __u64 btf_name_hasher(__u64 hash, const struct btf *btf, __u32 name_off)
>> +{
>> +       if (!name_off)
>> +               return hash;
>> +
>> +       return hasher(hash, str_hash(btf__name_by_offset(btf, name_off)));
>> +}
>> +
>> +static __u64 btf_type_disambig_hash(const struct btf *btf, __u32 index, bool include_members)
> nit: s/index/id/ ?
Ack
>
>> +{
>> +       const struct btf_type *t = btf__type_by_id(btf, index);
>> +       int i;
>> +       size_t hash = 0;
>> +
>> +       hash = btf_name_hasher(hash, btf, t->name_off);
>> +
>> +       switch (btf_kind(t)) {
>> +       case BTF_KIND_ENUM:
>> +       case BTF_KIND_ENUM64:
>> +               for (i = 0; i < btf_vlen(t); i++) {
>> +                       __u32 name_off = btf_is_enum(t) ?
>> +                               btf_enum(t)[i].name_off :
>> +                               btf_enum64(t)[i].name_off;
>> +
>> +                       hash = btf_name_hasher(hash, btf, name_off);
>> +               }
>> +               break;
>> +       case BTF_KIND_STRUCT:
>> +       case BTF_KIND_UNION:
>> +               if (!include_members)
>> +                       break;
>> +               for (i = 0; i < btf_vlen(t); i++) {
>> +                       const struct btf_member *m = btf_members(t) + i;
>> +
>> +                       hash = btf_name_hasher(hash, btf, m->name_off);
>> +                       /* resolve field type's name and hash it as well */
>> +                       hash = hasher(hash, btf_type_disambig_hash(btf, m->type, false));
>> +               }
>> +               break;
>> +       case BTF_KIND_TYPE_TAG:
>> +       case BTF_KIND_CONST:
>> +       case BTF_KIND_PTR:
>> +       case BTF_KIND_VOLATILE:
>> +       case BTF_KIND_RESTRICT:
>> +       case BTF_KIND_TYPEDEF:
>> +       case BTF_KIND_DECL_TAG:
>> +               hash = hasher(hash, btf_type_disambig_hash(btf, t->type, include_members));
>> +               break;
>> +       case BTF_KIND_ARRAY: {
>> +               struct btf_array *arr = btf_array(t);
>> +
>> +               hash = hasher(hash, arr->nelems);
>> +               hash = hasher(hash, btf_type_disambig_hash(btf, arr->type, include_members));
>> +               break;
>> +       }
>> +       default:
>> +               break;
>> +       }
>> +       return hash;
>> +}
>> +
>>   static int btf_type_compare(const void *left, const void *right)
>>   {
>>          const struct sort_datum *d1 = (const struct sort_datum *)left;
>>          const struct sort_datum *d2 = (const struct sort_datum *)right;
>>          int r;
>>
>> -       if (d1->type_rank != d2->type_rank)
>> -               return d1->type_rank < d2->type_rank ? -1 : 1;
>> +       r = d1->type_rank - d2->type_rank;
>> +       if (r)
>> +               return r;
>>
>>          r = strcmp(d1->sort_name, d2->sort_name);
>>          if (r)
>>                  return r;
>>
>> -       return strcmp(d1->own_name, d2->own_name);
>> +       r = strcmp(d1->own_name, d2->own_name);
>> +       if (r)
>> +               return r;
> when I was playing with this code I had stong desire to do something
> like below to cut down on visual noise
>
> r = d1->type_rank - d2->type_rank;
> r = r ?: strcmp(d1->sort_name, d2->sort_name);
> r = r ?: strcmp(d1->own_name, d2->own_name);
> if (r)
>      return r;
>
> WDYT?
Looks a little hard to parse for me (maybe a skill issue). Early exits 
help to disregard
context when reading the code (not sure if this makes sense), so I prefer
them. Essentially this is the same thing as theexisting code, but more 
compact,
makes sense, I'll include it in v2.
>> +
>> +       if (d1->disambig_hash != d2->disambig_hash)
>> +               return d1->disambig_hash < d2->disambig_hash ? -1 : 1;
>> +
>> +       return d1->index - d2->index;
>>   }
>>
>>   static struct sort_datum *sort_btf_c(const struct btf *btf)
>> @@ -618,6 +682,7 @@ static struct sort_datum *sort_btf_c(const struct btf *btf)
>>                  d->type_rank = btf_type_rank(btf, i, false);
>>                  d->sort_name = btf_type_sort_name(btf, i, false);
>>                  d->own_name = btf__name_by_offset(btf, t->name_off);
>> +               d->disambig_hash = btf_type_disambig_hash(btf, i, true);
>>          }
>>
>>          qsort(datums, n, sizeof(struct sort_datum), btf_type_compare);
>> --
>> 2.46.0
>>

diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index 3b57ba095ab6..0e7151bfc3d5 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -50,6 +50,7 @@  struct sort_datum {
 	int type_rank;
 	const char *sort_name;
 	const char *own_name;
+	__u64 disambig_hash;
 };
 
 static const char *btf_int_enc_str(__u8 encoding)
@@ -557,17 +558,6 @@  static const char *btf_type_sort_name(const struct btf *btf, __u32 index, bool f
 	const struct btf_type *t = btf__type_by_id(btf, index);
 
 	switch (btf_kind(t)) {
-	case BTF_KIND_ENUM:
-	case BTF_KIND_ENUM64: {
-		int name_off = t->name_off;
-
-		if (!from_ref && !name_off && btf_vlen(t))
-			name_off = btf_kind(t) == BTF_KIND_ENUM64 ?
-				btf_enum64(t)->name_off :
-				btf_enum(t)->name_off;
-
-		return btf__name_by_offset(btf, name_off);
-	}
 	case BTF_KIND_ARRAY:
 		return btf_type_sort_name(btf, btf_array(t)->type, true);
 	case BTF_KIND_TYPE_TAG:
@@ -584,20 +574,94 @@  static const char *btf_type_sort_name(const struct btf *btf, __u32 index, bool f
 	return NULL;
 }
 
+static __u64 hasher(__u64 hash, __u64 val)
+{
+	return hash * 31 + val;
+}
+
+static __u64 btf_name_hasher(__u64 hash, const struct btf *btf, __u32 name_off)
+{
+	if (!name_off)
+		return hash;
+
+	return hasher(hash, str_hash(btf__name_by_offset(btf, name_off)));
+}
+
+static __u64 btf_type_disambig_hash(const struct btf *btf, __u32 index, bool include_members)
+{
+	const struct btf_type *t = btf__type_by_id(btf, index);
+	int i;
+	size_t hash = 0;
+
+	hash = btf_name_hasher(hash, btf, t->name_off);
+
+	switch (btf_kind(t)) {
+	case BTF_KIND_ENUM:
+	case BTF_KIND_ENUM64:
+		for (i = 0; i < btf_vlen(t); i++) {
+			__u32 name_off = btf_is_enum(t) ?
+				btf_enum(t)[i].name_off :
+				btf_enum64(t)[i].name_off;
+
+			hash = btf_name_hasher(hash, btf, name_off);
+		}
+		break;
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION:
+		if (!include_members)
+			break;
+		for (i = 0; i < btf_vlen(t); i++) {
+			const struct btf_member *m = btf_members(t) + i;
+
+			hash = btf_name_hasher(hash, btf, m->name_off);
+			/* resolve field type's name and hash it as well */
+			hash = hasher(hash, btf_type_disambig_hash(btf, m->type, false));
+		}
+		break;
+	case BTF_KIND_TYPE_TAG:
+	case BTF_KIND_CONST:
+	case BTF_KIND_PTR:
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_RESTRICT:
+	case BTF_KIND_TYPEDEF:
+	case BTF_KIND_DECL_TAG:
+		hash = hasher(hash, btf_type_disambig_hash(btf, t->type, include_members));
+		break;
+	case BTF_KIND_ARRAY: {
+		struct btf_array *arr = btf_array(t);
+
+		hash = hasher(hash, arr->nelems);
+		hash = hasher(hash, btf_type_disambig_hash(btf, arr->type, include_members));
+		break;
+	}
+	default:
+		break;
+	}
+	return hash;
+}
+
 static int btf_type_compare(const void *left, const void *right)
 {
 	const struct sort_datum *d1 = (const struct sort_datum *)left;
 	const struct sort_datum *d2 = (const struct sort_datum *)right;
 	int r;
 
-	if (d1->type_rank != d2->type_rank)
-		return d1->type_rank < d2->type_rank ? -1 : 1;
+	r = d1->type_rank - d2->type_rank;
+	if (r)
+		return r;
 
 	r = strcmp(d1->sort_name, d2->sort_name);
 	if (r)
 		return r;
 
-	return strcmp(d1->own_name, d2->own_name);
+	r = strcmp(d1->own_name, d2->own_name);
+	if (r)
+		return r;
+
+	if (d1->disambig_hash != d2->disambig_hash)
+		return d1->disambig_hash < d2->disambig_hash ? -1 : 1;
+
+	return d1->index - d2->index;
 }
 
 static struct sort_datum *sort_btf_c(const struct btf *btf)
@@ -618,6 +682,7 @@  static struct sort_datum *sort_btf_c(const struct btf *btf)
 		d->type_rank = btf_type_rank(btf, i, false);
 		d->sort_name = btf_type_sort_name(btf, i, false);
 		d->own_name = btf__name_by_offset(btf, t->name_off);
+		d->disambig_hash = btf_type_disambig_hash(btf, i, true);
 	}
 
 	qsort(datums, n, sizeof(struct sort_datum), btf_type_compare);

[bpf-next] bpftool: improve btf c dump sorting stability

Checks

Commit Message

Comments

Patch