diff mbox series

[bpf-next,v2] bpftool: improve btf c dump sorting stability

Message ID 20240906132453.146085-1-mykyta.yatsenko5@gmail.com (mailing list archive)
State Accepted
Commit f8c6b7913dfaa67475883f94261c278adbcaa0ae
Delegated to: BPF
Headers show
Series [bpf-next,v2] bpftool: improve btf c dump sorting stability | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-18 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
netdev/tree_selection success Clearly marked for bpf-next
netdev/apply fail Patch does not apply to bpf-next-0

Commit Message

Mykyta Yatsenko Sept. 6, 2024, 1:24 p.m. UTC
From: Mykyta Yatsenko <yatsenko@meta.com>

The existing algorithm for BTF C dump sorting uses only the types and names
of structs and unions for ordering. Because the dump contains structs with
the same name but different contents, the ordering of those structs relative
to each other is accidental.
This patch addresses the problem by introducing a new sorting field
that contains a hash of the struct/union field names and types, to
disambiguate the comparison of structs with non-unique names.

Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
---
 tools/bpf/bpftool/btf.c | 80 ++++++++++++++++++++++++++++++++++++++---
 1 file changed, 75 insertions(+), 5 deletions(-)

Comments

Eduard Zingerman Sept. 6, 2024, 7:56 p.m. UTC | #1
On Fri, 2024-09-06 at 14:24 +0100, Mykyta Yatsenko wrote:
> From: Mykyta Yatsenko <yatsenko@meta.com>
> 
> Existing algorithm for BTF C dump sorting uses only types and names of
> the structs and unions for ordering. As dump contains structs with the
> same names but different contents, relative to each other ordering of
> those structs will be accidental.
> This patch addresses this problem by introducing a new sorting field
> that contains hash of the struct/union field names and types to
> disambiguate comparison of the non-unique named structs.
> 
> Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
> ---

Note, this is still not fully stable, e.g.:

$ for i in $(seq 1 10); \
  do touch ./kernel/bpf/verifier.c && \
     ccache-kernel-make.sh -j23 && \
     ./tools/bpf/bpftool/bpftool btf dump file vmlinux format c > ~/work/tmp/vmlinux.h.$i; \
  done
  ...
$ md5sum ~/work/tmp/vmlinux.h.* | sort -k1
76c9b22274c4aa6253ffaafa33ceffd3  /home/eddy/work/tmp/vmlinux.h.2
76c9b22274c4aa6253ffaafa33ceffd3  /home/eddy/work/tmp/vmlinux.h.4
a1c90a62e6cca59869a9cdffbaa3c4de  /home/eddy/work/tmp/vmlinux.h.1
a1c90a62e6cca59869a9cdffbaa3c4de  /home/eddy/work/tmp/vmlinux.h.10
a1c90a62e6cca59869a9cdffbaa3c4de  /home/eddy/work/tmp/vmlinux.h.3
a1c90a62e6cca59869a9cdffbaa3c4de  /home/eddy/work/tmp/vmlinux.h.5
a1c90a62e6cca59869a9cdffbaa3c4de  /home/eddy/work/tmp/vmlinux.h.6
a1c90a62e6cca59869a9cdffbaa3c4de  /home/eddy/work/tmp/vmlinux.h.7
a1c90a62e6cca59869a9cdffbaa3c4de  /home/eddy/work/tmp/vmlinux.h.8
a1c90a62e6cca59869a9cdffbaa3c4de  /home/eddy/work/tmp/vmlinux.h.9

[...]
Andrii Nakryiko Sept. 6, 2024, 9:07 p.m. UTC | #2
On Fri, Sep 6, 2024 at 12:56 PM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
> On Fri, 2024-09-06 at 14:24 +0100, Mykyta Yatsenko wrote:
> > From: Mykyta Yatsenko <yatsenko@meta.com>
> >
> > Existing algorithm for BTF C dump sorting uses only types and names of
> > the structs and unions for ordering. As dump contains structs with the
> > same names but different contents, relative to each other ordering of
> > those structs will be accidental.
> > This patch addresses this problem by introducing a new sorting field
> > that contains hash of the struct/union field names and types to
> > disambiguate comparison of the non-unique named structs.
> >
> > Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
> > ---
>
> Note, this is still not fully stable, e.g.:
>
> $ for i in $(seq 1 10); \
>   do touch ./kernel/bpf/verifier.c && \
>      ccache-kernel-make.sh -j23 && \
>      ./tools/bpf/bpftool/bpftool btf dump file vmlinux format c > ~/work/tmp/vmlinux.h.$i; \
>   done
>   ...
> $ md5sum ~/work/tmp/vmlinux.h.* | sort -k1
> 76c9b22274c4aa6253ffaafa33ceffd3  /home/eddy/work/tmp/vmlinux.h.2
> 76c9b22274c4aa6253ffaafa33ceffd3  /home/eddy/work/tmp/vmlinux.h.4
> a1c90a62e6cca59869a9cdffbaa3c4de  /home/eddy/work/tmp/vmlinux.h.1
> a1c90a62e6cca59869a9cdffbaa3c4de  /home/eddy/work/tmp/vmlinux.h.10
> a1c90a62e6cca59869a9cdffbaa3c4de  /home/eddy/work/tmp/vmlinux.h.3
> a1c90a62e6cca59869a9cdffbaa3c4de  /home/eddy/work/tmp/vmlinux.h.5
> a1c90a62e6cca59869a9cdffbaa3c4de  /home/eddy/work/tmp/vmlinux.h.6
> a1c90a62e6cca59869a9cdffbaa3c4de  /home/eddy/work/tmp/vmlinux.h.7
> a1c90a62e6cca59869a9cdffbaa3c4de  /home/eddy/work/tmp/vmlinux.h.8
> a1c90a62e6cca59869a9cdffbaa3c4de  /home/eddy/work/tmp/vmlinux.h.9

Still, this is quite stable compared to what we have right now. I think the
second part is making the ordering between CUs inside pahole more
consistent, which would keep the output even more stable.

Applied to bpf-next, thanks!

>
> [...]
>
patchwork-bot+netdevbpf@kernel.org Sept. 6, 2024, 9:11 p.m. UTC | #3
Hello:

This patch was applied to bpf/bpf-next.git (master)
by Andrii Nakryiko <andrii@kernel.org>:

On Fri,  6 Sep 2024 14:24:53 +0100 you wrote:
> From: Mykyta Yatsenko <yatsenko@meta.com>
> 
> Existing algorithm for BTF C dump sorting uses only types and names of
> the structs and unions for ordering. As dump contains structs with the
> same names but different contents, relative to each other ordering of
> those structs will be accidental.
> This patch addresses this problem by introducing a new sorting field
> that contains hash of the struct/union field names and types to
> disambiguate comparison of the non-unique named structs.
> 
> [...]

Here is the summary with links:
  - [bpf-next,v2] bpftool: improve btf c dump sorting stability
    https://git.kernel.org/bpf/bpf-next/c/f8c6b7913dfa

You are awesome, thank you!
Mykyta Yatsenko Sept. 6, 2024, 10 p.m. UTC | #4
On 06/09/2024 20:56, Eduard Zingerman wrote:
> On Fri, 2024-09-06 at 14:24 +0100, Mykyta Yatsenko wrote:
>> From: Mykyta Yatsenko <yatsenko@meta.com>
>>
>> Existing algorithm for BTF C dump sorting uses only types and names of
>> the structs and unions for ordering. As dump contains structs with the
>> same names but different contents, relative to each other ordering of
>> those structs will be accidental.
>> This patch addresses this problem by introducing a new sorting field
>> that contains hash of the struct/union field names and types to
>> disambiguate comparison of the non-unique named structs.
>>
>> Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
>> ---
> Note, this is still not fully stable, e.g.:
>
> $ for i in $(seq 1 10); \
>    do touch ./kernel/bpf/verifier.c && \
>       ccache-kernel-make.sh -j23 && \
>       ./tools/bpf/bpftool/bpftool btf dump file vmlinux format c > ~/work/tmp/vmlinux.h.$i; \
>    done
>    ...
> $ md5sum ~/work/tmp/vmlinux.h.* | sort -k1
> 76c9b22274c4aa6253ffaafa33ceffd3  /home/eddy/work/tmp/vmlinux.h.2
> 76c9b22274c4aa6253ffaafa33ceffd3  /home/eddy/work/tmp/vmlinux.h.4
> a1c90a62e6cca59869a9cdffbaa3c4de  /home/eddy/work/tmp/vmlinux.h.1
> a1c90a62e6cca59869a9cdffbaa3c4de  /home/eddy/work/tmp/vmlinux.h.10
> a1c90a62e6cca59869a9cdffbaa3c4de  /home/eddy/work/tmp/vmlinux.h.3
> a1c90a62e6cca59869a9cdffbaa3c4de  /home/eddy/work/tmp/vmlinux.h.5
> a1c90a62e6cca59869a9cdffbaa3c4de  /home/eddy/work/tmp/vmlinux.h.6
> a1c90a62e6cca59869a9cdffbaa3c4de  /home/eddy/work/tmp/vmlinux.h.7
> a1c90a62e6cca59869a9cdffbaa3c4de  /home/eddy/work/tmp/vmlinux.h.8
> a1c90a62e6cca59869a9cdffbaa3c4de  /home/eddy/work/tmp/vmlinux.h.9
>
> [...]
>
Interesting, thanks for showing this. I'll try to replicate this test.
diff mbox series

Patch

diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index 3b57ba095ab6..7d2af1ff3c8d 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -50,6 +50,7 @@  struct sort_datum {
 	int type_rank;
 	const char *sort_name;
 	const char *own_name;
+	__u64 disambig_hash;
 };
 
 static const char *btf_int_enc_str(__u8 encoding)
@@ -584,20 +585,88 @@  static const char *btf_type_sort_name(const struct btf *btf, __u32 index, bool f
 	return NULL;
 }
 
+static __u64 hasher(__u64 hash, __u64 val)
+{
+	return hash * 31 + val;
+}
+
+static __u64 btf_name_hasher(__u64 hash, const struct btf *btf, __u32 name_off)
+{
+	if (!name_off)
+		return hash;
+
+	return hasher(hash, str_hash(btf__name_by_offset(btf, name_off)));
+}
+
+static __u64 btf_type_disambig_hash(const struct btf *btf, __u32 id, bool include_members)
+{
+	const struct btf_type *t = btf__type_by_id(btf, id);
+	int i;
+	size_t hash = 0;
+
+	hash = btf_name_hasher(hash, btf, t->name_off);
+
+	switch (btf_kind(t)) {
+	case BTF_KIND_ENUM:
+	case BTF_KIND_ENUM64:
+		for (i = 0; i < btf_vlen(t); i++) {
+			__u32 name_off = btf_is_enum(t) ?
+				btf_enum(t)[i].name_off :
+				btf_enum64(t)[i].name_off;
+
+			hash = btf_name_hasher(hash, btf, name_off);
+		}
+		break;
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION:
+		if (!include_members)
+			break;
+		for (i = 0; i < btf_vlen(t); i++) {
+			const struct btf_member *m = btf_members(t) + i;
+
+			hash = btf_name_hasher(hash, btf, m->name_off);
+			/* resolve field type's name and hash it as well */
+			hash = hasher(hash, btf_type_disambig_hash(btf, m->type, false));
+		}
+		break;
+	case BTF_KIND_TYPE_TAG:
+	case BTF_KIND_CONST:
+	case BTF_KIND_PTR:
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_RESTRICT:
+	case BTF_KIND_TYPEDEF:
+	case BTF_KIND_DECL_TAG:
+		hash = hasher(hash, btf_type_disambig_hash(btf, t->type, include_members));
+		break;
+	case BTF_KIND_ARRAY: {
+		struct btf_array *arr = btf_array(t);
+
+		hash = hasher(hash, arr->nelems);
+		hash = hasher(hash, btf_type_disambig_hash(btf, arr->type, include_members));
+		break;
+	}
+	default:
+		break;
+	}
+	return hash;
+}
+
 static int btf_type_compare(const void *left, const void *right)
 {
 	const struct sort_datum *d1 = (const struct sort_datum *)left;
 	const struct sort_datum *d2 = (const struct sort_datum *)right;
 	int r;
 
-	if (d1->type_rank != d2->type_rank)
-		return d1->type_rank < d2->type_rank ? -1 : 1;
-
-	r = strcmp(d1->sort_name, d2->sort_name);
+	r = d1->type_rank - d2->type_rank;
+	r = r ?: strcmp(d1->sort_name, d2->sort_name);
+	r = r ?: strcmp(d1->own_name, d2->own_name);
 	if (r)
 		return r;
 
-	return strcmp(d1->own_name, d2->own_name);
+	if (d1->disambig_hash != d2->disambig_hash)
+		return d1->disambig_hash < d2->disambig_hash ? -1 : 1;
+
+	return d1->index - d2->index;
 }
 
 static struct sort_datum *sort_btf_c(const struct btf *btf)
@@ -618,6 +687,7 @@  static struct sort_datum *sort_btf_c(const struct btf *btf)
 		d->type_rank = btf_type_rank(btf, i, false);
 		d->sort_name = btf_type_sort_name(btf, i, false);
 		d->own_name = btf__name_by_offset(btf, t->name_off);
+		d->disambig_hash = btf_type_disambig_hash(btf, i, true);
 	}
 
 	qsort(datums, n, sizeof(struct sort_datum), btf_type_compare);