diff mbox series

[v2,bpf-next] bpftool: introduce btf c dump sorting

Message ID 20240509151744.131648-1-yatsenko@meta.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series [v2,bpf-next] bpftool: introduce btf c dump sorting | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-VM_Test-44 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-46 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-43 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-47 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-45 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-8 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-7 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-10 success Logs for x86_64-gcc / build-release

Commit Message

Mykyta Yatsenko mykyta.yatsenko5@gmail.com May 9, 2024, 3:17 p.m. UTC
From: Mykyta Yatsenko <yatsenko@meta.com>

Sort bpftool c dump output; aiming to simplify vmlinux.h diffing and
forcing more natural type definitions ordering.

Definitions are sorted first by their BTF kind ranks, then by their base
type name and by their own name.

Type ranks

Assign ranks to btf kinds (defined in function btf_type_rank) to set
next order:
1. Anonymous enums/enums64
2. Named enums/enums64
3. Trivial types typedefs (ints, then floats)
4. Structs/Unions
5. Function prototypes
6. Forward declarations

Type rank is set to maximum for unnamed reference types, structs and
unions to avoid emitting those types early. They will be emitted as
part of the type chain starting with named type.

Lexicographical ordering

Each type is assigned a sort_name and own_name.
sort_name is the resolved name of the final base type for reference
types (typedef, pointer, array etc). Sorting by sort_name allows to
group typedefs of the same base type. sort_name for non-reference type
is the same as own_name. own_name is a direct name of particular type,
is used as final sorting step.

Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
---
 tools/bpf/bpftool/btf.c | 125 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 122 insertions(+), 3 deletions(-)

Comments

Alan Maguire May 9, 2024, 4:08 p.m. UTC | #1
On 09/05/2024 16:17, Mykyta@web.codeaurora.org wrote:
> From: Mykyta Yatsenko <yatsenko@meta.com>
> 
> Sort bpftool c dump output; aiming to simplify vmlinux.h diffing and
> forcing more natural type definitions ordering.
> 
> Definitions are sorted first by their BTF kind ranks, then by their base
> type name and by their own name.
> 
> Type ranks
> 
> Assign ranks to btf kinds (defined in function btf_type_rank) to set
> next order:
> 1. Anonymous enums/enums64
> 2. Named enums/enums64
> 3. Trivial types typedefs (ints, then floats)
> 4. Structs/Unions
> 5. Function prototypes
> 6. Forward declarations
> 
> Type rank is set to maximum for unnamed reference types, structs and
> unions to avoid emitting those types early. They will be emitted as
> part of the type chain starting with named type.
> 
> Lexicographical ordering
> 
> Each type is assigned a sort_name and own_name.
> sort_name is the resolved name of the final base type for reference
> types (typedef, pointer, array etc). Sorting by sort_name allows to
> group typedefs of the same base type. sort_name for non-reference type
> is the same as own_name. own_name is a direct name of particular type,
> is used as final sorting step.
> 
> Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>

This looks great! Not sure if you experimented with sorting for the
split BTF case (dumping /sys/kernel/btf/tun say); are there any
additional issues in doing that? From what I can see below the sort
would just be applied across base and split BTF and should just work, is
that right? A few suggestions below, but

Reviewed-by: Alan Maguire <alan.maguire@oracle.com>

> ---
>  tools/bpf/bpftool/btf.c | 125 +++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 122 insertions(+), 3 deletions(-)
> 
> diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
> index 91fcb75babe3..09ecd2abf066 100644
> --- a/tools/bpf/bpftool/btf.c
> +++ b/tools/bpf/bpftool/btf.c
> @@ -43,6 +43,13 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
>  	[BTF_KIND_ENUM64]	= "ENUM64",
>  };
>  
> +struct sort_datum {
> +	int index;
> +	int type_rank;
> +	const char *sort_name;
> +	const char *own_name;
> +};
> +
>  static const char *btf_int_enc_str(__u8 encoding)
>  {
>  	switch (encoding) {
> @@ -460,11 +467,114 @@ static void __printf(2, 0) btf_dump_printf(void *ctx,
>  	vfprintf(stdout, fmt, args);
>  }
>  
> +static bool is_reference_type(const struct btf_type *t)
> +{
> +	int kind = btf_kind(t);
> +
> +	return kind == BTF_KIND_CONST || kind == BTF_KIND_PTR || kind == BTF_KIND_VOLATILE ||
> +		kind == BTF_KIND_RESTRICT || kind == BTF_KIND_ARRAY || kind == BTF_KIND_TYPEDEF ||
> +		kind == BTF_KIND_DECL_TAG;
> +}
> +
> +static int btf_type_rank(const struct btf *btf, __u32 index, bool has_name)
> +{
> +	const struct btf_type *t = btf__type_by_id(btf, index);
> +	const int max_rank = 10;
> +	const int kind = btf_kind(t);
> +
> +	if (t->name_off)
> +		has_name = true;
> +
> +	switch (kind) {
> +	case BTF_KIND_ENUM:
> +	case BTF_KIND_ENUM64:
> +		return has_name ? 1 : 0;
> +	case BTF_KIND_INT:
> +	case BTF_KIND_FLOAT:
> +		return 2;
> +	case BTF_KIND_STRUCT:
> +	case BTF_KIND_UNION:
> +		return has_name ? 3 : max_rank;
> +	case BTF_KIND_FUNC_PROTO:
> +		return has_name ? 4 : max_rank;
> +

Don't think a FUNC_PROTO will ever have a name, so has_name check
probably not needed here.

> +	default: {
> +		if (has_name && is_reference_type(t)) {
> +			const int parent = kind == BTF_KIND_ARRAY ? btf_array(t)->type : t->type;
> +
> +			return btf_type_rank(btf, parent, has_name);
> +		}
> +		return max_rank;
> +	}
> +	}
> +}
> +
> +static const char *btf_type_sort_name(const struct btf *btf, __u32 index)
> +{
> +	const struct btf_type *t = btf__type_by_id(btf, index);
> +	int kind = btf_kind(t);
> +
> +	/* Use name of the first element for anonymous enums */
> +	if (!t->name_off && (kind == BTF_KIND_ENUM || kind == BTF_KIND_ENUM64) &&
> +	    BTF_INFO_VLEN(t->info))
> +		return btf__name_by_offset(btf, btf_enum(t)->name_off);
> +
> +	/* Return base type name for reference types */
> +	while (is_reference_type(t)) {

The two times is_reference_type() is used, we use this conditional to
get the array type; worth rolling this into a get_reference_type(t)
function that returns t->type for reference types, btf_array(t)->type
for arrays and -1 otherwise perhaps?

> +		index = btf_kind(t) == BTF_KIND_ARRAY ? btf_array(t)->type : t->type;
> +		t = btf__type_by_id(btf, index);
> +	}
> +
> +	return btf__name_by_offset(btf, t->name_off);
> +}
> +
> +static int btf_type_compare(const void *left, const void *right)
> +{
> +	const struct sort_datum *datum1 = (const struct sort_datum *)left;
> +	const struct sort_datum *datum2 = (const struct sort_datum *)right;
> +	int sort_name_cmp;
> +
> +	if (datum1->type_rank != datum2->type_rank)
> +		return datum1->type_rank < datum2->type_rank ? -1 : 1;
> +
> +	sort_name_cmp = strcmp(datum1->sort_name, datum2->sort_name);
> +	if (sort_name_cmp)
> +		return sort_name_cmp;
> +
> +	return strcmp(datum1->own_name, datum2->own_name);
> +}
> +
> +static struct sort_datum *sort_btf_c(const struct btf *btf)
> +{
> +	int total_root_types;
> +	struct sort_datum *datums;
> +
> +	total_root_types = btf__type_cnt(btf);
> +	datums = malloc(sizeof(struct sort_datum) * total_root_types);

calloc(total_root_types, sizeof(*datums)) will get you a
zero-initialized array, which may be useful below...

> +	if (!datums)
> +		return NULL;
> +
> +	for (int i = 0; i < total_root_types; ++i) {

you're starting from zero here so you'll get &btf_void below; if you
zero-initialize above I think you can just start from 1.

> +		struct sort_datum *current_datum = datums + i;
> +		const struct btf_type *t = btf__type_by_id(btf, i);
> +
> +		current_datum->index = i;
> +		current_datum->type_rank = btf_type_rank(btf, i, false);
> +		current_datum->sort_name = btf_type_sort_name(btf, i);
> +		current_datum->own_name = btf__name_by_offset(btf, t->name_off);
> +	}
> +
> +	qsort(datums, total_root_types, sizeof(struct sort_datum), btf_type_compare);
> +
> +	return datums;
> +}
> +
>  static int dump_btf_c(const struct btf *btf,
> -		      __u32 *root_type_ids, int root_type_cnt)
> +		      __u32 *root_type_ids, int root_type_cnt, bool sort_dump)
>  {
>  	struct btf_dump *d;
>  	int err = 0, i;
> +	struct sort_datum *datums = NULL;
>  
>  	d = btf_dump__new(btf, btf_dump_printf, NULL, NULL);
>  	if (!d)
> @@ -486,8 +596,12 @@ static int dump_btf_c(const struct btf *btf,
>  	} else {
>  		int cnt = btf__type_cnt(btf);
>  
> +		if (sort_dump)
> +			datums = sort_btf_c(btf);
>  		for (i = 1; i < cnt; i++) {
> -			err = btf_dump__dump_type(d, i);
> +			int idx = datums ? datums[i].index : i;
> +
> +			err = btf_dump__dump_type(d, idx);
>  			if (err)
>  				goto done;
>  		}
> @@ -501,6 +615,7 @@ static int dump_btf_c(const struct btf *btf,
>  
>  done:
>  	btf_dump__free(d);
> +	free(datums);
>  	return err;
>  }
>  
> @@ -553,6 +668,7 @@ static int do_dump(int argc, char **argv)
>  	__u32 root_type_ids[2];
>  	int root_type_cnt = 0;
>  	bool dump_c = false;
> +	bool sort_dump_c = true;
>  	__u32 btf_id = -1;
>  	const char *src;
>  	int fd = -1;
> @@ -663,6 +779,9 @@ static int do_dump(int argc, char **argv)
>  				goto done;
>  			}
>  			NEXT_ARG();
> +		} else if (is_prefix(*argv, "unordered")) {

you'll need to update the man page and add to the bash completion for
this new argument I think.

> +			sort_dump_c = false;
> +			NEXT_ARG();
>  		} else {
>  			p_err("unrecognized option: '%s'", *argv);
>  			err = -EINVAL;
> @@ -691,7 +810,7 @@ static int do_dump(int argc, char **argv)
>  			err = -ENOTSUP;
>  			goto done;
>  		}
> -		err = dump_btf_c(btf, root_type_ids, root_type_cnt);
> +		err = dump_btf_c(btf, root_type_ids, root_type_cnt, sort_dump_c);
>  	} else {
>  		err = dump_btf_raw(btf, root_type_ids, root_type_cnt);
>  	}
Mykyta Yatsenko May 9, 2024, 6:42 p.m. UTC | #2
On 5/9/24 17:08, Alan Maguire wrote:
> On 09/05/2024 16:17, Mykyta@web.codeaurora.org wrote:
>> From: Mykyta Yatsenko <yatsenko@meta.com>
>>
>> Sort bpftool c dump output; aiming to simplify vmlinux.h diffing and
>> forcing more natural type definitions ordering.
>>
>> Definitions are sorted first by their BTF kind ranks, then by their base
>> type name and by their own name.
>>
>> Type ranks
>>
>> Assign ranks to btf kinds (defined in function btf_type_rank) to set
>> next order:
>> 1. Anonymous enums/enums64
>> 2. Named enums/enums64
>> 3. Trivial types typedefs (ints, then floats)
>> 4. Structs/Unions
>> 5. Function prototypes
>> 6. Forward declarations
>>
>> Type rank is set to maximum for unnamed reference types, structs and
>> unions to avoid emitting those types early. They will be emitted as
>> part of the type chain starting with named type.
>>
>> Lexicographical ordering
>>
>> Each type is assigned a sort_name and own_name.
>> sort_name is the resolved name of the final base type for reference
>> types (typedef, pointer, array etc). Sorting by sort_name allows to
>> group typedefs of the same base type. sort_name for non-reference type
>> is the same as own_name. own_name is a direct name of particular type,
>> is used as final sorting step.
>>
>> Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
> This looks great! Not sure if you experimented with sorting for the
> split BTF case (dumping /sys/kernel/btf/tun say); are there any
> additional issues in doing that? From what I can see below the sort
> would just be applied across base and split BTF and should just work, is
> that right? A few suggestions below, but
This functionality is oblivious to split BTF, dumping 
/sys/kernel/btf/tun will
sort all types across both base and split BTF, not distinguishing where 
those
types come from.
> Reviewed-by: Alan Maguire <alan.maguire@oracle.com>
>
>> ---
>>   tools/bpf/bpftool/btf.c | 125 +++++++++++++++++++++++++++++++++++++++-
>>   1 file changed, 122 insertions(+), 3 deletions(-)
>>
>> diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
>> index 91fcb75babe3..09ecd2abf066 100644
>> --- a/tools/bpf/bpftool/btf.c
>> +++ b/tools/bpf/bpftool/btf.c
>> @@ -43,6 +43,13 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
>>   	[BTF_KIND_ENUM64]	= "ENUM64",
>>   };
>>   
>> +struct sort_datum {
>> +	int index;
>> +	int type_rank;
>> +	const char *sort_name;
>> +	const char *own_name;
>> +};
>> +
>>   static const char *btf_int_enc_str(__u8 encoding)
>>   {
>>   	switch (encoding) {
>> @@ -460,11 +467,114 @@ static void __printf(2, 0) btf_dump_printf(void *ctx,
>>   	vfprintf(stdout, fmt, args);
>>   }
>>   
>> +static bool is_reference_type(const struct btf_type *t)
>> +{
>> +	int kind = btf_kind(t);
>> +
>> +	return kind == BTF_KIND_CONST || kind == BTF_KIND_PTR || kind == BTF_KIND_VOLATILE ||
>> +		kind == BTF_KIND_RESTRICT || kind == BTF_KIND_ARRAY || kind == BTF_KIND_TYPEDEF ||
>> +		kind == BTF_KIND_DECL_TAG;
>> +}
>> +
>> +static int btf_type_rank(const struct btf *btf, __u32 index, bool has_name)
>> +{
>> +	const struct btf_type *t = btf__type_by_id(btf, index);
>> +	const int max_rank = 10;
>> +	const int kind = btf_kind(t);
>> +
>> +	if (t->name_off)
>> +		has_name = true;
>> +
>> +	switch (kind) {
>> +	case BTF_KIND_ENUM:
>> +	case BTF_KIND_ENUM64:
>> +		return has_name ? 1 : 0;
>> +	case BTF_KIND_INT:
>> +	case BTF_KIND_FLOAT:
>> +		return 2;
>> +	case BTF_KIND_STRUCT:
>> +	case BTF_KIND_UNION:
>> +		return has_name ? 3 : max_rank;
>> +	case BTF_KIND_FUNC_PROTO:
>> +		return has_name ? 4 : max_rank;
>> +
> Don't think a FUNC_PROTO will ever have a name, so has_name check
> probably not needed here.
The reason for that check is to penalize FUNC_PROTO type (assign 
max_rank to it),
but assign rank 4 to typedef type pointing to that FUNC_PROTO. You can 
see that
for reference types this function is called recursively until 
non-reference type
is reached, we assign non-maximum rank only if there was a named type along
the chain of recursive calls. Assigning rank 4 to FUNC_PROTO will lead 
to printing
those function prototypes unordered, as their names are assigned to 
typedef type.

>> +	default: {
>> +		if (has_name && is_reference_type(t)) {
>> +			const int parent = kind == BTF_KIND_ARRAY ? btf_array(t)->type : t->type;
>> +
>> +			return btf_type_rank(btf, parent, has_name);
>> +		}
>> +		return max_rank;
>> +	}
>> +	}
>> +}
>> +
>> +static const char *btf_type_sort_name(const struct btf *btf, __u32 index)
>> +{
>> +	const struct btf_type *t = btf__type_by_id(btf, index);
>> +	int kind = btf_kind(t);
>> +
>> +	/* Use name of the first element for anonymous enums */
>> +	if (!t->name_off && (kind == BTF_KIND_ENUM || kind == BTF_KIND_ENUM64) &&
>> +	    BTF_INFO_VLEN(t->info))
>> +		return btf__name_by_offset(btf, btf_enum(t)->name_off);
>> +
>> +	/* Return base type name for reference types */
>> +	while (is_reference_type(t)) {
> The two times is_reference_type() is used, we use this conditional to
> get the array type; worth rolling this into a get_reference_type(t)
> function that returns t->type for reference types, btf_array(t)->type
> for arrays and -1 otherwise perhaps?
Agree.
>
>> +		index = btf_kind(t) == BTF_KIND_ARRAY ? btf_array(t)->type : t->type;
>> +		t = btf__type_by_id(btf, index);
>> +	}
>> +
>> +	return btf__name_by_offset(btf, t->name_off);
>> +}
>> +
>> +static int btf_type_compare(const void *left, const void *right)
>> +{
>> +	const struct sort_datum *datum1 = (const struct sort_datum *)left;
>> +	const struct sort_datum *datum2 = (const struct sort_datum *)right;
>> +	int sort_name_cmp;
>> +
>> +	if (datum1->type_rank != datum2->type_rank)
>> +		return datum1->type_rank < datum2->type_rank ? -1 : 1;
>> +
>> +	sort_name_cmp = strcmp(datum1->sort_name, datum2->sort_name);
>> +	if (sort_name_cmp)
>> +		return sort_name_cmp;
>> +
>> +	return strcmp(datum1->own_name, datum2->own_name);
>> +}
>> +
>> +static struct sort_datum *sort_btf_c(const struct btf *btf)
>> +{
>> +	int total_root_types;
>> +	struct sort_datum *datums;
>> +
>> +	total_root_types = btf__type_cnt(btf);
>> +	datums = malloc(sizeof(struct sort_datum) * total_root_types);
> calloc(total_root_types, sizeof(*datums)) will get you a
> zero-initialized array, which may be useful below...
>
>> +	if (!datums)
>> +		return NULL;
>> +
>> +	for (int i = 0; i < total_root_types; ++i) {
> you're starting from zero here so you'll get &btf_void below; if you
> zero-initialize above I think you can just start from 1.
>
>> +		struct sort_datum *current_datum = datums + i;
>> +		const struct btf_type *t = btf__type_by_id(btf, i);
>> +
>> +		current_datum->index = i;
>> +		current_datum->type_rank = btf_type_rank(btf, i, false);
>> +		current_datum->sort_name = btf_type_sort_name(btf, i);
>> +		current_datum->own_name = btf__name_by_offset(btf, t->name_off);
>> +	}
>> +
>> +	qsort(datums, total_root_types, sizeof(struct sort_datum), btf_type_compare);
>> +
>> +	return datums;
>> +}
>> +
>>   static int dump_btf_c(const struct btf *btf,
>> -		      __u32 *root_type_ids, int root_type_cnt)
>> +		      __u32 *root_type_ids, int root_type_cnt, bool sort_dump)
>>   {
>>   	struct btf_dump *d;
>>   	int err = 0, i;
>> +	struct sort_datum *datums = NULL;
>>   
>>   	d = btf_dump__new(btf, btf_dump_printf, NULL, NULL);
>>   	if (!d)
>> @@ -486,8 +596,12 @@ static int dump_btf_c(const struct btf *btf,
>>   	} else {
>>   		int cnt = btf__type_cnt(btf);
>>   
>> +		if (sort_dump)
>> +			datums = sort_btf_c(btf);
>>   		for (i = 1; i < cnt; i++) {
>> -			err = btf_dump__dump_type(d, i);
>> +			int idx = datums ? datums[i].index : i;
>> +
>> +			err = btf_dump__dump_type(d, idx);
>>   			if (err)
>>   				goto done;
>>   		}
>> @@ -501,6 +615,7 @@ static int dump_btf_c(const struct btf *btf,
>>   
>>   done:
>>   	btf_dump__free(d);
>> +	free(datums);
>>   	return err;
>>   }
>>   
>> @@ -553,6 +668,7 @@ static int do_dump(int argc, char **argv)
>>   	__u32 root_type_ids[2];
>>   	int root_type_cnt = 0;
>>   	bool dump_c = false;
>> +	bool sort_dump_c = true;
>>   	__u32 btf_id = -1;
>>   	const char *src;
>>   	int fd = -1;
>> @@ -663,6 +779,9 @@ static int do_dump(int argc, char **argv)
>>   				goto done;
>>   			}
>>   			NEXT_ARG();
>> +		} else if (is_prefix(*argv, "unordered")) {
> you'll need to update the man page and add to the bash completion for
> this new argument I think.
>
>> +			sort_dump_c = false;
>> +			NEXT_ARG();
>>   		} else {
>>   			p_err("unrecognized option: '%s'", *argv);
>>   			err = -EINVAL;
>> @@ -691,7 +810,7 @@ static int do_dump(int argc, char **argv)
>>   			err = -ENOTSUP;
>>   			goto done;
>>   		}
>> -		err = dump_btf_c(btf, root_type_ids, root_type_cnt);
>> +		err = dump_btf_c(btf, root_type_ids, root_type_cnt, sort_dump_c);
>>   	} else {
>>   		err = dump_btf_raw(btf, root_type_ids, root_type_cnt);
>>   	}
Andrii Nakryiko May 9, 2024, 9:27 p.m. UTC | #3
On Thu, May 9, 2024 at 11:42 AM Mykyta Yatsenko
<mykyta.yatsenko5@gmail.com> wrote:
>
> On 5/9/24 17:08, Alan Maguire wrote:
> > On 09/05/2024 16:17, Mykyta@web.codeaurora.org wrote:
> >> From: Mykyta Yatsenko <yatsenko@meta.com>
> >>
> >> Sort bpftool c dump output; aiming to simplify vmlinux.h diffing and
> >> forcing more natural type definitions ordering.
> >>
> >> Definitions are sorted first by their BTF kind ranks, then by their base
> >> type name and by their own name.
> >>
> >> Type ranks
> >>
> >> Assign ranks to btf kinds (defined in function btf_type_rank) to set
> >> next order:
> >> 1. Anonymous enums/enums64
> >> 2. Named enums/enums64
> >> 3. Trivial types typedefs (ints, then floats)
> >> 4. Structs/Unions
> >> 5. Function prototypes
> >> 6. Forward declarations
> >>
> >> Type rank is set to maximum for unnamed reference types, structs and
> >> unions to avoid emitting those types early. They will be emitted as
> >> part of the type chain starting with named type.
> >>
> >> Lexicographical ordering
> >>
> >> Each type is assigned a sort_name and own_name.
> >> sort_name is the resolved name of the final base type for reference
> >> types (typedef, pointer, array etc). Sorting by sort_name allows to
> >> group typedefs of the same base type. sort_name for non-reference type
> >> is the same as own_name. own_name is a direct name of particular type,
> >> is used as final sorting step.
> >>
> >> Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
> > This looks great! Not sure if you experimented with sorting for the
> > split BTF case (dumping /sys/kernel/btf/tun say); are there any
> > additional issues in doing that? From what I can see below the sort
> > would just be applied across base and split BTF and should just work, is
> > that right? A few suggestions below, but
> This functionality is oblivious to split BTF, dumping
> /sys/kernel/btf/tun will
> sort all types across both base and split BTF, not distinguishing where
> those
> types come from.
> > Reviewed-by: Alan Maguire <alan.maguire@oracle.com>
> >
> >> ---
> >>   tools/bpf/bpftool/btf.c | 125 +++++++++++++++++++++++++++++++++++++++-
> >>   1 file changed, 122 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
> >> index 91fcb75babe3..09ecd2abf066 100644
> >> --- a/tools/bpf/bpftool/btf.c
> >> +++ b/tools/bpf/bpftool/btf.c
> >> @@ -43,6 +43,13 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
> >>      [BTF_KIND_ENUM64]       = "ENUM64",
> >>   };
> >>
> >> +struct sort_datum {
> >> +    int index;
> >> +    int type_rank;
> >> +    const char *sort_name;
> >> +    const char *own_name;
> >> +};
> >> +
> >>   static const char *btf_int_enc_str(__u8 encoding)
> >>   {
> >>      switch (encoding) {
> >> @@ -460,11 +467,114 @@ static void __printf(2, 0) btf_dump_printf(void *ctx,
> >>      vfprintf(stdout, fmt, args);
> >>   }
> >>
> >> +static bool is_reference_type(const struct btf_type *t)
> >> +{
> >> +    int kind = btf_kind(t);
> >> +
> >> +    return kind == BTF_KIND_CONST || kind == BTF_KIND_PTR || kind == BTF_KIND_VOLATILE ||
> >> +            kind == BTF_KIND_RESTRICT || kind == BTF_KIND_ARRAY || kind == BTF_KIND_TYPEDEF ||
> >> +            kind == BTF_KIND_DECL_TAG;
> >> +}
> >> +
> >> +static int btf_type_rank(const struct btf *btf, __u32 index, bool has_name)
> >> +{
> >> +    const struct btf_type *t = btf__type_by_id(btf, index);
> >> +    const int max_rank = 10;
> >> +    const int kind = btf_kind(t);
> >> +
> >> +    if (t->name_off)
> >> +            has_name = true;
> >> +
> >> +    switch (kind) {
> >> +    case BTF_KIND_ENUM:
> >> +    case BTF_KIND_ENUM64:
> >> +            return has_name ? 1 : 0;
> >> +    case BTF_KIND_INT:
> >> +    case BTF_KIND_FLOAT:
> >> +            return 2;
> >> +    case BTF_KIND_STRUCT:
> >> +    case BTF_KIND_UNION:
> >> +            return has_name ? 3 : max_rank;
> >> +    case BTF_KIND_FUNC_PROTO:
> >> +            return has_name ? 4 : max_rank;
> >> +
> > Don't think a FUNC_PROTO will ever have a name, so has_name check
> > probably not needed here.
> The reason for that check is to penalize FUNC_PROTO type (assign
> max_rank to it),
> but assign rank 4 to typedef type pointing to that FUNC_PROTO. You can
> see that
> for reference types this function is called recursively until
> non-reference type
> is reached, we assign non-maximum rank only if there was a named type along
> the chain of recursive calls. Assigning rank 4 to FUNC_PROTO will lead
> to printing
> those function prototypes unordered, as their names are assigned to
> typedef type.
>
> >> +    default: {
> >> +            if (has_name && is_reference_type(t)) {
> >> +                    const int parent = kind == BTF_KIND_ARRAY ? btf_array(t)->type : t->type;
> >> +
> >> +                    return btf_type_rank(btf, parent, has_name);
> >> +            }
> >> +            return max_rank;
> >> +    }
> >> +    }
> >> +}
> >> +
> >> +static const char *btf_type_sort_name(const struct btf *btf, __u32 index)
> >> +{
> >> +    const struct btf_type *t = btf__type_by_id(btf, index);
> >> +    int kind = btf_kind(t);
> >> +
> >> +    /* Use name of the first element for anonymous enums */
> >> +    if (!t->name_off && (kind == BTF_KIND_ENUM || kind == BTF_KIND_ENUM64) &&
> >> +        BTF_INFO_VLEN(t->info))
> >> +            return btf__name_by_offset(btf, btf_enum(t)->name_off);
> >> +
> >> +    /* Return base type name for reference types */
> >> +    while (is_reference_type(t)) {
> > The two times is_reference_type() is used, we use this conditional to
> > get the array type; worth rolling this into a get_reference_type(t)
> > function that returns t->type for reference types, btf_array(t)->type
> > for arrays and -1 otherwise perhaps?
> Agree.

I'd use 0, it can mean both "not valid type ID", but also doesn't need
extra checking because it's still a valid type if you need to get
btf__type_by_id() (you'll get VOID type) and/or get it's name (it will
be an empty name).

> >
> >> +            index = btf_kind(t) == BTF_KIND_ARRAY ? btf_array(t)->type : t->type;
> >> +            t = btf__type_by_id(btf, index);
> >> +    }
> >> +
> >> +    return btf__name_by_offset(btf, t->name_off);
> >> +}
> >> +
> >> +static int btf_type_compare(const void *left, const void *right)
> >> +{
> >> +    const struct sort_datum *datum1 = (const struct sort_datum *)left;
> >> +    const struct sort_datum *datum2 = (const struct sort_datum *)right;
> >> +    int sort_name_cmp;
> >> +
> >> +    if (datum1->type_rank != datum2->type_rank)
> >> +            return datum1->type_rank < datum2->type_rank ? -1 : 1;
> >> +
> >> +    sort_name_cmp = strcmp(datum1->sort_name, datum2->sort_name);
> >> +    if (sort_name_cmp)
> >> +            return sort_name_cmp;
> >> +
> >> +    return strcmp(datum1->own_name, datum2->own_name);
> >> +}
> >> +
> >> +static struct sort_datum *sort_btf_c(const struct btf *btf)
> >> +{
> >> +    int total_root_types;
> >> +    struct sort_datum *datums;
> >> +
> >> +    total_root_types = btf__type_cnt(btf);
> >> +    datums = malloc(sizeof(struct sort_datum) * total_root_types);
> > calloc(total_root_types, sizeof(*datums)) will get you a
> > zero-initialized array, which may be useful below...
> >
> >> +    if (!datums)
> >> +            return NULL;
> >> +
> >> +    for (int i = 0; i < total_root_types; ++i) {
> > you're starting from zero here so you'll get &btf_void below; if you
> > zero-initialize above I think you can just start from 1.
> >
> >> +            struct sort_datum *current_datum = datums + i;
> >> +            const struct btf_type *t = btf__type_by_id(btf, i);
> >> +
> >> +            current_datum->index = i;
> >> +            current_datum->type_rank = btf_type_rank(btf, i, false);
> >> +            current_datum->sort_name = btf_type_sort_name(btf, i);
> >> +            current_datum->own_name = btf__name_by_offset(btf, t->name_off);
> >> +    }
> >> +
> >> +    qsort(datums, total_root_types, sizeof(struct sort_datum), btf_type_compare);
> >> +
> >> +    return datums;
> >> +}
> >> +
> >>   static int dump_btf_c(const struct btf *btf,
> >> -                  __u32 *root_type_ids, int root_type_cnt)
> >> +                  __u32 *root_type_ids, int root_type_cnt, bool sort_dump)
> >>   {
> >>      struct btf_dump *d;
> >>      int err = 0, i;
> >> +    struct sort_datum *datums = NULL;
> >>
> >>      d = btf_dump__new(btf, btf_dump_printf, NULL, NULL);
> >>      if (!d)
> >> @@ -486,8 +596,12 @@ static int dump_btf_c(const struct btf *btf,
> >>      } else {
> >>              int cnt = btf__type_cnt(btf);
> >>
> >> +            if (sort_dump)
> >> +                    datums = sort_btf_c(btf);
> >>              for (i = 1; i < cnt; i++) {
> >> -                    err = btf_dump__dump_type(d, i);
> >> +                    int idx = datums ? datums[i].index : i;
> >> +
> >> +                    err = btf_dump__dump_type(d, idx);
> >>                      if (err)
> >>                              goto done;
> >>              }
> >> @@ -501,6 +615,7 @@ static int dump_btf_c(const struct btf *btf,
> >>
> >>   done:
> >>      btf_dump__free(d);
> >> +    free(datums);
> >>      return err;
> >>   }
> >>
> >> @@ -553,6 +668,7 @@ static int do_dump(int argc, char **argv)
> >>      __u32 root_type_ids[2];
> >>      int root_type_cnt = 0;
> >>      bool dump_c = false;
> >> +    bool sort_dump_c = true;
> >>      __u32 btf_id = -1;
> >>      const char *src;
> >>      int fd = -1;
> >> @@ -663,6 +779,9 @@ static int do_dump(int argc, char **argv)
> >>                              goto done;
> >>                      }
> >>                      NEXT_ARG();
> >> +            } else if (is_prefix(*argv, "unordered")) {
> > you'll need to update the man page and add to the bash completion for
> > this new argument I think.
> >
> >> +                    sort_dump_c = false;
> >> +                    NEXT_ARG();
> >>              } else {
> >>                      p_err("unrecognized option: '%s'", *argv);
> >>                      err = -EINVAL;
> >> @@ -691,7 +810,7 @@ static int do_dump(int argc, char **argv)
> >>                      err = -ENOTSUP;
> >>                      goto done;
> >>              }
> >> -            err = dump_btf_c(btf, root_type_ids, root_type_cnt);
> >> +            err = dump_btf_c(btf, root_type_ids, root_type_cnt, sort_dump_c);
> >>      } else {
> >>              err = dump_btf_raw(btf, root_type_ids, root_type_cnt);
> >>      }
>
>
Andrii Nakryiko May 9, 2024, 9:39 p.m. UTC | #4
On Thu, May 9, 2024 at 8:17 AM Mykyta Yatsenko
<mykyta.yatsenko5@gmail.com> wrote:
>
> From: Mykyta Yatsenko <yatsenko@meta.com>
>
> Sort bpftool c dump output; aiming to simplify vmlinux.h diffing and
> forcing more natural type definitions ordering.
>
> Definitions are sorted first by their BTF kind ranks, then by their base
> type name and by their own name.
>
> Type ranks
>
> Assign ranks to btf kinds (defined in function btf_type_rank) to set
> next order:
> 1. Anonymous enums/enums64
> 2. Named enums/enums64
> 3. Trivial types typedefs (ints, then floats)
> 4. Structs/Unions
> 5. Function prototypes
> 6. Forward declarations
>
> Type rank is set to maximum for unnamed reference types, structs and
> unions to avoid emitting those types early. They will be emitted as
> part of the type chain starting with named type.
>
> Lexicographical ordering
>
> Each type is assigned a sort_name and own_name.
> sort_name is the resolved name of the final base type for reference
> types (typedef, pointer, array etc). Sorting by sort_name allows to
> group typedefs of the same base type. sort_name for non-reference type
> is the same as own_name. own_name is a direct name of particular type,
> is used as final sorting step.
>
> Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
> ---
>  tools/bpf/bpftool/btf.c | 125 +++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 122 insertions(+), 3 deletions(-)
>

It's getting very close, see a bunch of nits below.

> diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
> index 91fcb75babe3..09ecd2abf066 100644
> --- a/tools/bpf/bpftool/btf.c
> +++ b/tools/bpf/bpftool/btf.c
> @@ -43,6 +43,13 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
>         [BTF_KIND_ENUM64]       = "ENUM64",
>  };
>
> +struct sort_datum {
> +       int index;
> +       int type_rank;
> +       const char *sort_name;
> +       const char *own_name;
> +};
> +
>  static const char *btf_int_enc_str(__u8 encoding)
>  {
>         switch (encoding) {
> @@ -460,11 +467,114 @@ static void __printf(2, 0) btf_dump_printf(void *ctx,
>         vfprintf(stdout, fmt, args);
>  }
>
> +static bool is_reference_type(const struct btf_type *t)
> +{
> +       int kind = btf_kind(t);
> +
> +       return kind == BTF_KIND_CONST || kind == BTF_KIND_PTR || kind == BTF_KIND_VOLATILE ||
> +               kind == BTF_KIND_RESTRICT || kind == BTF_KIND_ARRAY || kind == BTF_KIND_TYPEDEF ||
> +               kind == BTF_KIND_DECL_TAG;

probably best to write as a switch, also make sure that
BTF_KIND_TYPE_TAG is supported, it is effectively treated as
CONST/VOLATILE

and actually looking below, I'd just incorporate these as extra cases
in the existing btf_type_rank() switch, and then have a similar
open-coded switch for btf_type_sort_name()

When dealing with BTF I find these explicit switches listing what
kinds are special and how they are processed much easier to check and
follow than any of the extra helpers doing some kind checks


> +}
> +
> +static int btf_type_rank(const struct btf *btf, __u32 index, bool has_name)
> +{
> +       const struct btf_type *t = btf__type_by_id(btf, index);
> +       const int max_rank = 10;
> +       const int kind = btf_kind(t);
> +
> +       if (t->name_off)
> +               has_name = true;
> +
> +       switch (kind) {
> +       case BTF_KIND_ENUM:
> +       case BTF_KIND_ENUM64:
> +               return has_name ? 1 : 0;
> +       case BTF_KIND_INT:
> +       case BTF_KIND_FLOAT:
> +               return 2;
> +       case BTF_KIND_STRUCT:
> +       case BTF_KIND_UNION:
> +               return has_name ? 3 : max_rank;
> +       case BTF_KIND_FUNC_PROTO:
> +               return has_name ? 4 : max_rank;
> +
> +       default: {
> +               if (has_name && is_reference_type(t)) {
> +                       const int parent = kind == BTF_KIND_ARRAY ? btf_array(t)->type : t->type;
> +
> +                       return btf_type_rank(btf, parent, has_name);
> +               }
> +               return max_rank;
> +       }

nit: you don't need these {} for default

> +       }
> +}
> +
> +static const char *btf_type_sort_name(const struct btf *btf, __u32 index)
> +{
> +       const struct btf_type *t = btf__type_by_id(btf, index);
> +       int kind = btf_kind(t);
> +
> +       /* Use name of the first element for anonymous enums */
> +       if (!t->name_off && (kind == BTF_KIND_ENUM || kind == BTF_KIND_ENUM64) &&

there is btf_is_any_enum() helper for this kind check

> +           BTF_INFO_VLEN(t->info))

please use btf_vlen(t) helper

> +               return btf__name_by_offset(btf, btf_enum(t)->name_off);

what if enum's vlen == 0? I think I mentioned that before, it
shouldn't happen in valid BTF, but it's technically allowable in BTF,
so best to be able to handle that instead of crashing or doing random
memory reads.

> +
> +       /* Return base type name for reference types */
> +       while (is_reference_type(t)) {
> +               index = btf_kind(t) == BTF_KIND_ARRAY ? btf_array(t)->type : t->type;
> +               t = btf__type_by_id(btf, index);
> +       }
> +
> +       return btf__name_by_offset(btf, t->name_off);
> +}
> +
> +static int btf_type_compare(const void *left, const void *right)
> +{
> +       const struct sort_datum *datum1 = (const struct sort_datum *)left;
> +       const struct sort_datum *datum2 = (const struct sort_datum *)right;
> +       int sort_name_cmp;

stylistic nit: it's minot, but I'd use less distracting naming. Eg., d1, d2, r.

> +
> +       if (datum1->type_rank != datum2->type_rank)
> +               return datum1->type_rank < datum2->type_rank ? -1 : 1;
> +
> +       sort_name_cmp = strcmp(datum1->sort_name, datum2->sort_name);
> +       if (sort_name_cmp)
> +               return sort_name_cmp;
> +
> +       return strcmp(datum1->own_name, datum2->own_name);
> +}
> +
> +static struct sort_datum *sort_btf_c(const struct btf *btf)
> +{
> +       int total_root_types;
> +       struct sort_datum *datums;
> +
> +       total_root_types = btf__type_cnt(btf);

nit: s/total_root_types/n/

> +       datums = malloc(sizeof(struct sort_datum) * total_root_types);
> +       if (!datums)
> +               return NULL;
> +
> +       for (int i = 0; i < total_root_types; ++i) {
> +               struct sort_datum *current_datum = datums + i;

nit: just d for the name, it's not going to be hard to follow or ambiguous

> +               const struct btf_type *t = btf__type_by_id(btf, i);
> +
> +               current_datum->index = i;
> +               current_datum->type_rank = btf_type_rank(btf, i, false);
> +               current_datum->sort_name = btf_type_sort_name(btf, i);
> +               current_datum->own_name = btf__name_by_offset(btf, t->name_off);
> +       }
> +
> +       qsort(datums, total_root_types, sizeof(struct sort_datum), btf_type_compare);
> +
> +       return datums;
> +}
> +
>  static int dump_btf_c(const struct btf *btf,
> -                     __u32 *root_type_ids, int root_type_cnt)
> +                     __u32 *root_type_ids, int root_type_cnt, bool sort_dump)
>  {
>         struct btf_dump *d;
>         int err = 0, i;
> +       struct sort_datum *datums = NULL;
>
>         d = btf_dump__new(btf, btf_dump_printf, NULL, NULL);
>         if (!d)
> @@ -486,8 +596,12 @@ static int dump_btf_c(const struct btf *btf,
>         } else {
>                 int cnt = btf__type_cnt(btf);
>
> +               if (sort_dump)
> +                       datums = sort_btf_c(btf);
>                 for (i = 1; i < cnt; i++) {
> -                       err = btf_dump__dump_type(d, i);
> +                       int idx = datums ? datums[i].index : i;
> +
> +                       err = btf_dump__dump_type(d, idx);
>                         if (err)
>                                 goto done;
>                 }
> @@ -501,6 +615,7 @@ static int dump_btf_c(const struct btf *btf,
>
>  done:
>         btf_dump__free(d);
> +       free(datums);
>         return err;
>  }
>
> @@ -553,6 +668,7 @@ static int do_dump(int argc, char **argv)
>         __u32 root_type_ids[2];
>         int root_type_cnt = 0;
>         bool dump_c = false;
> +       bool sort_dump_c = true;
>         __u32 btf_id = -1;
>         const char *src;
>         int fd = -1;
> @@ -663,6 +779,9 @@ static int do_dump(int argc, char **argv)
>                                 goto done;
>                         }
>                         NEXT_ARG();
> +               } else if (is_prefix(*argv, "unordered")) {

it's more of a "original order" rather than unordered, so maybe "unnormalized"?

> +                       sort_dump_c = false;
> +                       NEXT_ARG();
>                 } else {
>                         p_err("unrecognized option: '%s'", *argv);
>                         err = -EINVAL;
> @@ -691,7 +810,7 @@ static int do_dump(int argc, char **argv)
>                         err = -ENOTSUP;
>                         goto done;
>                 }
> -               err = dump_btf_c(btf, root_type_ids, root_type_cnt);
> +               err = dump_btf_c(btf, root_type_ids, root_type_cnt, sort_dump_c);
>         } else {
>                 err = dump_btf_raw(btf, root_type_ids, root_type_cnt);
>         }
> --
> 2.44.0
>
Quentin Monnet May 9, 2024, 11:09 p.m. UTC | #5
On 09/05/2024 22:39, Andrii Nakryiko wrote:
> On Thu, May 9, 2024 at 8:17 AM Mykyta Yatsenko
> <mykyta.yatsenko5@gmail.com> wrote:
>>
>> From: Mykyta Yatsenko <yatsenko@meta.com>
>>
>> Sort bpftool c dump output; aiming to simplify vmlinux.h diffing and
>> forcing more natural type definitions ordering.
>>
>> Definitions are sorted first by their BTF kind ranks, then by their base
>> type name and by their own name.
>>
>> Type ranks
>>
>> Assign ranks to btf kinds (defined in function btf_type_rank) to set
>> next order:
>> 1. Anonymous enums/enums64
>> 2. Named enums/enums64
>> 3. Trivial types typedefs (ints, then floats)
>> 4. Structs/Unions
>> 5. Function prototypes
>> 6. Forward declarations
>>
>> Type rank is set to maximum for unnamed reference types, structs and
>> unions to avoid emitting those types early. They will be emitted as
>> part of the type chain starting with named type.
>>
>> Lexicographical ordering
>>
>> Each type is assigned a sort_name and own_name.
>> sort_name is the resolved name of the final base type for reference
>> types (typedef, pointer, array etc). Sorting by sort_name allows to
>> group typedefs of the same base type. sort_name for non-reference type
>> is the same as own_name. own_name is a direct name of particular type,
>> is used as final sorting step.
>>
>> Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
>> ---
>>  tools/bpf/bpftool/btf.c | 125 +++++++++++++++++++++++++++++++++++++++-
>>  1 file changed, 122 insertions(+), 3 deletions(-)
>>
> 
> It's getting very close, see a bunch of nits below.

Agreed, it's in a nice shape, thanks a lot for this work. Apologies for
the review delay, I just have a few additional nits.

> 
>> diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
>> index 91fcb75babe3..09ecd2abf066 100644
>> --- a/tools/bpf/bpftool/btf.c
>> +++ b/tools/bpf/bpftool/btf.c
>> @@ -43,6 +43,13 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
>>         [BTF_KIND_ENUM64]       = "ENUM64",
>>  };
>>
>> +struct sort_datum {
>> +       int index;
>> +       int type_rank;
>> +       const char *sort_name;
>> +       const char *own_name;
>> +};
>> +
>>  static const char *btf_int_enc_str(__u8 encoding)
>>  {
>>         switch (encoding) {
>> @@ -460,11 +467,114 @@ static void __printf(2, 0) btf_dump_printf(void *ctx,
>>         vfprintf(stdout, fmt, args);
>>  }
>>
>> +static bool is_reference_type(const struct btf_type *t)
>> +{
>> +       int kind = btf_kind(t);
>> +
>> +       return kind == BTF_KIND_CONST || kind == BTF_KIND_PTR || kind == BTF_KIND_VOLATILE ||
>> +               kind == BTF_KIND_RESTRICT || kind == BTF_KIND_ARRAY || kind == BTF_KIND_TYPEDEF ||
>> +               kind == BTF_KIND_DECL_TAG;
> 
> probably best to write as a switch, also make sure that
> BTF_KIND_TYPE_TAG is supported, it is effectively treated as
> CONST/VOLATILE
> 
> and actually looking below, I'd just incorporate these as extra cases
> in the existing btf_type_rank() switch, and then have a similar
> open-coded switch for btf_type_sort_name()
> 
> When dealing with BTF I find these explicit switches listing what
> kinds are special and how they are processed much easier to check and
> follow than any of the extra helpers doing some kind checks
> 
> 
>> +}
>> +
>> +static int btf_type_rank(const struct btf *btf, __u32 index, bool has_name)
>> +{
>> +       const struct btf_type *t = btf__type_by_id(btf, index);
>> +       const int max_rank = 10;
>> +       const int kind = btf_kind(t);
>> +
>> +       if (t->name_off)
>> +               has_name = true;
>> +
>> +       switch (kind) {
>> +       case BTF_KIND_ENUM:
>> +       case BTF_KIND_ENUM64:
>> +               return has_name ? 1 : 0;
>> +       case BTF_KIND_INT:
>> +       case BTF_KIND_FLOAT:
>> +               return 2;
>> +       case BTF_KIND_STRUCT:
>> +       case BTF_KIND_UNION:
>> +               return has_name ? 3 : max_rank;
>> +       case BTF_KIND_FUNC_PROTO:
>> +               return has_name ? 4 : max_rank;
>> +
>> +       default: {
>> +               if (has_name && is_reference_type(t)) {
>> +                       const int parent = kind == BTF_KIND_ARRAY ? btf_array(t)->type : t->type;
>> +
>> +                       return btf_type_rank(btf, parent, has_name);
>> +               }
>> +               return max_rank;
>> +       }
> 
> nit: you don't need these {} for default
> 
>> +       }
>> +}
>> +
>> +static const char *btf_type_sort_name(const struct btf *btf, __u32 index)
>> +{
>> +       const struct btf_type *t = btf__type_by_id(btf, index);
>> +       int kind = btf_kind(t);
>> +
>> +       /* Use name of the first element for anonymous enums */
>> +       if (!t->name_off && (kind == BTF_KIND_ENUM || kind == BTF_KIND_ENUM64) &&
> 
> there is btf_is_any_enum() helper for this kind check
> 
>> +           BTF_INFO_VLEN(t->info))
> 
> please use btf_vlen(t) helper
> 
>> +               return btf__name_by_offset(btf, btf_enum(t)->name_off);
> 
> what if enum's vlen == 0? I think I mentioned that before, it
> shouldn't happen in valid BTF, but it's technically allowable in BTF,
> so best to be able to handle that instead of crashing or doing random
> memory reads.
> 
>> +
>> +       /* Return base type name for reference types */
>> +       while (is_reference_type(t)) {
>> +               index = btf_kind(t) == BTF_KIND_ARRAY ? btf_array(t)->type : t->type;
>> +               t = btf__type_by_id(btf, index);
>> +       }
>> +
>> +       return btf__name_by_offset(btf, t->name_off);
>> +}
>> +
>> +static int btf_type_compare(const void *left, const void *right)
>> +{
>> +       const struct sort_datum *datum1 = (const struct sort_datum *)left;
>> +       const struct sort_datum *datum2 = (const struct sort_datum *)right;
>> +       int sort_name_cmp;
> 
> stylistic nit: it's minot, but I'd use less distracting naming. Eg., d1, d2, r.
> 
>> +
>> +       if (datum1->type_rank != datum2->type_rank)
>> +               return datum1->type_rank < datum2->type_rank ? -1 : 1;
>> +
>> +       sort_name_cmp = strcmp(datum1->sort_name, datum2->sort_name);
>> +       if (sort_name_cmp)
>> +               return sort_name_cmp;
>> +
>> +       return strcmp(datum1->own_name, datum2->own_name);
>> +}
>> +
>> +static struct sort_datum *sort_btf_c(const struct btf *btf)
>> +{
>> +       int total_root_types;
>> +       struct sort_datum *datums;
>> +
>> +       total_root_types = btf__type_cnt(btf);
> 
> nit: s/total_root_types/n/
> 
>> +       datums = malloc(sizeof(struct sort_datum) * total_root_types);
>> +       if (!datums)
>> +               return NULL;
>> +
>> +       for (int i = 0; i < total_root_types; ++i) {
>> +               struct sort_datum *current_datum = datums + i;
> 
> nit: just d for the name, it's not going to be hard to follow or ambiguous
> 
>> +               const struct btf_type *t = btf__type_by_id(btf, i);
>> +
>> +               current_datum->index = i;
>> +               current_datum->type_rank = btf_type_rank(btf, i, false);
>> +               current_datum->sort_name = btf_type_sort_name(btf, i);
>> +               current_datum->own_name = btf__name_by_offset(btf, t->name_off);
>> +       }
>> +
>> +       qsort(datums, total_root_types, sizeof(struct sort_datum), btf_type_compare);
>> +
>> +       return datums;
>> +}
>> +
>>  static int dump_btf_c(const struct btf *btf,
>> -                     __u32 *root_type_ids, int root_type_cnt)
>> +                     __u32 *root_type_ids, int root_type_cnt, bool sort_dump)
>>  {
>>         struct btf_dump *d;
>>         int err = 0, i;
>> +       struct sort_datum *datums = NULL;

Nit: Most variables in the file are declared in "reverse-Christmas-tree"
order (longest lines first, unless there's a reason not to). Could you
please try to preserve this order, here and elsewhere, for consistency?

>>
>>         d = btf_dump__new(btf, btf_dump_printf, NULL, NULL);
>>         if (!d)
>> @@ -486,8 +596,12 @@ static int dump_btf_c(const struct btf *btf,
>>         } else {
>>                 int cnt = btf__type_cnt(btf);
>>
>> +               if (sort_dump)
>> +                       datums = sort_btf_c(btf);
>>                 for (i = 1; i < cnt; i++) {
>> -                       err = btf_dump__dump_type(d, i);
>> +                       int idx = datums ? datums[i].index : i;
>> +
>> +                       err = btf_dump__dump_type(d, idx);
>>                         if (err)
>>                                 goto done;
>>                 }
>> @@ -501,6 +615,7 @@ static int dump_btf_c(const struct btf *btf,
>>
>>  done:
>>         btf_dump__free(d);
>> +       free(datums);

Small nit: I'd swap the two lines above, it would seem more logical to
free in the reverse order from allocation and would be more
straightforward to "split" if we ever need to free d only when jumping
from the first goto.

>>         return err;
>>  }
>>
>> @@ -553,6 +668,7 @@ static int do_dump(int argc, char **argv)
>>         __u32 root_type_ids[2];
>>         int root_type_cnt = 0;
>>         bool dump_c = false;
>> +       bool sort_dump_c = true;
>>         __u32 btf_id = -1;
>>         const char *src;
>>         int fd = -1;
>> @@ -663,6 +779,9 @@ static int do_dump(int argc, char **argv)
>>                                 goto done;
>>                         }
>>                         NEXT_ARG();
>> +               } else if (is_prefix(*argv, "unordered")) {
> 
> it's more of a "original order" rather than unordered, so maybe "unnormalized"?
I'd have gone with "unsorted", but Andrii's reasoning probably applies
the same to it. I find "unnormalized" might be difficult to understand,
maybe "preserve_order" or, shorter, "keep_order"?

And as Alan mentioned on the other thread, we'll need the following
updates for the new keyword:

- Adding the keyword to the command summary at the top of
  tools/bpf/bpftool/Documentation/bpftool-btf.rst:
  | *FORMAT* := { **raw** | **c** [**unordered**]}
  (or whatever keyword we pick)
- Adding the description for the keyword, below on the same page
- Adding the keyword to the help message, at the end of btf.c
- Updating the bash completion. The patch below should work (to adjust
  with the final keyword):

------
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 04afe2ac2228..85a43c867e5f 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -930,6 +930,9 @@ _bpftool()
                         format)
                             COMPREPLY=( $( compgen -W "c raw" -- "$cur" ) )
                             ;;
+                        c)
+                            COMPREPLY=( $( compgen -W "unordered" -- "$cur" ) )
+                            ;;
                         *)
                             # emit extra options
                             case ${words[3]} in
------
Andrii Nakryiko May 9, 2024, 11:14 p.m. UTC | #6
On Thu, May 9, 2024 at 4:09 PM Quentin Monnet <qmo@kernel.org> wrote:
>
> On 09/05/2024 22:39, Andrii Nakryiko wrote:
> > On Thu, May 9, 2024 at 8:17 AM Mykyta Yatsenko
> > <mykyta.yatsenko5@gmail.com> wrote:
> >>
> >> From: Mykyta Yatsenko <yatsenko@meta.com>
> >>
> >> Sort bpftool c dump output; aiming to simplify vmlinux.h diffing and
> >> forcing more natural type definitions ordering.
> >>
> >> Definitions are sorted first by their BTF kind ranks, then by their base
> >> type name and by their own name.
> >>
> >> Type ranks
> >>
> >> Assign ranks to btf kinds (defined in function btf_type_rank) to set
> >> next order:
> >> 1. Anonymous enums/enums64
> >> 2. Named enums/enums64
> >> 3. Trivial types typedefs (ints, then floats)
> >> 4. Structs/Unions
> >> 5. Function prototypes
> >> 6. Forward declarations
> >>
> >> Type rank is set to maximum for unnamed reference types, structs and
> >> unions to avoid emitting those types early. They will be emitted as
> >> part of the type chain starting with named type.
> >>
> >> Lexicographical ordering
> >>
> >> Each type is assigned a sort_name and own_name.
> >> sort_name is the resolved name of the final base type for reference
> >> types (typedef, pointer, array etc). Sorting by sort_name allows to
> >> group typedefs of the same base type. sort_name for non-reference type
> >> is the same as own_name. own_name is a direct name of particular type,
> >> is used as final sorting step.
> >>
> >> Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
> >> ---
> >>  tools/bpf/bpftool/btf.c | 125 +++++++++++++++++++++++++++++++++++++++-
> >>  1 file changed, 122 insertions(+), 3 deletions(-)
> >>
> >
> > It's getting very close, see a bunch of nits below.
>
> Agreed, it's in a nice shape, thanks a lot for this work. Apologies for
> the review delay, I just have a few additional nits.
>
> >
> >> diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
> >> index 91fcb75babe3..09ecd2abf066 100644
> >> --- a/tools/bpf/bpftool/btf.c
> >> +++ b/tools/bpf/bpftool/btf.c
> >> @@ -43,6 +43,13 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
> >>         [BTF_KIND_ENUM64]       = "ENUM64",
> >>  };
> >>
> >> +struct sort_datum {
> >> +       int index;
> >> +       int type_rank;
> >> +       const char *sort_name;
> >> +       const char *own_name;
> >> +};
> >> +
> >>  static const char *btf_int_enc_str(__u8 encoding)
> >>  {
> >>         switch (encoding) {
> >> @@ -460,11 +467,114 @@ static void __printf(2, 0) btf_dump_printf(void *ctx,
> >>         vfprintf(stdout, fmt, args);
> >>  }
> >>
> >> +static bool is_reference_type(const struct btf_type *t)
> >> +{
> >> +       int kind = btf_kind(t);
> >> +
> >> +       return kind == BTF_KIND_CONST || kind == BTF_KIND_PTR || kind == BTF_KIND_VOLATILE ||
> >> +               kind == BTF_KIND_RESTRICT || kind == BTF_KIND_ARRAY || kind == BTF_KIND_TYPEDEF ||
> >> +               kind == BTF_KIND_DECL_TAG;
> >
> > probably best to write as a switch, also make sure that
> > BTF_KIND_TYPE_TAG is supported, it is effectively treated as
> > CONST/VOLATILE
> >
> > and actually looking below, I'd just incorporate these as extra cases
> > in the existing btf_type_rank() switch, and then have a similar
> > open-coded switch for btf_type_sort_name()
> >
> > When dealing with BTF I find these explicit switches listing what
> > kinds are special and how they are processed much easier to check and
> > follow than any of the extra helpers doing some kind checks
> >
> >
> >> +}
> >> +
> >> +static int btf_type_rank(const struct btf *btf, __u32 index, bool has_name)
> >> +{
> >> +       const struct btf_type *t = btf__type_by_id(btf, index);
> >> +       const int max_rank = 10;
> >> +       const int kind = btf_kind(t);
> >> +
> >> +       if (t->name_off)
> >> +               has_name = true;
> >> +
> >> +       switch (kind) {
> >> +       case BTF_KIND_ENUM:
> >> +       case BTF_KIND_ENUM64:
> >> +               return has_name ? 1 : 0;
> >> +       case BTF_KIND_INT:
> >> +       case BTF_KIND_FLOAT:
> >> +               return 2;
> >> +       case BTF_KIND_STRUCT:
> >> +       case BTF_KIND_UNION:
> >> +               return has_name ? 3 : max_rank;
> >> +       case BTF_KIND_FUNC_PROTO:
> >> +               return has_name ? 4 : max_rank;
> >> +
> >> +       default: {
> >> +               if (has_name && is_reference_type(t)) {
> >> +                       const int parent = kind == BTF_KIND_ARRAY ? btf_array(t)->type : t->type;
> >> +
> >> +                       return btf_type_rank(btf, parent, has_name);
> >> +               }
> >> +               return max_rank;
> >> +       }
> >
> > nit: you don't need these {} for default
> >
> >> +       }
> >> +}
> >> +
> >> +static const char *btf_type_sort_name(const struct btf *btf, __u32 index)
> >> +{
> >> +       const struct btf_type *t = btf__type_by_id(btf, index);
> >> +       int kind = btf_kind(t);
> >> +
> >> +       /* Use name of the first element for anonymous enums */
> >> +       if (!t->name_off && (kind == BTF_KIND_ENUM || kind == BTF_KIND_ENUM64) &&
> >
> > there is btf_is_any_enum() helper for this kind check
> >
> >> +           BTF_INFO_VLEN(t->info))
> >
> > please use btf_vlen(t) helper
> >
> >> +               return btf__name_by_offset(btf, btf_enum(t)->name_off);
> >
> > what if enum's vlen == 0? I think I mentioned that before, it
> > shouldn't happen in valid BTF, but it's technically allowable in BTF,
> > so best to be able to handle that instead of crashing or doing random
> > memory reads.
> >
> >> +
> >> +       /* Return base type name for reference types */
> >> +       while (is_reference_type(t)) {
> >> +               index = btf_kind(t) == BTF_KIND_ARRAY ? btf_array(t)->type : t->type;
> >> +               t = btf__type_by_id(btf, index);
> >> +       }
> >> +
> >> +       return btf__name_by_offset(btf, t->name_off);
> >> +}
> >> +
> >> +static int btf_type_compare(const void *left, const void *right)
> >> +{
> >> +       const struct sort_datum *datum1 = (const struct sort_datum *)left;
> >> +       const struct sort_datum *datum2 = (const struct sort_datum *)right;
> >> +       int sort_name_cmp;
> >
> > stylistic nit: it's minot, but I'd use less distracting naming. Eg., d1, d2, r.
> >
> >> +
> >> +       if (datum1->type_rank != datum2->type_rank)
> >> +               return datum1->type_rank < datum2->type_rank ? -1 : 1;
> >> +
> >> +       sort_name_cmp = strcmp(datum1->sort_name, datum2->sort_name);
> >> +       if (sort_name_cmp)
> >> +               return sort_name_cmp;
> >> +
> >> +       return strcmp(datum1->own_name, datum2->own_name);
> >> +}
> >> +
> >> +static struct sort_datum *sort_btf_c(const struct btf *btf)
> >> +{
> >> +       int total_root_types;
> >> +       struct sort_datum *datums;
> >> +
> >> +       total_root_types = btf__type_cnt(btf);
> >
> > nit: s/total_root_types/n/
> >
> >> +       datums = malloc(sizeof(struct sort_datum) * total_root_types);
> >> +       if (!datums)
> >> +               return NULL;
> >> +
> >> +       for (int i = 0; i < total_root_types; ++i) {
> >> +               struct sort_datum *current_datum = datums + i;
> >
> > nit: just d for the name, it's not going to be hard to follow or ambiguous
> >
> >> +               const struct btf_type *t = btf__type_by_id(btf, i);
> >> +
> >> +               current_datum->index = i;
> >> +               current_datum->type_rank = btf_type_rank(btf, i, false);
> >> +               current_datum->sort_name = btf_type_sort_name(btf, i);
> >> +               current_datum->own_name = btf__name_by_offset(btf, t->name_off);
> >> +       }
> >> +
> >> +       qsort(datums, total_root_types, sizeof(struct sort_datum), btf_type_compare);
> >> +
> >> +       return datums;
> >> +}
> >> +
> >>  static int dump_btf_c(const struct btf *btf,
> >> -                     __u32 *root_type_ids, int root_type_cnt)
> >> +                     __u32 *root_type_ids, int root_type_cnt, bool sort_dump)
> >>  {
> >>         struct btf_dump *d;
> >>         int err = 0, i;
> >> +       struct sort_datum *datums = NULL;
>
> Nit: Most variables in the file are declared in "reverse-Christmas-tree"
> order (longest lines first, unless there's a reason not to). Could you
> please try to preserve this order, here and elsewhere, for consistency?
>
> >>
> >>         d = btf_dump__new(btf, btf_dump_printf, NULL, NULL);
> >>         if (!d)
> >> @@ -486,8 +596,12 @@ static int dump_btf_c(const struct btf *btf,
> >>         } else {
> >>                 int cnt = btf__type_cnt(btf);
> >>
> >> +               if (sort_dump)
> >> +                       datums = sort_btf_c(btf);
> >>                 for (i = 1; i < cnt; i++) {
> >> -                       err = btf_dump__dump_type(d, i);
> >> +                       int idx = datums ? datums[i].index : i;
> >> +
> >> +                       err = btf_dump__dump_type(d, idx);
> >>                         if (err)
> >>                                 goto done;
> >>                 }
> >> @@ -501,6 +615,7 @@ static int dump_btf_c(const struct btf *btf,
> >>
> >>  done:
> >>         btf_dump__free(d);
> >> +       free(datums);
>
> Small nit: I'd swap the two lines above, it would seem more logical to
> free in the reverse order from allocation and would be more
> straightforward to "split" if we ever need to free d only when jumping
> from the first goto.
>
> >>         return err;
> >>  }
> >>
> >> @@ -553,6 +668,7 @@ static int do_dump(int argc, char **argv)
> >>         __u32 root_type_ids[2];
> >>         int root_type_cnt = 0;
> >>         bool dump_c = false;
> >> +       bool sort_dump_c = true;
> >>         __u32 btf_id = -1;
> >>         const char *src;
> >>         int fd = -1;
> >> @@ -663,6 +779,9 @@ static int do_dump(int argc, char **argv)
> >>                                 goto done;
> >>                         }
> >>                         NEXT_ARG();
> >> +               } else if (is_prefix(*argv, "unordered")) {
> >
> > it's more of a "original order" rather than unordered, so maybe "unnormalized"?
> I'd have gone with "unsorted", but Andrii's reasoning probably applies

unsorted also makes sense, let's go with that, it's a single word

> the same to it. I find "unnormalized" might be difficult to understand,
> maybe "preserve_order" or, shorter, "keep_order"?
>
> And as Alan mentioned on the other thread, we'll need the following
> updates for the new keyword:
>
> - Adding the keyword to the command summary at the top of
>   tools/bpf/bpftool/Documentation/bpftool-btf.rst:
>   | *FORMAT* := { **raw** | **c** [**unordered**]}
>   (or whatever keyword we pick)
> - Adding the description for the keyword, below on the same page
> - Adding the keyword to the help message, at the end of btf.c
> - Updating the bash completion. The patch below should work (to adjust
>   with the final keyword):
>
> ------
> diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
> index 04afe2ac2228..85a43c867e5f 100644
> --- a/tools/bpf/bpftool/bash-completion/bpftool
> +++ b/tools/bpf/bpftool/bash-completion/bpftool
> @@ -930,6 +930,9 @@ _bpftool()
>                          format)
>                              COMPREPLY=( $( compgen -W "c raw" -- "$cur" ) )
>                              ;;
> +                        c)
> +                            COMPREPLY=( $( compgen -W "unordered" -- "$cur" ) )
> +                            ;;
>                          *)
>                              # emit extra options
>                              case ${words[3]} in
> ------
Alexei Starovoitov May 9, 2024, 11:24 p.m. UTC | #7
On Thu, May 9, 2024 at 4:09 PM Quentin Monnet <qmo@kernel.org> wrote:
>
> Nit: Most variables in the file are declared in "reverse-Christmas-tree"
> order (longest lines first, unless there's a reason not to). Could you
> please try to preserve this order, here and elsewhere, for consistency?

I so hate this nitpick.
I'll start introducing non-rev-xmas tree everywhere just
to stop this madness.
Quentin Monnet May 9, 2024, 11:39 p.m. UTC | #8
On 10/05/2024 00:24, Alexei Starovoitov wrote:
> On Thu, May 9, 2024 at 4:09 PM Quentin Monnet <qmo@kernel.org> wrote:
>>
>> Nit: Most variables in the file are declared in "reverse-Christmas-tree"
>> order (longest lines first, unless there's a reason not to). Could you
>> please try to preserve this order, here and elsewhere, for consistency?
> 
> I so hate this nitpick.

Yes, I've seen you push back before; I was wondering if you'd react on
this one. I don't mind too much about the ordering, to be honest. To me,
this is mostly a matter of consistency in the file.

> I'll start introducing non-rev-xmas tree everywhere just
> to stop this madness.

That's maybe not necessary :), at least as far as I'm concerned. I'm
fine with dropping this for future reviews (starting with the current one).

Quentin
diff mbox series

Patch

diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index 91fcb75babe3..09ecd2abf066 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -43,6 +43,13 @@  static const char * const btf_kind_str[NR_BTF_KINDS] = {
 	[BTF_KIND_ENUM64]	= "ENUM64",
 };
 
+struct sort_datum {
+	int index;
+	int type_rank;
+	const char *sort_name;
+	const char *own_name;
+};
+
 static const char *btf_int_enc_str(__u8 encoding)
 {
 	switch (encoding) {
@@ -460,11 +467,114 @@  static void __printf(2, 0) btf_dump_printf(void *ctx,
 	vfprintf(stdout, fmt, args);
 }
 
+static bool is_reference_type(const struct btf_type *t)
+{
+	int kind = btf_kind(t);
+
+	return kind == BTF_KIND_CONST || kind == BTF_KIND_PTR || kind == BTF_KIND_VOLATILE ||
+		kind == BTF_KIND_RESTRICT || kind == BTF_KIND_ARRAY || kind == BTF_KIND_TYPEDEF ||
+		kind == BTF_KIND_DECL_TAG;
+}
+
+static int btf_type_rank(const struct btf *btf, __u32 index, bool has_name)
+{
+	const struct btf_type *t = btf__type_by_id(btf, index);
+	const int max_rank = 10;
+	const int kind = btf_kind(t);
+
+	if (t->name_off)
+		has_name = true;
+
+	switch (kind) {
+	case BTF_KIND_ENUM:
+	case BTF_KIND_ENUM64:
+		return has_name ? 1 : 0;
+	case BTF_KIND_INT:
+	case BTF_KIND_FLOAT:
+		return 2;
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION:
+		return has_name ? 3 : max_rank;
+	case BTF_KIND_FUNC_PROTO:
+		return has_name ? 4 : max_rank;
+
+	default: {
+		if (has_name && is_reference_type(t)) {
+			const int parent = kind == BTF_KIND_ARRAY ? btf_array(t)->type : t->type;
+
+			return btf_type_rank(btf, parent, has_name);
+		}
+		return max_rank;
+	}
+	}
+}
+
+static const char *btf_type_sort_name(const struct btf *btf, __u32 index)
+{
+	const struct btf_type *t = btf__type_by_id(btf, index);
+	int kind = btf_kind(t);
+
+	/* Use name of the first element for anonymous enums */
+	if (!t->name_off && (kind == BTF_KIND_ENUM || kind == BTF_KIND_ENUM64) &&
+	    BTF_INFO_VLEN(t->info))
+		return btf__name_by_offset(btf, btf_enum(t)->name_off);
+
+	/* Return base type name for reference types */
+	while (is_reference_type(t)) {
+		index = btf_kind(t) == BTF_KIND_ARRAY ? btf_array(t)->type : t->type;
+		t = btf__type_by_id(btf, index);
+	}
+
+	return btf__name_by_offset(btf, t->name_off);
+}
+
+static int btf_type_compare(const void *left, const void *right)
+{
+	const struct sort_datum *datum1 = (const struct sort_datum *)left;
+	const struct sort_datum *datum2 = (const struct sort_datum *)right;
+	int sort_name_cmp;
+
+	if (datum1->type_rank != datum2->type_rank)
+		return datum1->type_rank < datum2->type_rank ? -1 : 1;
+
+	sort_name_cmp = strcmp(datum1->sort_name, datum2->sort_name);
+	if (sort_name_cmp)
+		return sort_name_cmp;
+
+	return strcmp(datum1->own_name, datum2->own_name);
+}
+
+static struct sort_datum *sort_btf_c(const struct btf *btf)
+{
+	int total_root_types;
+	struct sort_datum *datums;
+
+	total_root_types = btf__type_cnt(btf);
+	datums = malloc(sizeof(struct sort_datum) * total_root_types);
+	if (!datums)
+		return NULL;
+
+	for (int i = 0; i < total_root_types; ++i) {
+		struct sort_datum *current_datum = datums + i;
+		const struct btf_type *t = btf__type_by_id(btf, i);
+
+		current_datum->index = i;
+		current_datum->type_rank = btf_type_rank(btf, i, false);
+		current_datum->sort_name = btf_type_sort_name(btf, i);
+		current_datum->own_name = btf__name_by_offset(btf, t->name_off);
+	}
+
+	qsort(datums, total_root_types, sizeof(struct sort_datum), btf_type_compare);
+
+	return datums;
+}
+
 static int dump_btf_c(const struct btf *btf,
-		      __u32 *root_type_ids, int root_type_cnt)
+		      __u32 *root_type_ids, int root_type_cnt, bool sort_dump)
 {
 	struct btf_dump *d;
 	int err = 0, i;
+	struct sort_datum *datums = NULL;
 
 	d = btf_dump__new(btf, btf_dump_printf, NULL, NULL);
 	if (!d)
@@ -486,8 +596,12 @@  static int dump_btf_c(const struct btf *btf,
 	} else {
 		int cnt = btf__type_cnt(btf);
 
+		if (sort_dump)
+			datums = sort_btf_c(btf);
 		for (i = 1; i < cnt; i++) {
-			err = btf_dump__dump_type(d, i);
+			int idx = datums ? datums[i].index : i;
+
+			err = btf_dump__dump_type(d, idx);
 			if (err)
 				goto done;
 		}
@@ -501,6 +615,7 @@  static int dump_btf_c(const struct btf *btf,
 
 done:
 	btf_dump__free(d);
+	free(datums);
 	return err;
 }
 
@@ -553,6 +668,7 @@  static int do_dump(int argc, char **argv)
 	__u32 root_type_ids[2];
 	int root_type_cnt = 0;
 	bool dump_c = false;
+	bool sort_dump_c = true;
 	__u32 btf_id = -1;
 	const char *src;
 	int fd = -1;
@@ -663,6 +779,9 @@  static int do_dump(int argc, char **argv)
 				goto done;
 			}
 			NEXT_ARG();
+		} else if (is_prefix(*argv, "unordered")) {
+			sort_dump_c = false;
+			NEXT_ARG();
 		} else {
 			p_err("unrecognized option: '%s'", *argv);
 			err = -EINVAL;
@@ -691,7 +810,7 @@  static int do_dump(int argc, char **argv)
 			err = -ENOTSUP;
 			goto done;
 		}
-		err = dump_btf_c(btf, root_type_ids, root_type_cnt);
+		err = dump_btf_c(btf, root_type_ids, root_type_cnt, sort_dump_c);
 	} else {
 		err = dump_btf_raw(btf, root_type_ids, root_type_cnt);
 	}