diff mbox series

[dwarves,1/3] btf_encoder: collect kfuncs info in btf_encoder__new

Message ID 20250207021442.155703-2-ihor.solodrai@linux.dev (mailing list archive)
State Superseded
Headers show
Series btf_encoder: emit type tags for bpf_arena pointers | expand

Checks

Context Check Description
netdev/tree_selection success Not a local patch

Commit Message

Ihor Solodrai Feb. 7, 2025, 2:14 a.m. UTC
From: Ihor Solodrai <ihor.solodrai@pm.me>

btf_encoder__tag_kfuncs() is a post-processing step of BTF encoding,
executed right before BTF is deduped and dumped to the output.

Split btf_encoder__tag_kfuncs() routine in two parts:
  * btf_encoder__collect_kfuncs()
  * btf_encoder__tag_kfuncs()

btf_encoder__collect_kfuncs() reads the .BTF_ids section of the ELF,
collecting kfunc information into a list of kfunc_info structs in the
btf_encoder. It is executed in btf_encoder__new() when tag_kfuncs flag
is set. This way kfunc information is available during entire lifetime
of the btf_encoder.

btf_encoder__tag_kfuncs() is basically the same: collect BTF
functions, and then for each kfunc find and tag correspoding BTF
func. Except now kfunc information is not collected in-place, but is
simply read from the btf_encoder.

Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
---
 btf_encoder.c | 89 +++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 68 insertions(+), 21 deletions(-)

Comments

Eduard Zingerman Feb. 10, 2025, 8:57 p.m. UTC | #1
On Thu, 2025-02-06 at 18:14 -0800, Ihor Solodrai wrote:
> From: Ihor Solodrai <ihor.solodrai@pm.me>
> 
> btf_encoder__tag_kfuncs() is a post-processing step of BTF encoding,
> executed right before BTF is deduped and dumped to the output.
> 
> Split btf_encoder__tag_kfuncs() routine in two parts:
>   * btf_encoder__collect_kfuncs()
>   * btf_encoder__tag_kfuncs()
> 
> btf_encoder__collect_kfuncs() reads the .BTF_ids section of the ELF,
> collecting kfunc information into a list of kfunc_info structs in the
> btf_encoder. It is executed in btf_encoder__new() when tag_kfuncs flag
> is set. This way kfunc information is available during entire lifetime
> of the btf_encoder.
> 
> btf_encoder__tag_kfuncs() is basically the same: collect BTF
> functions, and then for each kfunc find and tag correspoding BTF
> func. Except now kfunc information is not collected in-place, but is
> simply read from the btf_encoder.
> 
> Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
> ---

Tbh, I don't think this split is necessary, modifying btf_type
in-place should be fine (and libbpf does it at-least in one place).
E.g. like here:
https://github.com/acmel/dwarves/compare/master...eddyz87:dwarves:arena-attrs-no-split
I like it because it keeps the change a bit more contained,
but I do not insist.

[...]

> @@ -1876,11 +1886,10 @@ static int btf_encoder__tag_kfunc(struct btf_encoder *encoder, struct gobuffer *
>  	return 0;
>  }
>  
> -static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder)
> +static int btf_encoder__collect_kfuncs(struct btf_encoder *encoder)
>  {
>  	const char *filename = encoder->source_filename;
>  	struct gobuffer btf_kfunc_ranges = {};
> -	struct gobuffer btf_funcs = {};
>  	Elf_Data *symbols = NULL;
>  	Elf_Data *idlist = NULL;
>  	Elf_Scn *symscn = NULL;
> @@ -1897,6 +1906,8 @@ static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder)
>  	int nr_syms;
>  	int i = 0;
>  
> +	INIT_LIST_HEAD(&encoder->kfuncs);
> +

Nit: do this in the btf_encoder__new?

>  	fd = open(filename, O_RDONLY);
>  	if (fd < 0) {
>  		fprintf(stderr, "Cannot open %s\n", filename);
> @@ -1977,12 +1988,6 @@ static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder)
>  	}
>  	nr_syms = shdr.sh_size / shdr.sh_entsize;
>  
> -	err = btf_encoder__collect_btf_funcs(encoder, &btf_funcs);
> -	if (err) {
> -		fprintf(stderr, "%s: failed to collect BTF funcs\n", __func__);
> -		goto out;
> -	}
> -
>  	/* First collect all kfunc set ranges.
>  	 *
>  	 * Note we choose not to sort these ranges and accept a linear
> @@ -2015,12 +2020,13 @@ static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder)
>  	for (i = 0; i < nr_syms; i++) {
>  		const struct btf_kfunc_set_range *ranges;
>  		const struct btf_id_and_flag *pair;
> +		struct elf_function *elf_fn;
> +		struct kfunc_info *kfunc;
>  		unsigned int ranges_cnt;
>  		char *func, *name;
>  		ptrdiff_t off;
>  		GElf_Sym sym;
>  		bool found;
> -		int err;
>  		int j;
>  
>  		if (!gelf_getsym(symbols, i, &sym)) {
> @@ -2061,18 +2067,26 @@ static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder)
>  			continue;
>  		}
>  
> -		err = btf_encoder__tag_kfunc(encoder, &btf_funcs, func, pair->flags);
> -		if (err) {
> -			fprintf(stderr, "%s: failed to tag kfunc '%s'\n", __func__, func);
> -			free(func);
> +		elf_fn = btf_encoder__find_function(encoder, func, 0);
> +		free(func);
> +		if (!elf_fn)
> +			continue;
> +		elf_fn->kfunc = true;
> +
> +		kfunc = calloc(1, sizeof(*kfunc));
> +		if (!kfunc) {
> +			fprintf(stderr, "%s: failed to allocate memory for kfunc info\n", __func__);
> +			err = -ENOMEM;
>  			goto out;
>  		}
> -		free(func);
> +		kfunc->id = pair->id;
> +		kfunc->flags = pair->flags;
> +		kfunc->name = elf_fn->name;

If we do go with split, maybe make refactoring a bit more drastic and
merge kfunc_info with elf_function?
This would make maintaining a separate encoder->kfuncs list unnecessary.
Also, can get rid of separate 'struct gobuffer *funcs'.
E.g. see my commit on top of yours:
https://github.com/acmel/dwarves/compare/master...eddyz87:dwarves:arena-attrs-merge-kfunc-info

> +		list_add(&kfunc->node, &encoder->kfuncs);
>  	}
>  
>  	err = 0;
>  out:
> -	__gobuffer__delete(&btf_funcs);
>  	__gobuffer__delete(&btf_kfunc_ranges);
>  	if (elf)
>  		elf_end(elf);
> @@ -2081,6 +2095,34 @@ out:
>  	return err;
>  }
>  

[...]
Ihor Solodrai Feb. 10, 2025, 10:42 p.m. UTC | #2
On 2/10/25 12:57 PM, Eduard Zingerman wrote:
> On Thu, 2025-02-06 at 18:14 -0800, Ihor Solodrai wrote:
>> From: Ihor Solodrai <ihor.solodrai@pm.me>
>>
>> btf_encoder__tag_kfuncs() is a post-processing step of BTF encoding,
>> executed right before BTF is deduped and dumped to the output.
>>
>> Split btf_encoder__tag_kfuncs() routine in two parts:
>>   * btf_encoder__collect_kfuncs()
>>   * btf_encoder__tag_kfuncs()
>>
>> [...]
>
> Tbh, I don't think this split is necessary, modifying btf_type
> in-place should be fine (and libbpf does it at-least in one place).
> E.g. like here:
> https://github.com/acmel/dwarves/compare/master...eddyz87:dwarves:arena-attrs-no-split
> I like it because it keeps the change a bit more contained,
> but I do not insist.

There are a couple of reasons this split makes sense to me.

First, I wanted to avoid modifying BTF. Having btf_encoder only
appending things to BTF is easy to reason about. But you're saying
modification does happen somewhere already?

The second reason is that the input for kfunc tagging is ELF, and so
it can be read at around the same time other ELF data is read (such as
for fucntions table). This has an additional benefit of running in
parallel to dwarf encoders (because one of the dwarf workers is
creating btf_encoder struct), as opposed to a sequential
post-processing step.

Finally I think it is generally useful to have kfunc info available at
any point of btf encoding, which becomes possible if the BTF_ids
section is read at the beginning of the encoding process.

>
> [...]
>>  
>> -		err = btf_encoder__tag_kfunc(encoder, &btf_funcs, func, pair->flags);
>> -		if (err) {
>> -			fprintf(stderr, "%s: failed to tag kfunc '%s'\n", __func__, func);
>> -			free(func);
>> +		elf_fn = btf_encoder__find_function(encoder, func, 0);
>> +		free(func);
>> +		if (!elf_fn)
>> +			continue;
>> +		elf_fn->kfunc = true;
>> +
>> +		kfunc = calloc(1, sizeof(*kfunc));
>> +		if (!kfunc) {
>> +			fprintf(stderr, "%s: failed to allocate memory for kfunc info\n", __func__);
>> +			err = -ENOMEM;
>>  			goto out;
>>  		}
>> -		free(func);
>> +		kfunc->id = pair->id;
>> +		kfunc->flags = pair->flags;
>> +		kfunc->name = elf_fn->name;
>
> If we do go with split, maybe make refactoring a bit more drastic and
> merge kfunc_info with elf_function?
> This would make maintaining a separate encoder->kfuncs list unnecessary.
> Also, can get rid of separate 'struct gobuffer *funcs'.
> E.g. see my commit on top of yours:
> https://github.com/acmel/dwarves/compare/master...eddyz87:dwarves:arena-attrs-merge-kfunc-info

Yeah, I like it. I'll play with this for v2, thanks.

>
>> +		list_add(&kfunc->node, &encoder->kfuncs);
>>  	}
>>  
>>  	err = 0;
>>  out:
>> -	__gobuffer__delete(&btf_funcs);
>>  	__gobuffer__delete(&btf_kfunc_ranges);
>>  	if (elf)
>>  		elf_end(elf);
>> @@ -2081,6 +2095,34 @@ out:
>>  	return err;
>>  }
>>  
>
> [...]
>
Eduard Zingerman Feb. 11, 2025, 12:35 a.m. UTC | #3
On Mon, 2025-02-10 at 22:42 +0000, Ihor Solodrai wrote:
> On 2/10/25 12:57 PM, Eduard Zingerman wrote:
> > On Thu, 2025-02-06 at 18:14 -0800, Ihor Solodrai wrote:
> > > From: Ihor Solodrai <ihor.solodrai@pm.me>
> > > 
> > > btf_encoder__tag_kfuncs() is a post-processing step of BTF encoding,
> > > executed right before BTF is deduped and dumped to the output.
> > > 
> > > Split btf_encoder__tag_kfuncs() routine in two parts:
> > >   * btf_encoder__collect_kfuncs()
> > >   * btf_encoder__tag_kfuncs()
> > > 
> > > [...]
> > 
> > Tbh, I don't think this split is necessary, modifying btf_type
> > in-place should be fine (and libbpf does it at-least in one place).
> > E.g. like here:
> > https://github.com/acmel/dwarves/compare/master...eddyz87:dwarves:arena-attrs-no-split
> > I like it because it keeps the change a bit more contained,
> > but I do not insist.
> 
> There are a couple of reasons this split makes sense to me.
> 
> First, I wanted to avoid modifying BTF. Having btf_encoder only
> appending things to BTF is easy to reason about. But you're saying
> modification does happen somewhere already?

See tools/lib/bpf/libbpf.c:bpf_object__sanitize_btf().
A set of small in-place rewrites for compatibility with old kernels.

> The second reason is that the input for kfunc tagging is ELF, and so
> it can be read at around the same time other ELF data is read (such as
> for fucntions table). This has an additional benefit of running in
> parallel to dwarf encoders (because one of the dwarf workers is
> creating btf_encoder struct), as opposed to a sequential
> post-processing step.

Makes sense.

[...]
diff mbox series

Patch

diff --git a/btf_encoder.c b/btf_encoder.c
index 511c1ea..e9f4baf 100644
--- a/btf_encoder.c
+++ b/btf_encoder.c
@@ -89,6 +89,7 @@  struct elf_function {
 	const char	*name;
 	char		*alias;
 	size_t		prefixlen;
+	bool		kfunc;
 };
 
 struct elf_secinfo {
@@ -100,6 +101,13 @@  struct elf_secinfo {
 	struct gobuffer secinfo;
 };
 
+struct kfunc_info {
+	struct list_head node;
+	uint32_t id;
+	uint32_t flags;
+	const char* name;
+};
+
 struct elf_functions {
 	struct list_head node; /* for elf_functions_list */
 	Elf *elf; /* source ELF */
@@ -143,6 +151,7 @@  struct btf_encoder {
 	 * so we have to store elf_functions tables per ELF.
 	 */
 	struct list_head elf_functions_list;
+	struct list_head kfuncs; /* list of kfunc_info */
 };
 
 struct btf_func {
@@ -1842,9 +1851,9 @@  static int btf__add_kfunc_decl_tag(struct btf *btf, const char *tag, __u32 id, c
 	return 0;
 }
 
-static int btf_encoder__tag_kfunc(struct btf_encoder *encoder, struct gobuffer *funcs, const char *kfunc, __u32 flags)
+static int btf_encoder__tag_kfunc(struct btf_encoder *encoder, struct gobuffer *funcs, const struct kfunc_info *kfunc)
 {
-	struct btf_func key = { .name = kfunc };
+	struct btf_func key = { .name = kfunc->name };
 	struct btf *btf = encoder->btf;
 	struct btf_func *target;
 	const void *base;
@@ -1855,7 +1864,7 @@  static int btf_encoder__tag_kfunc(struct btf_encoder *encoder, struct gobuffer *
 	cnt = gobuffer__nr_entries(funcs);
 	target = bsearch(&key, base, cnt, sizeof(key), btf_func_cmp);
 	if (!target) {
-		fprintf(stderr, "%s: failed to find kfunc '%s' in BTF\n", __func__, kfunc);
+		fprintf(stderr, "%s: failed to find kfunc '%s' in BTF\n", __func__, kfunc->name);
 		return -1;
 	}
 
@@ -1864,11 +1873,12 @@  static int btf_encoder__tag_kfunc(struct btf_encoder *encoder, struct gobuffer *
 	 * We are ok to do this b/c we will later btf__dedup() to remove
 	 * any duplicates.
 	 */
-	err = btf__add_kfunc_decl_tag(btf, BTF_KFUNC_TYPE_TAG, target->type_id, kfunc);
+	err = btf__add_kfunc_decl_tag(btf, BTF_KFUNC_TYPE_TAG, target->type_id, kfunc->name);
 	if (err < 0)
 		return err;
-	if (flags & KF_FASTCALL) {
-		err = btf__add_kfunc_decl_tag(btf, BTF_FASTCALL_TAG, target->type_id, kfunc);
+
+	if (kfunc->flags & KF_FASTCALL) {
+		err = btf__add_kfunc_decl_tag(btf, BTF_FASTCALL_TAG, target->type_id, kfunc->name);
 		if (err < 0)
 			return err;
 	}
@@ -1876,11 +1886,10 @@  static int btf_encoder__tag_kfunc(struct btf_encoder *encoder, struct gobuffer *
 	return 0;
 }
 
-static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder)
+static int btf_encoder__collect_kfuncs(struct btf_encoder *encoder)
 {
 	const char *filename = encoder->source_filename;
 	struct gobuffer btf_kfunc_ranges = {};
-	struct gobuffer btf_funcs = {};
 	Elf_Data *symbols = NULL;
 	Elf_Data *idlist = NULL;
 	Elf_Scn *symscn = NULL;
@@ -1897,6 +1906,8 @@  static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder)
 	int nr_syms;
 	int i = 0;
 
+	INIT_LIST_HEAD(&encoder->kfuncs);
+
 	fd = open(filename, O_RDONLY);
 	if (fd < 0) {
 		fprintf(stderr, "Cannot open %s\n", filename);
@@ -1977,12 +1988,6 @@  static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder)
 	}
 	nr_syms = shdr.sh_size / shdr.sh_entsize;
 
-	err = btf_encoder__collect_btf_funcs(encoder, &btf_funcs);
-	if (err) {
-		fprintf(stderr, "%s: failed to collect BTF funcs\n", __func__);
-		goto out;
-	}
-
 	/* First collect all kfunc set ranges.
 	 *
 	 * Note we choose not to sort these ranges and accept a linear
@@ -2015,12 +2020,13 @@  static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder)
 	for (i = 0; i < nr_syms; i++) {
 		const struct btf_kfunc_set_range *ranges;
 		const struct btf_id_and_flag *pair;
+		struct elf_function *elf_fn;
+		struct kfunc_info *kfunc;
 		unsigned int ranges_cnt;
 		char *func, *name;
 		ptrdiff_t off;
 		GElf_Sym sym;
 		bool found;
-		int err;
 		int j;
 
 		if (!gelf_getsym(symbols, i, &sym)) {
@@ -2061,18 +2067,26 @@  static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder)
 			continue;
 		}
 
-		err = btf_encoder__tag_kfunc(encoder, &btf_funcs, func, pair->flags);
-		if (err) {
-			fprintf(stderr, "%s: failed to tag kfunc '%s'\n", __func__, func);
-			free(func);
+		elf_fn = btf_encoder__find_function(encoder, func, 0);
+		free(func);
+		if (!elf_fn)
+			continue;
+		elf_fn->kfunc = true;
+
+		kfunc = calloc(1, sizeof(*kfunc));
+		if (!kfunc) {
+			fprintf(stderr, "%s: failed to allocate memory for kfunc info\n", __func__);
+			err = -ENOMEM;
 			goto out;
 		}
-		free(func);
+		kfunc->id = pair->id;
+		kfunc->flags = pair->flags;
+		kfunc->name = elf_fn->name;
+		list_add(&kfunc->node, &encoder->kfuncs);
 	}
 
 	err = 0;
 out:
-	__gobuffer__delete(&btf_funcs);
 	__gobuffer__delete(&btf_kfunc_ranges);
 	if (elf)
 		elf_end(elf);
@@ -2081,6 +2095,34 @@  out:
 	return err;
 }
 
+static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder)
+{
+	struct gobuffer btf_funcs = {};
+	int err;
+
+	err = btf_encoder__collect_btf_funcs(encoder, &btf_funcs);
+	if (err) {
+		fprintf(stderr, "%s: failed to collect BTF funcs\n", __func__);
+		goto out;
+	}
+
+	struct kfunc_info *kfunc, *tmp;
+	list_for_each_entry_safe(kfunc, tmp, &encoder->kfuncs, node) {
+		err = btf_encoder__tag_kfunc(encoder, &btf_funcs, kfunc);
+		if (err) {
+			fprintf(stderr, "%s: failed to tag kfunc '%s'\n", __func__, kfunc->name);
+			goto out;
+		}
+		list_del(&kfunc->node);
+		free(kfunc);
+	}
+
+	err = 0;
+out:
+	__gobuffer__delete(&btf_funcs);
+	return err;
+}
+
 int btf_encoder__encode(struct btf_encoder *encoder, struct conf_load *conf)
 {
 	bool should_tag_kfuncs;
@@ -2496,6 +2538,11 @@  struct btf_encoder *btf_encoder__new(struct cu *cu, const char *detached_filenam
 		if (!found_percpu && encoder->verbose)
 			printf("%s: '%s' doesn't have '%s' section\n", __func__, cu->filename, PERCPU_SECTION);
 
+		if (encoder->tag_kfuncs) {
+			if (btf_encoder__collect_kfuncs(encoder))
+				goto out_delete;
+		}
+
 		if (encoder->verbose)
 			printf("File %s:\n", cu->filename);
 	}