Message ID | 20250207021442.155703-2-ihor.solodrai@linux.dev (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | btf_encoder: emit type tags for bpf_arena pointers | expand |
Context | Check | Description |
---|---|---|
netdev/tree_selection | success | Not a local patch |
On Thu, 2025-02-06 at 18:14 -0800, Ihor Solodrai wrote: > From: Ihor Solodrai <ihor.solodrai@pm.me> > > btf_encoder__tag_kfuncs() is a post-processing step of BTF encoding, > executed right before BTF is deduped and dumped to the output. > > Split btf_encoder__tag_kfuncs() routine in two parts: > * btf_encoder__collect_kfuncs() > * btf_encoder__tag_kfuncs() > > btf_encoder__collect_kfuncs() reads the .BTF_ids section of the ELF, > collecting kfunc information into a list of kfunc_info structs in the > btf_encoder. It is executed in btf_encoder__new() when tag_kfuncs flag > is set. This way kfunc information is available during entire lifetime > of the btf_encoder. > > btf_encoder__tag_kfuncs() is basically the same: collect BTF > functions, and then for each kfunc find and tag correspoding BTF > func. Except now kfunc information is not collected in-place, but is > simply read from the btf_encoder. > > Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev> > --- Tbh, I don't think this split is necessary, modifying btf_type in-place should be fine (and libbpf does it at-least in one place). E.g. like here: https://github.com/acmel/dwarves/compare/master...eddyz87:dwarves:arena-attrs-no-split I like it because it keeps the change a bit more contained, but I do not insist. [...] > @@ -1876,11 +1886,10 @@ static int btf_encoder__tag_kfunc(struct btf_encoder *encoder, struct gobuffer * > return 0; > } > > -static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder) > +static int btf_encoder__collect_kfuncs(struct btf_encoder *encoder) > { > const char *filename = encoder->source_filename; > struct gobuffer btf_kfunc_ranges = {}; > - struct gobuffer btf_funcs = {}; > Elf_Data *symbols = NULL; > Elf_Data *idlist = NULL; > Elf_Scn *symscn = NULL; > @@ -1897,6 +1906,8 @@ static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder) > int nr_syms; > int i = 0; > > + INIT_LIST_HEAD(&encoder->kfuncs); > + Nit: do this in the btf_encoder__new? > fd = open(filename, O_RDONLY); > if (fd < 0) { > fprintf(stderr, "Cannot open %s\n", filename); > @@ -1977,12 +1988,6 @@ static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder) > } > nr_syms = shdr.sh_size / shdr.sh_entsize; > > - err = btf_encoder__collect_btf_funcs(encoder, &btf_funcs); > - if (err) { > - fprintf(stderr, "%s: failed to collect BTF funcs\n", __func__); > - goto out; > - } > - > /* First collect all kfunc set ranges. > * > * Note we choose not to sort these ranges and accept a linear > @@ -2015,12 +2020,13 @@ static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder) > for (i = 0; i < nr_syms; i++) { > const struct btf_kfunc_set_range *ranges; > const struct btf_id_and_flag *pair; > + struct elf_function *elf_fn; > + struct kfunc_info *kfunc; > unsigned int ranges_cnt; > char *func, *name; > ptrdiff_t off; > GElf_Sym sym; > bool found; > - int err; > int j; > > if (!gelf_getsym(symbols, i, &sym)) { > @@ -2061,18 +2067,26 @@ static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder) > continue; > } > > - err = btf_encoder__tag_kfunc(encoder, &btf_funcs, func, pair->flags); > - if (err) { > - fprintf(stderr, "%s: failed to tag kfunc '%s'\n", __func__, func); > - free(func); > + elf_fn = btf_encoder__find_function(encoder, func, 0); > + free(func); > + if (!elf_fn) > + continue; > + elf_fn->kfunc = true; > + > + kfunc = calloc(1, sizeof(*kfunc)); > + if (!kfunc) { > + fprintf(stderr, "%s: failed to allocate memory for kfunc info\n", __func__); > + err = -ENOMEM; > goto out; > } > - free(func); > + kfunc->id = pair->id; > + kfunc->flags = pair->flags; > + kfunc->name = elf_fn->name; If we do go with split, maybe make refactoring a bit more drastic and merge kfunc_info with elf_function? This would make maintaining a separate encoder->kfuncs list unnecessary. Also, can get rid of separate 'struct gobuffer *funcs'. E.g. see my commit on top of yours: https://github.com/acmel/dwarves/compare/master...eddyz87:dwarves:arena-attrs-merge-kfunc-info > + list_add(&kfunc->node, &encoder->kfuncs); > } > > err = 0; > out: > - __gobuffer__delete(&btf_funcs); > __gobuffer__delete(&btf_kfunc_ranges); > if (elf) > elf_end(elf); > @@ -2081,6 +2095,34 @@ out: > return err; > } > [...]
On 2/10/25 12:57 PM, Eduard Zingerman wrote: > On Thu, 2025-02-06 at 18:14 -0800, Ihor Solodrai wrote: >> From: Ihor Solodrai <ihor.solodrai@pm.me> >> >> btf_encoder__tag_kfuncs() is a post-processing step of BTF encoding, >> executed right before BTF is deduped and dumped to the output. >> >> Split btf_encoder__tag_kfuncs() routine in two parts: >> * btf_encoder__collect_kfuncs() >> * btf_encoder__tag_kfuncs() >> >> [...] > > Tbh, I don't think this split is necessary, modifying btf_type > in-place should be fine (and libbpf does it at-least in one place). > E.g. like here: > https://github.com/acmel/dwarves/compare/master...eddyz87:dwarves:arena-attrs-no-split > I like it because it keeps the change a bit more contained, > but I do not insist. There are a couple of reasons this split makes sense to me. First, I wanted to avoid modifying BTF. Having btf_encoder only appending things to BTF is easy to reason about. But you're saying modification does happen somewhere already? The second reason is that the input for kfunc tagging is ELF, and so it can be read at around the same time other ELF data is read (such as for fucntions table). This has an additional benefit of running in parallel to dwarf encoders (because one of the dwarf workers is creating btf_encoder struct), as opposed to a sequential post-processing step. Finally I think it is generally useful to have kfunc info available at any point of btf encoding, which becomes possible if the BTF_ids section is read at the beginning of the encoding process. > > [...] >> >> - err = btf_encoder__tag_kfunc(encoder, &btf_funcs, func, pair->flags); >> - if (err) { >> - fprintf(stderr, "%s: failed to tag kfunc '%s'\n", __func__, func); >> - free(func); >> + elf_fn = btf_encoder__find_function(encoder, func, 0); >> + free(func); >> + if (!elf_fn) >> + continue; >> + elf_fn->kfunc = true; >> + >> + kfunc = calloc(1, sizeof(*kfunc)); >> + if (!kfunc) { >> + fprintf(stderr, "%s: failed to allocate memory for kfunc info\n", __func__); >> + err = -ENOMEM; >> goto out; >> } >> - free(func); >> + kfunc->id = pair->id; >> + kfunc->flags = pair->flags; >> + kfunc->name = elf_fn->name; > > If we do go with split, maybe make refactoring a bit more drastic and > merge kfunc_info with elf_function? > This would make maintaining a separate encoder->kfuncs list unnecessary. > Also, can get rid of separate 'struct gobuffer *funcs'. > E.g. see my commit on top of yours: > https://github.com/acmel/dwarves/compare/master...eddyz87:dwarves:arena-attrs-merge-kfunc-info Yeah, I like it. I'll play with this for v2, thanks. > >> + list_add(&kfunc->node, &encoder->kfuncs); >> } >> >> err = 0; >> out: >> - __gobuffer__delete(&btf_funcs); >> __gobuffer__delete(&btf_kfunc_ranges); >> if (elf) >> elf_end(elf); >> @@ -2081,6 +2095,34 @@ out: >> return err; >> } >> > > [...] >
On Mon, 2025-02-10 at 22:42 +0000, Ihor Solodrai wrote: > On 2/10/25 12:57 PM, Eduard Zingerman wrote: > > On Thu, 2025-02-06 at 18:14 -0800, Ihor Solodrai wrote: > > > From: Ihor Solodrai <ihor.solodrai@pm.me> > > > > > > btf_encoder__tag_kfuncs() is a post-processing step of BTF encoding, > > > executed right before BTF is deduped and dumped to the output. > > > > > > Split btf_encoder__tag_kfuncs() routine in two parts: > > > * btf_encoder__collect_kfuncs() > > > * btf_encoder__tag_kfuncs() > > > > > > [...] > > > > Tbh, I don't think this split is necessary, modifying btf_type > > in-place should be fine (and libbpf does it at-least in one place). > > E.g. like here: > > https://github.com/acmel/dwarves/compare/master...eddyz87:dwarves:arena-attrs-no-split > > I like it because it keeps the change a bit more contained, > > but I do not insist. > > There are a couple of reasons this split makes sense to me. > > First, I wanted to avoid modifying BTF. Having btf_encoder only > appending things to BTF is easy to reason about. But you're saying > modification does happen somewhere already? See tools/lib/bpf/libbpf.c:bpf_object__sanitize_btf(). A set of small in-place rewrites for compatibility with old kernels. > The second reason is that the input for kfunc tagging is ELF, and so > it can be read at around the same time other ELF data is read (such as > for fucntions table). This has an additional benefit of running in > parallel to dwarf encoders (because one of the dwarf workers is > creating btf_encoder struct), as opposed to a sequential > post-processing step. Makes sense. [...]
diff --git a/btf_encoder.c b/btf_encoder.c index 511c1ea..e9f4baf 100644 --- a/btf_encoder.c +++ b/btf_encoder.c @@ -89,6 +89,7 @@ struct elf_function { const char *name; char *alias; size_t prefixlen; + bool kfunc; }; struct elf_secinfo { @@ -100,6 +101,13 @@ struct elf_secinfo { struct gobuffer secinfo; }; +struct kfunc_info { + struct list_head node; + uint32_t id; + uint32_t flags; + const char* name; +}; + struct elf_functions { struct list_head node; /* for elf_functions_list */ Elf *elf; /* source ELF */ @@ -143,6 +151,7 @@ struct btf_encoder { * so we have to store elf_functions tables per ELF. */ struct list_head elf_functions_list; + struct list_head kfuncs; /* list of kfunc_info */ }; struct btf_func { @@ -1842,9 +1851,9 @@ static int btf__add_kfunc_decl_tag(struct btf *btf, const char *tag, __u32 id, c return 0; } -static int btf_encoder__tag_kfunc(struct btf_encoder *encoder, struct gobuffer *funcs, const char *kfunc, __u32 flags) +static int btf_encoder__tag_kfunc(struct btf_encoder *encoder, struct gobuffer *funcs, const struct kfunc_info *kfunc) { - struct btf_func key = { .name = kfunc }; + struct btf_func key = { .name = kfunc->name }; struct btf *btf = encoder->btf; struct btf_func *target; const void *base; @@ -1855,7 +1864,7 @@ static int btf_encoder__tag_kfunc(struct btf_encoder *encoder, struct gobuffer * cnt = gobuffer__nr_entries(funcs); target = bsearch(&key, base, cnt, sizeof(key), btf_func_cmp); if (!target) { - fprintf(stderr, "%s: failed to find kfunc '%s' in BTF\n", __func__, kfunc); + fprintf(stderr, "%s: failed to find kfunc '%s' in BTF\n", __func__, kfunc->name); return -1; } @@ -1864,11 +1873,12 @@ static int btf_encoder__tag_kfunc(struct btf_encoder *encoder, struct gobuffer * * We are ok to do this b/c we will later btf__dedup() to remove * any duplicates. */ - err = btf__add_kfunc_decl_tag(btf, BTF_KFUNC_TYPE_TAG, target->type_id, kfunc); + err = btf__add_kfunc_decl_tag(btf, BTF_KFUNC_TYPE_TAG, target->type_id, kfunc->name); if (err < 0) return err; - if (flags & KF_FASTCALL) { - err = btf__add_kfunc_decl_tag(btf, BTF_FASTCALL_TAG, target->type_id, kfunc); + + if (kfunc->flags & KF_FASTCALL) { + err = btf__add_kfunc_decl_tag(btf, BTF_FASTCALL_TAG, target->type_id, kfunc->name); if (err < 0) return err; } @@ -1876,11 +1886,10 @@ static int btf_encoder__tag_kfunc(struct btf_encoder *encoder, struct gobuffer * return 0; } -static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder) +static int btf_encoder__collect_kfuncs(struct btf_encoder *encoder) { const char *filename = encoder->source_filename; struct gobuffer btf_kfunc_ranges = {}; - struct gobuffer btf_funcs = {}; Elf_Data *symbols = NULL; Elf_Data *idlist = NULL; Elf_Scn *symscn = NULL; @@ -1897,6 +1906,8 @@ static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder) int nr_syms; int i = 0; + INIT_LIST_HEAD(&encoder->kfuncs); + fd = open(filename, O_RDONLY); if (fd < 0) { fprintf(stderr, "Cannot open %s\n", filename); @@ -1977,12 +1988,6 @@ static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder) } nr_syms = shdr.sh_size / shdr.sh_entsize; - err = btf_encoder__collect_btf_funcs(encoder, &btf_funcs); - if (err) { - fprintf(stderr, "%s: failed to collect BTF funcs\n", __func__); - goto out; - } - /* First collect all kfunc set ranges. * * Note we choose not to sort these ranges and accept a linear @@ -2015,12 +2020,13 @@ static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder) for (i = 0; i < nr_syms; i++) { const struct btf_kfunc_set_range *ranges; const struct btf_id_and_flag *pair; + struct elf_function *elf_fn; + struct kfunc_info *kfunc; unsigned int ranges_cnt; char *func, *name; ptrdiff_t off; GElf_Sym sym; bool found; - int err; int j; if (!gelf_getsym(symbols, i, &sym)) { @@ -2061,18 +2067,26 @@ static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder) continue; } - err = btf_encoder__tag_kfunc(encoder, &btf_funcs, func, pair->flags); - if (err) { - fprintf(stderr, "%s: failed to tag kfunc '%s'\n", __func__, func); - free(func); + elf_fn = btf_encoder__find_function(encoder, func, 0); + free(func); + if (!elf_fn) + continue; + elf_fn->kfunc = true; + + kfunc = calloc(1, sizeof(*kfunc)); + if (!kfunc) { + fprintf(stderr, "%s: failed to allocate memory for kfunc info\n", __func__); + err = -ENOMEM; goto out; } - free(func); + kfunc->id = pair->id; + kfunc->flags = pair->flags; + kfunc->name = elf_fn->name; + list_add(&kfunc->node, &encoder->kfuncs); } err = 0; out: - __gobuffer__delete(&btf_funcs); __gobuffer__delete(&btf_kfunc_ranges); if (elf) elf_end(elf); @@ -2081,6 +2095,34 @@ out: return err; } +static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder) +{ + struct gobuffer btf_funcs = {}; + int err; + + err = btf_encoder__collect_btf_funcs(encoder, &btf_funcs); + if (err) { + fprintf(stderr, "%s: failed to collect BTF funcs\n", __func__); + goto out; + } + + struct kfunc_info *kfunc, *tmp; + list_for_each_entry_safe(kfunc, tmp, &encoder->kfuncs, node) { + err = btf_encoder__tag_kfunc(encoder, &btf_funcs, kfunc); + if (err) { + fprintf(stderr, "%s: failed to tag kfunc '%s'\n", __func__, kfunc->name); + goto out; + } + list_del(&kfunc->node); + free(kfunc); + } + + err = 0; +out: + __gobuffer__delete(&btf_funcs); + return err; +} + int btf_encoder__encode(struct btf_encoder *encoder, struct conf_load *conf) { bool should_tag_kfuncs; @@ -2496,6 +2538,11 @@ struct btf_encoder *btf_encoder__new(struct cu *cu, const char *detached_filenam if (!found_percpu && encoder->verbose) printf("%s: '%s' doesn't have '%s' section\n", __func__, cu->filename, PERCPU_SECTION); + if (encoder->tag_kfuncs) { + if (btf_encoder__collect_kfuncs(encoder)) + goto out_delete; + } + if (encoder->verbose) printf("File %s:\n", cu->filename); }