Message ID | 088c4362fa9f080c319df7c49bc6a4e1b6ae98e7.1710532093.git.dxu@dxuuu.xyz (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | pahole: Inject kfunc decl tags into BTF | expand |
Context | Check | Description |
---|---|---|
netdev/tree_selection | success | Not a local patch |
On Fri, Mar 15, 2024 at 01:48:52PM -0600, Daniel Xu wrote: SNIP > +static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder) > +{ > + const char *filename = encoder->filename; > + struct gobuffer btf_kfunc_ranges = {}; > + struct gobuffer btf_funcs = {}; > + Elf_Data *symbols = NULL; > + Elf_Data *idlist = NULL; > + Elf_Scn *symscn = NULL; > + int symbols_shndx = -1; > + size_t idlist_addr = 0; > + int fd = -1, err = -1; > + int idlist_shndx = -1; > + size_t strtabidx = 0; > + Elf_Scn *scn = NULL; > + Elf *elf = NULL; > + GElf_Shdr shdr; > + size_t strndx; > + char *secname; > + int nr_syms; > + int i = 0; > + > + fd = open(filename, O_RDONLY); > + if (fd < 0) { > + fprintf(stderr, "Cannot open %s\n", filename); > + goto out; > + } > + > + if (elf_version(EV_CURRENT) == EV_NONE) { > + elf_error("Cannot set libelf version"); > + goto out; > + } > + > + elf = elf_begin(fd, ELF_C_READ, NULL); > + if (elf == NULL) { > + elf_error("Cannot update ELF file"); > + goto out; > + } > + > + /* Locate symbol table and .BTF_ids sections */ > + if (elf_getshdrstrndx(elf, &strndx) < 0) > + goto out; nit, missing new line > + while ((scn = elf_nextscn(elf, scn)) != NULL) { > + Elf_Data *data; Acked/Tested-by: Jiri Olsa <jolsa@kernel.org> thanks, jirka > + > + i++; > + if (!gelf_getshdr(scn, &shdr)) { > + elf_error("Failed to get ELF section(%d) hdr", i); > + goto out; > + } > + > + secname = elf_strptr(elf, strndx, shdr.sh_name); > + if (!secname) { > + elf_error("Failed to get ELF section(%d) hdr name", i); > + goto out; > + } > + > + data = elf_getdata(scn, 0); > + if (!data) { > + elf_error("Failed to get ELF section(%d) data", i); > + goto out; > + } > + > + if (shdr.sh_type == SHT_SYMTAB) { > + symbols_shndx = i; > + symscn = scn; > + symbols = data; > + strtabidx = shdr.sh_link; > + } else if (!strcmp(secname, BTF_IDS_SECTION)) { > + idlist_shndx = i; > + idlist_addr = shdr.sh_addr; > + idlist = data; > + } > + } SNIP
On 15/03/2024 19:48, Daniel Xu wrote: > This commit teaches pahole to parse symbols in .BTF_ids section in > vmlinux and discover exported kfuncs. Pahole then takes the list of > kfuncs and injects a BTF_KIND_DECL_TAG for each kfunc. > > Example of encoding: > > $ bpftool btf dump file .tmp_vmlinux.btf | rg "DECL_TAG 'bpf_kfunc'" | wc -l > 121 > > $ bpftool btf dump file .tmp_vmlinux.btf | rg 56337 > [56337] FUNC 'bpf_ct_change_timeout' type_id=56336 linkage=static > [127861] DECL_TAG 'bpf_kfunc' type_id=56337 component_idx=-1 > > This enables downstream users and tools to dynamically discover which > kfuncs are available on a system by parsing vmlinux or module BTF, both > available in /sys/kernel/btf. > > This feature is enabled with --btf_features=decl_tag,decl_tag_kfuncs. > > Signed-off-by: Daniel Xu <dxu@dxuuu.xyz> This is great work; a lot of steps needed to collect this info, but it's really valuable! Reviewed-by: Alan Maguire <alan.maguire@oracle.com> Tested-by: Alan Maguire <alan.maguire@oracle.com> BTW we need something like the attached patch to switch to using --btf_features for pahole 1.26 and later; will I send it officially or do you have something that does the same that you want to roll into your bpf-next series? Let me know what works from your side. Thanks! > --- > btf_encoder.c | 368 ++++++++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 368 insertions(+) > > diff --git a/btf_encoder.c b/btf_encoder.c > index 850e36f..31848e2 100644 > --- a/btf_encoder.c > +++ b/btf_encoder.c > @@ -34,6 +34,21 @@ > #include <pthread.h> > > #define BTF_ENCODER_MAX_PROTO 512 > +#define BTF_IDS_SECTION ".BTF_ids" > +#define BTF_ID_FUNC_PFX "__BTF_ID__func__" > +#define BTF_ID_SET8_PFX "__BTF_ID__set8__" > +#define BTF_SET8_KFUNCS (1 << 0) > +#define BTF_KFUNC_TYPE_TAG "bpf_kfunc" > + > +/* Adapted from include/linux/btf_ids.h */ > +struct btf_id_set8 { > + uint32_t cnt; > + uint32_t flags; > + struct { > + uint32_t id; > + uint32_t flags; > + } pairs[]; > +}; > > /* state used to do later encoding of saved functions */ > struct btf_encoder_state { > @@ -75,6 +90,7 @@ struct btf_encoder { > verbose, > force, > gen_floats, > + skip_encoding_decl_tag, > tag_kfuncs, > is_rel; > uint32_t array_index_id; > @@ -94,6 +110,17 @@ struct btf_encoder { > } functions; > }; > > +struct btf_func { > + const char *name; > + int type_id; > +}; > + > +/* Half open interval representing range of addresses containing kfuncs */ > +struct btf_kfunc_set_range { > + size_t start; > + size_t end; > +}; > + > static LIST_HEAD(encoders); > static pthread_mutex_t encoders__lock = PTHREAD_MUTEX_INITIALIZER; > > @@ -1363,8 +1390,339 @@ out: > return err; > } > > +/* Returns if `sym` points to a kfunc set */ > +static int is_sym_kfunc_set(GElf_Sym *sym, const char *name, Elf_Data *idlist, size_t idlist_addr) > +{ > + void *ptr = idlist->d_buf; > + struct btf_id_set8 *set; > + bool is_set8; > + int off; > + > + /* kfuncs are only found in BTF_SET8's */ > + is_set8 = !strncmp(name, BTF_ID_SET8_PFX, sizeof(BTF_ID_SET8_PFX) - 1); > + if (!is_set8) > + return false; > + > + off = sym->st_value - idlist_addr; > + if (off >= idlist->d_size) { > + fprintf(stderr, "%s: symbol '%s' out of bounds\n", __func__, name); > + return false; > + } > + > + /* Check the set8 flags to see if it was marked as kfunc */ > + set = ptr + off; > + return set->flags & BTF_SET8_KFUNCS; > +} > + > +/* > + * Parse BTF_ID symbol and return the func name. > + * > + * Returns: > + * Caller-owned string containing func name if successful. > + * NULL if !func or on error. > + */ > +static char *get_func_name(const char *sym) > +{ > + char *func, *end; > + > + if (strncmp(sym, BTF_ID_FUNC_PFX, sizeof(BTF_ID_FUNC_PFX) - 1)) > + return NULL; > + > + /* Strip prefix */ > + func = strdup(sym + sizeof(BTF_ID_FUNC_PFX) - 1); > + if (strlen(func) < 2) { > + free(func); > + return NULL; > + } > + > + /* Strip suffix */ > + end = strrchr(func, '_'); > + if (!end || *(end - 1) != '_') { > + free(func); > + return NULL; > + } > + *(end - 1) = '\0'; > + > + return func; > +} > + > +static int btf_func_cmp(const void *_a, const void *_b) > +{ > + const struct btf_func *a = _a; > + const struct btf_func *b = _b; > + > + return strcmp(a->name, b->name); > +} > + > +/* > + * Collects all functions described in BTF. > + * Returns non-zero on error. > + */ > +static int btf_encoder__collect_btf_funcs(struct btf_encoder *encoder, struct gobuffer *funcs) > +{ > + struct btf *btf = encoder->btf; > + int nr_types, type_id; > + int err = -1; > + > + /* First collect all the func entries into an array */ > + nr_types = btf__type_cnt(btf); > + for (type_id = 1; type_id < nr_types; type_id++) { > + const struct btf_type *type; > + struct btf_func func = {}; > + const char *name; > + > + type = btf__type_by_id(btf, type_id); > + if (!type) { > + fprintf(stderr, "%s: malformed BTF, can't resolve type for ID %d\n", > + __func__, type_id); > + err = -EINVAL; > + goto out; > + } > + > + if (!btf_is_func(type)) > + continue; > + > + name = btf__name_by_offset(btf, type->name_off); > + if (!name) { > + fprintf(stderr, "%s: malformed BTF, can't resolve name for ID %d\n", > + __func__, type_id); > + err = -EINVAL; > + goto out; > + } > + > + func.name = name; > + func.type_id = type_id; > + err = gobuffer__add(funcs, &func, sizeof(func)); > + if (err < 0) > + goto out; > + } > + > + /* Now that we've collected funcs, sort them by name */ > + qsort((void *)gobuffer__entries(funcs), gobuffer__nr_entries(funcs), > + sizeof(struct btf_func), btf_func_cmp); > + > + err = 0; > +out: > + return err; > +} > + > +static int btf_encoder__tag_kfunc(struct btf_encoder *encoder, struct gobuffer *funcs, const char *kfunc) > +{ > + struct btf_func key = { .name = kfunc }; > + struct btf *btf = encoder->btf; > + struct btf_func *target; > + const void *base; > + unsigned int cnt; > + int err = -1; > + > + base = gobuffer__entries(funcs); > + cnt = gobuffer__nr_entries(funcs); > + target = bsearch(&key, base, cnt, sizeof(key), btf_func_cmp); > + if (!target) { > + fprintf(stderr, "%s: failed to find kfunc '%s' in BTF\n", __func__, kfunc); > + goto out; > + } > + > + /* Note we are unconditionally adding the btf_decl_tag even > + * though vmlinux may already contain btf_decl_tags for kfuncs. > + * We are ok to do this b/c we will later btf__dedup() to remove > + * any duplicates. > + */ > + err = btf__add_decl_tag(btf, BTF_KFUNC_TYPE_TAG, target->type_id, -1); > + if (err < 0) { > + fprintf(stderr, "%s: failed to insert kfunc decl tag for '%s': %d\n", > + __func__, kfunc, err); > + goto out; > + } > + > + err = 0; > +out: > + return err; > +} > + > +static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder) > +{ > + const char *filename = encoder->filename; > + struct gobuffer btf_kfunc_ranges = {}; > + struct gobuffer btf_funcs = {}; > + Elf_Data *symbols = NULL; > + Elf_Data *idlist = NULL; > + Elf_Scn *symscn = NULL; > + int symbols_shndx = -1; > + size_t idlist_addr = 0; > + int fd = -1, err = -1; > + int idlist_shndx = -1; > + size_t strtabidx = 0; > + Elf_Scn *scn = NULL; > + Elf *elf = NULL; > + GElf_Shdr shdr; > + size_t strndx; > + char *secname; > + int nr_syms; > + int i = 0; > + > + fd = open(filename, O_RDONLY); > + if (fd < 0) { > + fprintf(stderr, "Cannot open %s\n", filename); > + goto out; > + } > + > + if (elf_version(EV_CURRENT) == EV_NONE) { > + elf_error("Cannot set libelf version"); > + goto out; > + } > + > + elf = elf_begin(fd, ELF_C_READ, NULL); > + if (elf == NULL) { > + elf_error("Cannot update ELF file"); > + goto out; > + } > + > + /* Locate symbol table and .BTF_ids sections */ > + if (elf_getshdrstrndx(elf, &strndx) < 0) > + goto out; > + while ((scn = elf_nextscn(elf, scn)) != NULL) { > + Elf_Data *data; > + > + i++; > + if (!gelf_getshdr(scn, &shdr)) { > + elf_error("Failed to get ELF section(%d) hdr", i); > + goto out; > + } > + > + secname = elf_strptr(elf, strndx, shdr.sh_name); > + if (!secname) { > + elf_error("Failed to get ELF section(%d) hdr name", i); > + goto out; > + } > + > + data = elf_getdata(scn, 0); > + if (!data) { > + elf_error("Failed to get ELF section(%d) data", i); > + goto out; > + } > + > + if (shdr.sh_type == SHT_SYMTAB) { > + symbols_shndx = i; > + symscn = scn; > + symbols = data; > + strtabidx = shdr.sh_link; > + } else if (!strcmp(secname, BTF_IDS_SECTION)) { > + idlist_shndx = i; > + idlist_addr = shdr.sh_addr; > + idlist = data; > + } > + } > + > + /* Cannot resolve symbol or .BTF_ids sections. Nothing to do. */ > + if (symbols_shndx == -1 || idlist_shndx == -1) { > + err = 0; > + goto out; > + } > + > + if (!gelf_getshdr(symscn, &shdr)) { > + elf_error("Failed to get ELF symbol table header"); > + goto out; > + } > + nr_syms = shdr.sh_size / shdr.sh_entsize; > + > + err = btf_encoder__collect_btf_funcs(encoder, &btf_funcs); > + if (err) { > + fprintf(stderr, "%s: failed to collect BTF funcs\n", __func__); > + goto out; > + } > + > + /* First collect all kfunc set ranges. > + * > + * Note we choose not to sort these ranges and accept a linear > + * search when doing lookups. Reasoning is that the number of > + * sets is ~O(100) and not worth the additional code to optimize. > + */ > + for (i = 0; i < nr_syms; i++) { > + struct btf_kfunc_set_range range = {}; > + const char *name; > + GElf_Sym sym; > + > + if (!gelf_getsym(symbols, i, &sym)) { > + elf_error("Failed to get ELF symbol(%d)", i); > + goto out; > + } > + > + if (sym.st_shndx != idlist_shndx) > + continue; > + > + name = elf_strptr(elf, strtabidx, sym.st_name); > + if (!is_sym_kfunc_set(&sym, name, idlist, idlist_addr)) > + continue; > + > + range.start = sym.st_value; > + range.end = sym.st_value + sym.st_size; > + gobuffer__add(&btf_kfunc_ranges, &range, sizeof(range)); > + } > + > + /* Now inject BTF with kfunc decl tag for detected kfuncs */ > + for (i = 0; i < nr_syms; i++) { > + const struct btf_kfunc_set_range *ranges; > + unsigned int ranges_cnt; > + char *func, *name; > + GElf_Sym sym; > + bool found; > + int err; > + int j; > + > + if (!gelf_getsym(symbols, i, &sym)) { > + elf_error("Failed to get ELF symbol(%d)", i); > + goto out; > + } > + > + if (sym.st_shndx != idlist_shndx) > + continue; > + > + name = elf_strptr(elf, strtabidx, sym.st_name); > + func = get_func_name(name); > + if (!func) > + continue; > + > + /* Check if function belongs to a kfunc set */ > + ranges = gobuffer__entries(&btf_kfunc_ranges); > + ranges_cnt = gobuffer__nr_entries(&btf_kfunc_ranges); > + found = false; > + for (j = 0; j < ranges_cnt; j++) { > + size_t addr = sym.st_value; > + > + if (ranges[j].start <= addr && addr < ranges[j].end) { > + found = true; > + break; > + } > + } > + if (!found) { > + free(func); > + continue; > + } > + > + err = btf_encoder__tag_kfunc(encoder, &btf_funcs, func); > + if (err) { > + fprintf(stderr, "%s: failed to tag kfunc '%s'\n", __func__, func); > + free(func); > + goto out; > + } > + free(func); > + } > + > + err = 0; > +out: > + __gobuffer__delete(&btf_funcs); > + __gobuffer__delete(&btf_kfunc_ranges); > + if (elf) > + elf_end(elf); > + if (fd != -1) > + close(fd); > + return err; > +} > + > int btf_encoder__encode(struct btf_encoder *encoder) > { > + bool should_tag_kfuncs; > int err; > > /* for single-threaded case, saved funcs are added here */ > @@ -1377,6 +1735,15 @@ int btf_encoder__encode(struct btf_encoder *encoder) > if (btf__type_cnt(encoder->btf) == 1) > return 0; > > + /* Note vmlinux may already contain btf_decl_tag's for kfuncs. So > + * take care to call this before btf_dedup(). > + */ > + should_tag_kfuncs = encoder->tag_kfuncs && !encoder->skip_encoding_decl_tag; > + if (should_tag_kfuncs && btf_encoder__tag_kfuncs(encoder)) { > + fprintf(stderr, "%s: failed to tag kfuncs!\n", __func__); > + return -1; > + } > + > if (btf__dedup(encoder->btf, NULL)) { > fprintf(stderr, "%s: btf__dedup failed!\n", __func__); > return -1; > @@ -1660,6 +2027,7 @@ struct btf_encoder *btf_encoder__new(struct cu *cu, const char *detached_filenam > encoder->force = conf_load->btf_encode_force; > encoder->gen_floats = conf_load->btf_gen_floats; > encoder->skip_encoding_vars = conf_load->skip_encoding_btf_vars; > + encoder->skip_encoding_decl_tag = conf_load->skip_encoding_btf_decl_tag; > encoder->tag_kfuncs = conf_load->btf_decl_tag_kfuncs; > encoder->verbose = verbose; > encoder->has_index_type = false;
On Wed, Mar 20, 2024 at 12:54:11PM +0000, Alan Maguire wrote: > On 15/03/2024 19:48, Daniel Xu wrote: > > This commit teaches pahole to parse symbols in .BTF_ids section in > > vmlinux and discover exported kfuncs. Pahole then takes the list of > > kfuncs and injects a BTF_KIND_DECL_TAG for each kfunc. > > > > Example of encoding: > > > > $ bpftool btf dump file .tmp_vmlinux.btf | rg "DECL_TAG 'bpf_kfunc'" | wc -l > > 121 > > > > $ bpftool btf dump file .tmp_vmlinux.btf | rg 56337 > > [56337] FUNC 'bpf_ct_change_timeout' type_id=56336 linkage=static > > [127861] DECL_TAG 'bpf_kfunc' type_id=56337 component_idx=-1 > > > > This enables downstream users and tools to dynamically discover which > > kfuncs are available on a system by parsing vmlinux or module BTF, both > > available in /sys/kernel/btf. > > > > This feature is enabled with --btf_features=decl_tag,decl_tag_kfuncs. > > > > Signed-off-by: Daniel Xu <dxu@dxuuu.xyz> > > This is great work; a lot of steps needed to collect this info, but it's > really valuable! > > Reviewed-by: Alan Maguire <alan.maguire@oracle.com> > Tested-by: Alan Maguire <alan.maguire@oracle.com> > > BTW we need something like the attached patch to switch to using > --btf_features for pahole 1.26 and later; will I send it officially or > do you have something that does the same that you want to roll into your > bpf-next series? Let me know what works from your side. Thanks! SNIP > > From 2ca37cfa1e9be4f4d9717fc3dc346cdd7c617e32 Mon Sep 17 00:00:00 2001 > From: Alan Maguire <alan.maguire@oracle.com> > Date: Mon, 11 Mar 2024 10:17:47 +0000 > Subject: [PATCH bpf-next] kbuild,bpf: switch to using --btf_features for > pahole v1.26 and later > > The btf_features list can be used for pahole v1.26 and later - > it is useful because if a feature is not yet implemented it will > not exit with a failure message. This will allow us to add feature > requests to the pahole options without having to check pahole versions > in future; if the version of pahole supports the feature it will be > added. > > Signed-off-by: Alan Maguire <alan.maguire@oracle.com> > --- > scripts/Makefile.btf | 7 +++++-- > 1 file changed, 5 insertions(+), 2 deletions(-) > > diff --git a/scripts/Makefile.btf b/scripts/Makefile.btf > index 82377e470aed..9694ca3c5252 100644 > --- a/scripts/Makefile.btf > +++ b/scripts/Makefile.btf > @@ -12,8 +12,11 @@ pahole-flags-$(call test-ge, $(pahole-ver), 121) += --btf_gen_floats > > pahole-flags-$(call test-ge, $(pahole-ver), 122) += -j > > -pahole-flags-$(CONFIG_PAHOLE_HAS_LANG_EXCLUDE) += --lang_exclude=rust > - > pahole-flags-$(call test-ge, $(pahole-ver), 125) += --skip_encoding_btf_inconsistent_proto --btf_gen_optimized > > +# Switch to using --btf_features for v1.26 and later. > +pahole-flags-$(call test-ge, $(pahole-ver), 126) = -j --btf_features=encode_force,var,float,emum64,decl_tag,type_tag,optimized_func,consistent_func I was testing with '--btf_features=all' but I guess it's better to be specific, lgtm jirka > + > +pahole-flags-$(CONFIG_PAHOLE_HAS_LANG_EXCLUDE) += --lang_exclude=rust > + > export PAHOLE_FLAGS := $(pahole-flags-y) > -- > 2.39.3 >
Hi Alan, On Wed, Mar 20, 2024 at 12:54:11PM +0000, Alan Maguire wrote: > On 15/03/2024 19:48, Daniel Xu wrote: > > This commit teaches pahole to parse symbols in .BTF_ids section in > > vmlinux and discover exported kfuncs. Pahole then takes the list of > > kfuncs and injects a BTF_KIND_DECL_TAG for each kfunc. > > > > Example of encoding: > > > > $ bpftool btf dump file .tmp_vmlinux.btf | rg "DECL_TAG 'bpf_kfunc'" | wc -l > > 121 > > > > $ bpftool btf dump file .tmp_vmlinux.btf | rg 56337 > > [56337] FUNC 'bpf_ct_change_timeout' type_id=56336 linkage=static > > [127861] DECL_TAG 'bpf_kfunc' type_id=56337 component_idx=-1 > > > > This enables downstream users and tools to dynamically discover which > > kfuncs are available on a system by parsing vmlinux or module BTF, both > > available in /sys/kernel/btf. > > > > This feature is enabled with --btf_features=decl_tag,decl_tag_kfuncs. > > > > Signed-off-by: Daniel Xu <dxu@dxuuu.xyz> > > This is great work; a lot of steps needed to collect this info, but it's > really valuable! > > Reviewed-by: Alan Maguire <alan.maguire@oracle.com> > Tested-by: Alan Maguire <alan.maguire@oracle.com> > > BTW we need something like the attached patch to switch to using > --btf_features for pahole 1.26 and later; will I send it officially or > do you have something that does the same that you want to roll into your > bpf-next series? Let me know what works from your side. Thanks! I was planning on sending that with the bpftool changes. IIRC there are a few conflicts with generated prototypes to fix. I think Andrii had const-ified some kfunc params from userspace side but not kernel. I probably need to go fixup the kernel side. Thanks, Daniel
On Wed, Mar 20, 2024 at 12:54:11PM +0000, Alan Maguire wrote: > On 15/03/2024 19:48, Daniel Xu wrote: > > This commit teaches pahole to parse symbols in .BTF_ids section in > > vmlinux and discover exported kfuncs. Pahole then takes the list of > > kfuncs and injects a BTF_KIND_DECL_TAG for each kfunc. > > > > Example of encoding: > > > > $ bpftool btf dump file .tmp_vmlinux.btf | rg "DECL_TAG 'bpf_kfunc'" | wc -l > > 121 > > > > $ bpftool btf dump file .tmp_vmlinux.btf | rg 56337 > > [56337] FUNC 'bpf_ct_change_timeout' type_id=56336 linkage=static > > [127861] DECL_TAG 'bpf_kfunc' type_id=56337 component_idx=-1 > > > > This enables downstream users and tools to dynamically discover which > > kfuncs are available on a system by parsing vmlinux or module BTF, both > > available in /sys/kernel/btf. > > > > This feature is enabled with --btf_features=decl_tag,decl_tag_kfuncs. > > > > Signed-off-by: Daniel Xu <dxu@dxuuu.xyz> > > This is great work; a lot of steps needed to collect this info, but it's > really valuable! > > Reviewed-by: Alan Maguire <alan.maguire@oracle.com> > Tested-by: Alan Maguire <alan.maguire@oracle.com> > > BTW we need something like the attached patch to switch to using > --btf_features for pahole 1.26 and later; will I send it officially or > do you have something that does the same that you want to roll into your > bpf-next series? Let me know what works from your side. Thanks! > > > --- > > btf_encoder.c | 368 ++++++++++++++++++++++++++++++++++++++++++++++++++ > > 1 file changed, 368 insertions(+) > > > > diff --git a/btf_encoder.c b/btf_encoder.c > > index 850e36f..31848e2 100644 > > --- a/btf_encoder.c > > +++ b/btf_encoder.c > > @@ -34,6 +34,21 @@ > > #include <pthread.h> > > > > #define BTF_ENCODER_MAX_PROTO 512 > > +#define BTF_IDS_SECTION ".BTF_ids" > > +#define BTF_ID_FUNC_PFX "__BTF_ID__func__" > > +#define BTF_ID_SET8_PFX "__BTF_ID__set8__" > > +#define BTF_SET8_KFUNCS (1 << 0) > > +#define BTF_KFUNC_TYPE_TAG "bpf_kfunc" > > + > > +/* Adapted from include/linux/btf_ids.h */ > > +struct btf_id_set8 { > > + uint32_t cnt; > > + uint32_t flags; > > + struct { > > + uint32_t id; > > + uint32_t flags; > > + } pairs[]; > > +}; > > > > /* state used to do later encoding of saved functions */ > > struct btf_encoder_state { > > @@ -75,6 +90,7 @@ struct btf_encoder { > > verbose, > > force, > > gen_floats, > > + skip_encoding_decl_tag, > > tag_kfuncs, > > is_rel; > > uint32_t array_index_id; > > @@ -94,6 +110,17 @@ struct btf_encoder { > > } functions; > > }; > > > > +struct btf_func { > > + const char *name; > > + int type_id; > > +}; > > + > > +/* Half open interval representing range of addresses containing kfuncs */ > > +struct btf_kfunc_set_range { > > + size_t start; > > + size_t end; Why size_t for addresses? > > +}; > > + > > static LIST_HEAD(encoders); > > static pthread_mutex_t encoders__lock = PTHREAD_MUTEX_INITIALIZER; > > > > @@ -1363,8 +1390,339 @@ out: > > return err; > > } > > > > +/* Returns if `sym` points to a kfunc set */ > > +static int is_sym_kfunc_set(GElf_Sym *sym, const char *name, Elf_Data *idlist, size_t idlist_addr) > > +{ > > + void *ptr = idlist->d_buf; > > + struct btf_id_set8 *set; > > + bool is_set8; > > + int off; > > + > > + /* kfuncs are only found in BTF_SET8's */ > > + is_set8 = !strncmp(name, BTF_ID_SET8_PFX, sizeof(BTF_ID_SET8_PFX) - 1); > > + if (!is_set8) > > + return false; We have, as the kernel and tools/perf: dutil.h: * strstarts - does @str start with @prefix? dutil.h:static inline bool strstarts(const char *str, const char *prefix) Can you please use it? > > + > > + off = sym->st_value - idlist_addr; > > + if (off >= idlist->d_size) { > > + fprintf(stderr, "%s: symbol '%s' out of bounds\n", __func__, name); > > + return false; > > + } > > + > > + /* Check the set8 flags to see if it was marked as kfunc */ > > + set = ptr + off; > > + return set->flags & BTF_SET8_KFUNCS; > > +} > > + > > +/* > > + * Parse BTF_ID symbol and return the func name. > > + * > > + * Returns: > > + * Caller-owned string containing func name if successful. > > + * NULL if !func or on error. > > + */ > > +static char *get_func_name(const char *sym) > > +{ > > + char *func, *end; > > + > > + if (strncmp(sym, BTF_ID_FUNC_PFX, sizeof(BTF_ID_FUNC_PFX) - 1)) > > + return NULL; strstarts() > > + /* Strip prefix */ > > + func = strdup(sym + sizeof(BTF_ID_FUNC_PFX) - 1); > > + if (strlen(func) < 2) { > > + free(func); > > + return NULL; > > + } Can you add a comment as to why this exception? Does this limitation matches one in the kernel? Where? > > + > > + /* Strip suffix */ > > + end = strrchr(func, '_'); > > + if (!end || *(end - 1) != '_') { > > + free(func); > > + return NULL; > > + } > > + *(end - 1) = '\0'; So the suffix has to have __? Can you add an example of such a func name as a comment above this chopping? > > + return func; > > +} > > + > > +static int btf_func_cmp(const void *_a, const void *_b) > > +{ > > + const struct btf_func *a = _a; > > + const struct btf_func *b = _b; > > + > > + return strcmp(a->name, b->name); > > +} > > + > > +/* > > + * Collects all functions described in BTF. > > + * Returns non-zero on error. > > + */ > > +static int btf_encoder__collect_btf_funcs(struct btf_encoder *encoder, struct gobuffer *funcs) > > +{ > > + struct btf *btf = encoder->btf; > > + int nr_types, type_id; > > + int err = -1; > > + > > + /* First collect all the func entries into an array */ > > + nr_types = btf__type_cnt(btf); > > + for (type_id = 1; type_id < nr_types; type_id++) { > > + const struct btf_type *type; > > + struct btf_func func = {}; > > + const char *name; > > + > > + type = btf__type_by_id(btf, type_id); > > + if (!type) { > > + fprintf(stderr, "%s: malformed BTF, can't resolve type for ID %d\n", > > + __func__, type_id); > > + err = -EINVAL; > > + goto out; > > + } > > + > > + if (!btf_is_func(type)) > > + continue; > > + > > + name = btf__name_by_offset(btf, type->name_off); > > + if (!name) { > > + fprintf(stderr, "%s: malformed BTF, can't resolve name for ID %d\n", > > + __func__, type_id); > > + err = -EINVAL; > > + goto out; > > + } > > + > > + func.name = name; > > + func.type_id = type_id; > > + err = gobuffer__add(funcs, &func, sizeof(func)); > > + if (err < 0) > > + goto out; > > + } > > + > > + /* Now that we've collected funcs, sort them by name */ > > + qsort((void *)gobuffer__entries(funcs), gobuffer__nr_entries(funcs), > > + sizeof(struct btf_func), btf_func_cmp); Better to introcduce a 'gobuffer__sort(funcs, sizeof(struct btf_func), btf_func_cmp)' like we have a gobuffer__compress(), please. > > + err = 0; > > +out: > > + return err; > > +} > > + > > +static int btf_encoder__tag_kfunc(struct btf_encoder *encoder, struct gobuffer *funcs, const char *kfunc) > > +{ > > + struct btf_func key = { .name = kfunc }; > > + struct btf *btf = encoder->btf; > > + struct btf_func *target; > > + const void *base; > > + unsigned int cnt; > > + int err = -1; > > + > > + base = gobuffer__entries(funcs); > > + cnt = gobuffer__nr_entries(funcs); > > + target = bsearch(&key, base, cnt, sizeof(key), btf_func_cmp); > > + if (!target) { > > + fprintf(stderr, "%s: failed to find kfunc '%s' in BTF\n", __func__, kfunc); > > + goto out; > > + } > > + > > + /* Note we are unconditionally adding the btf_decl_tag even > > + * though vmlinux may already contain btf_decl_tags for kfuncs. > > + * We are ok to do this b/c we will later btf__dedup() to remove > > + * any duplicates. > > + */ > > + err = btf__add_decl_tag(btf, BTF_KFUNC_TYPE_TAG, target->type_id, -1); > > + if (err < 0) { > > + fprintf(stderr, "%s: failed to insert kfunc decl tag for '%s': %d\n", > > + __func__, kfunc, err); > > + goto out; > > + } > > + > > + err = 0; > > +out: > > + return err; > > +} > > + > > +static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder) > > +{ > > + const char *filename = encoder->filename; > > + struct gobuffer btf_kfunc_ranges = {}; > > + struct gobuffer btf_funcs = {}; > > + Elf_Data *symbols = NULL; > > + Elf_Data *idlist = NULL; > > + Elf_Scn *symscn = NULL; > > + int symbols_shndx = -1; > > + size_t idlist_addr = 0; > > + int fd = -1, err = -1; > > + int idlist_shndx = -1; > > + size_t strtabidx = 0; > > + Elf_Scn *scn = NULL; > > + Elf *elf = NULL; > > + GElf_Shdr shdr; > > + size_t strndx; > > + char *secname; > > + int nr_syms; > > + int i = 0; You did some coding on the Linux kernel net/ directory? 8-) Good. > > + fd = open(filename, O_RDONLY); > > + if (fd < 0) { > > + fprintf(stderr, "Cannot open %s\n", filename); > > + goto out; > > + } > > + > > + if (elf_version(EV_CURRENT) == EV_NONE) { > > + elf_error("Cannot set libelf version"); > > + goto out; > > + } > > + > > + elf = elf_begin(fd, ELF_C_READ, NULL); > > + if (elf == NULL) { > > + elf_error("Cannot update ELF file"); > > + goto out; > > + } > > + > > + /* Locate symbol table and .BTF_ids sections */ > > + if (elf_getshdrstrndx(elf, &strndx) < 0) > > + goto out; > > + while ((scn = elf_nextscn(elf, scn)) != NULL) { > > + Elf_Data *data; > > + > > + i++; > > + if (!gelf_getshdr(scn, &shdr)) { > > + elf_error("Failed to get ELF section(%d) hdr", i); > > + goto out; > > + } > > + > > + secname = elf_strptr(elf, strndx, shdr.sh_name); > > + if (!secname) { > > + elf_error("Failed to get ELF section(%d) hdr name", i); > > + goto out; > > + } > > + > > + data = elf_getdata(scn, 0); > > + if (!data) { > > + elf_error("Failed to get ELF section(%d) data", i); > > + goto out; > > + } > > + > > + if (shdr.sh_type == SHT_SYMTAB) { > > + symbols_shndx = i; > > + symscn = scn; > > + symbols = data; > > + strtabidx = shdr.sh_link; > > + } else if (!strcmp(secname, BTF_IDS_SECTION)) { > > + idlist_shndx = i; > > + idlist_addr = shdr.sh_addr; > > + idlist = data; > > + } > > + } > > + > > + /* Cannot resolve symbol or .BTF_ids sections. Nothing to do. */ > > + if (symbols_shndx == -1 || idlist_shndx == -1) { > > + err = 0; > > + goto out; > > + } > > + > > + if (!gelf_getshdr(symscn, &shdr)) { > > + elf_error("Failed to get ELF symbol table header"); > > + goto out; > > + } > > + nr_syms = shdr.sh_size / shdr.sh_entsize; > > + > > + err = btf_encoder__collect_btf_funcs(encoder, &btf_funcs); > > + if (err) { > > + fprintf(stderr, "%s: failed to collect BTF funcs\n", __func__); > > + goto out; > > + } > > + > > + /* First collect all kfunc set ranges. > > + * > > + * Note we choose not to sort these ranges and accept a linear > > + * search when doing lookups. Reasoning is that the number of > > + * sets is ~O(100) and not worth the additional code to optimize. > > + */ > > + for (i = 0; i < nr_syms; i++) { > > + struct btf_kfunc_set_range range = {}; > > + const char *name; > > + GElf_Sym sym; > > + > > + if (!gelf_getsym(symbols, i, &sym)) { > > + elf_error("Failed to get ELF symbol(%d)", i); > > + goto out; > > + } > > + > > + if (sym.st_shndx != idlist_shndx) > > + continue; > > + > > + name = elf_strptr(elf, strtabidx, sym.st_name); > > + if (!is_sym_kfunc_set(&sym, name, idlist, idlist_addr)) > > + continue; > > + > > + range.start = sym.st_value; > > + range.end = sym.st_value + sym.st_size; > > + gobuffer__add(&btf_kfunc_ranges, &range, sizeof(range)); > > + } > > + > > + /* Now inject BTF with kfunc decl tag for detected kfuncs */ > > + for (i = 0; i < nr_syms; i++) { > > + const struct btf_kfunc_set_range *ranges; > > + unsigned int ranges_cnt; > > + char *func, *name; > > + GElf_Sym sym; > > + bool found; > > + int err; > > + int j; > > + > > + if (!gelf_getsym(symbols, i, &sym)) { > > + elf_error("Failed to get ELF symbol(%d)", i); > > + goto out; > > + } > > + > > + if (sym.st_shndx != idlist_shndx) > > + continue; > > + > > + name = elf_strptr(elf, strtabidx, sym.st_name); > > + func = get_func_name(name); > > + if (!func) > > + continue; > > + > > + /* Check if function belongs to a kfunc set */ > > + ranges = gobuffer__entries(&btf_kfunc_ranges); > > + ranges_cnt = gobuffer__nr_entries(&btf_kfunc_ranges); > > + found = false; > > + for (j = 0; j < ranges_cnt; j++) { > > + size_t addr = sym.st_value; > > + > > + if (ranges[j].start <= addr && addr < ranges[j].end) { > > + found = true; > > + break; > > + } > > + } > > + if (!found) { > > + free(func); > > + continue; > > + } > > + > > + err = btf_encoder__tag_kfunc(encoder, &btf_funcs, func); > > + if (err) { > > + fprintf(stderr, "%s: failed to tag kfunc '%s'\n", __func__, func); > > + free(func); > > + goto out; > > + } > > + free(func); > > + } > > + > > + err = 0; > > +out: > > + __gobuffer__delete(&btf_funcs); > > + __gobuffer__delete(&btf_kfunc_ranges); > > + if (elf) > > + elf_end(elf); > > + if (fd != -1) > > + close(fd); > > + return err; > > +} > > + > > int btf_encoder__encode(struct btf_encoder *encoder) > > { > > + bool should_tag_kfuncs; > > int err; > > > > /* for single-threaded case, saved funcs are added here */ > > @@ -1377,6 +1735,15 @@ int btf_encoder__encode(struct btf_encoder *encoder) > > if (btf__type_cnt(encoder->btf) == 1) > > return 0; > > > > + /* Note vmlinux may already contain btf_decl_tag's for kfuncs. So > > + * take care to call this before btf_dedup(). > > + */ > > + should_tag_kfuncs = encoder->tag_kfuncs && !encoder->skip_encoding_decl_tag; > > + if (should_tag_kfuncs && btf_encoder__tag_kfuncs(encoder)) { > > + fprintf(stderr, "%s: failed to tag kfuncs!\n", __func__); > > + return -1; > > + } > > + > > if (btf__dedup(encoder->btf, NULL)) { > > fprintf(stderr, "%s: btf__dedup failed!\n", __func__); > > return -1; > > @@ -1660,6 +2027,7 @@ struct btf_encoder *btf_encoder__new(struct cu *cu, const char *detached_filenam > > encoder->force = conf_load->btf_encode_force; > > encoder->gen_floats = conf_load->btf_gen_floats; > > encoder->skip_encoding_vars = conf_load->skip_encoding_btf_vars; > > + encoder->skip_encoding_decl_tag = conf_load->skip_encoding_btf_decl_tag; > > encoder->tag_kfuncs = conf_load->btf_decl_tag_kfuncs; > > encoder->verbose = verbose; > > encoder->has_index_type = false;
Hi Arnaldo, On Fri, Mar 22, 2024 at 10:56:53AM -0300, Arnaldo Carvalho de Melo wrote: > On Wed, Mar 20, 2024 at 12:54:11PM +0000, Alan Maguire wrote: > > On 15/03/2024 19:48, Daniel Xu wrote: > > > This commit teaches pahole to parse symbols in .BTF_ids section in > > > vmlinux and discover exported kfuncs. Pahole then takes the list of > > > kfuncs and injects a BTF_KIND_DECL_TAG for each kfunc. > > > > > > Example of encoding: > > > > > > $ bpftool btf dump file .tmp_vmlinux.btf | rg "DECL_TAG 'bpf_kfunc'" | wc -l > > > 121 > > > > > > $ bpftool btf dump file .tmp_vmlinux.btf | rg 56337 > > > [56337] FUNC 'bpf_ct_change_timeout' type_id=56336 linkage=static > > > [127861] DECL_TAG 'bpf_kfunc' type_id=56337 component_idx=-1 > > > > > > This enables downstream users and tools to dynamically discover which > > > kfuncs are available on a system by parsing vmlinux or module BTF, both > > > available in /sys/kernel/btf. > > > > > > This feature is enabled with --btf_features=decl_tag,decl_tag_kfuncs. > > > > > > Signed-off-by: Daniel Xu <dxu@dxuuu.xyz> > > > > This is great work; a lot of steps needed to collect this info, but it's > > really valuable! > > > > Reviewed-by: Alan Maguire <alan.maguire@oracle.com> > > Tested-by: Alan Maguire <alan.maguire@oracle.com> > > > > BTW we need something like the attached patch to switch to using > > --btf_features for pahole 1.26 and later; will I send it officially or > > do you have something that does the same that you want to roll into your > > bpf-next series? Let me know what works from your side. Thanks! > > > > > --- > > > btf_encoder.c | 368 ++++++++++++++++++++++++++++++++++++++++++++++++++ > > > 1 file changed, 368 insertions(+) > > > > > > diff --git a/btf_encoder.c b/btf_encoder.c > > > index 850e36f..31848e2 100644 > > > --- a/btf_encoder.c > > > +++ b/btf_encoder.c > > > @@ -34,6 +34,21 @@ > > > #include <pthread.h> > > > > > > #define BTF_ENCODER_MAX_PROTO 512 > > > +#define BTF_IDS_SECTION ".BTF_ids" > > > +#define BTF_ID_FUNC_PFX "__BTF_ID__func__" > > > +#define BTF_ID_SET8_PFX "__BTF_ID__set8__" > > > +#define BTF_SET8_KFUNCS (1 << 0) > > > +#define BTF_KFUNC_TYPE_TAG "bpf_kfunc" > > > + > > > +/* Adapted from include/linux/btf_ids.h */ > > > +struct btf_id_set8 { > > > + uint32_t cnt; > > > + uint32_t flags; > > > + struct { > > > + uint32_t id; > > > + uint32_t flags; > > > + } pairs[]; > > > +}; > > > > > > /* state used to do later encoding of saved functions */ > > > struct btf_encoder_state { > > > @@ -75,6 +90,7 @@ struct btf_encoder { > > > verbose, > > > force, > > > gen_floats, > > > + skip_encoding_decl_tag, > > > tag_kfuncs, > > > is_rel; > > > uint32_t array_index_id; > > > @@ -94,6 +110,17 @@ struct btf_encoder { > > > } functions; > > > }; > > > > > > +struct btf_func { > > > + const char *name; > > > + int type_id; > > > +}; > > > + > > > +/* Half open interval representing range of addresses containing kfuncs */ > > > +struct btf_kfunc_set_range { > > > + size_t start; > > > + size_t end; > > Why size_t for addresses? No particular reason; seemed correct to handle 64bit and 32bit. I see uint64_t in other places. I can use that instead. > > > > +}; > > > + > > > static LIST_HEAD(encoders); > > > static pthread_mutex_t encoders__lock = PTHREAD_MUTEX_INITIALIZER; > > > > > > @@ -1363,8 +1390,339 @@ out: > > > return err; > > > } > > > > > > +/* Returns if `sym` points to a kfunc set */ > > > +static int is_sym_kfunc_set(GElf_Sym *sym, const char *name, Elf_Data *idlist, size_t idlist_addr) > > > +{ > > > + void *ptr = idlist->d_buf; > > > + struct btf_id_set8 *set; > > > + bool is_set8; > > > + int off; > > > + > > > + /* kfuncs are only found in BTF_SET8's */ > > > + is_set8 = !strncmp(name, BTF_ID_SET8_PFX, sizeof(BTF_ID_SET8_PFX) - 1); > > > + if (!is_set8) > > > + return false; > > > We have, as the kernel and tools/perf: > > dutil.h: * strstarts - does @str start with @prefix? > dutil.h:static inline bool strstarts(const char *str, const char *prefix) > > Can you please use it? Ack. > > > > + > > > + off = sym->st_value - idlist_addr; > > > + if (off >= idlist->d_size) { > > > + fprintf(stderr, "%s: symbol '%s' out of bounds\n", __func__, name); > > > + return false; > > > + } > > > + > > > + /* Check the set8 flags to see if it was marked as kfunc */ > > > + set = ptr + off; > > > + return set->flags & BTF_SET8_KFUNCS; > > > +} > > > + > > > +/* > > > + * Parse BTF_ID symbol and return the func name. > > > + * > > > + * Returns: > > > + * Caller-owned string containing func name if successful. > > > + * NULL if !func or on error. > > > + */ > > > +static char *get_func_name(const char *sym) > > > +{ > > > + char *func, *end; > > > + > > > + if (strncmp(sym, BTF_ID_FUNC_PFX, sizeof(BTF_ID_FUNC_PFX) - 1)) > > > + return NULL; > > strstarts() > > > > + /* Strip prefix */ > > > + func = strdup(sym + sizeof(BTF_ID_FUNC_PFX) - 1); > > > + if (strlen(func) < 2) { > > > + free(func); > > > + return NULL; > > > + } > > Can you add a comment as to why this exception? Does this limitation > matches one in the kernel? Where? This handles malformed input, such as "__BTF_ID__func___" (note the trailing triple underscore). I don't believe the kernel generates such names, but Eduard brought it up in [0] and I'd agree it'd be nice to not read out of bounds for all input. [0]: https://lore.kernel.org/bpf/itgzcn4cru32sk6sswbami2ojdynwyrfof3pnx5v4j3walx5oh@4qi3337fqnrz/ > > > > + > > > + /* Strip suffix */ > > > + end = strrchr(func, '_'); > > > + if (!end || *(end - 1) != '_') { > > > + free(func); > > > + return NULL; > > > + } > > > + *(end - 1) = '\0'; > > So the suffix has to have __? Can you add an example of such a func name > as a comment above this chopping? Sure. > > > > + return func; > > > +} > > > + > > > +static int btf_func_cmp(const void *_a, const void *_b) > > > +{ > > > + const struct btf_func *a = _a; > > > + const struct btf_func *b = _b; > > > + > > > + return strcmp(a->name, b->name); > > > +} > > > + > > > +/* > > > + * Collects all functions described in BTF. > > > + * Returns non-zero on error. > > > + */ > > > +static int btf_encoder__collect_btf_funcs(struct btf_encoder *encoder, struct gobuffer *funcs) > > > +{ > > > + struct btf *btf = encoder->btf; > > > + int nr_types, type_id; > > > + int err = -1; > > > + > > > + /* First collect all the func entries into an array */ > > > + nr_types = btf__type_cnt(btf); > > > + for (type_id = 1; type_id < nr_types; type_id++) { > > > + const struct btf_type *type; > > > + struct btf_func func = {}; > > > + const char *name; > > > + > > > + type = btf__type_by_id(btf, type_id); > > > + if (!type) { > > > + fprintf(stderr, "%s: malformed BTF, can't resolve type for ID %d\n", > > > + __func__, type_id); > > > + err = -EINVAL; > > > + goto out; > > > + } > > > + > > > + if (!btf_is_func(type)) > > > + continue; > > > + > > > + name = btf__name_by_offset(btf, type->name_off); > > > + if (!name) { > > > + fprintf(stderr, "%s: malformed BTF, can't resolve name for ID %d\n", > > > + __func__, type_id); > > > + err = -EINVAL; > > > + goto out; > > > + } > > > + > > > + func.name = name; > > > + func.type_id = type_id; > > > + err = gobuffer__add(funcs, &func, sizeof(func)); > > > + if (err < 0) > > > + goto out; > > > + } > > > + > > > + /* Now that we've collected funcs, sort them by name */ > > > + qsort((void *)gobuffer__entries(funcs), gobuffer__nr_entries(funcs), > > > + sizeof(struct btf_func), btf_func_cmp); > > Better to introcduce a 'gobuffer__sort(funcs, sizeof(struct btf_func), btf_func_cmp)' like we > have a gobuffer__compress(), please. Good idea [..] Thanks, Daniel
diff --git a/btf_encoder.c b/btf_encoder.c index 850e36f..31848e2 100644 --- a/btf_encoder.c +++ b/btf_encoder.c @@ -34,6 +34,21 @@ #include <pthread.h> #define BTF_ENCODER_MAX_PROTO 512 +#define BTF_IDS_SECTION ".BTF_ids" +#define BTF_ID_FUNC_PFX "__BTF_ID__func__" +#define BTF_ID_SET8_PFX "__BTF_ID__set8__" +#define BTF_SET8_KFUNCS (1 << 0) +#define BTF_KFUNC_TYPE_TAG "bpf_kfunc" + +/* Adapted from include/linux/btf_ids.h */ +struct btf_id_set8 { + uint32_t cnt; + uint32_t flags; + struct { + uint32_t id; + uint32_t flags; + } pairs[]; +}; /* state used to do later encoding of saved functions */ struct btf_encoder_state { @@ -75,6 +90,7 @@ struct btf_encoder { verbose, force, gen_floats, + skip_encoding_decl_tag, tag_kfuncs, is_rel; uint32_t array_index_id; @@ -94,6 +110,17 @@ struct btf_encoder { } functions; }; +struct btf_func { + const char *name; + int type_id; +}; + +/* Half open interval representing range of addresses containing kfuncs */ +struct btf_kfunc_set_range { + size_t start; + size_t end; +}; + static LIST_HEAD(encoders); static pthread_mutex_t encoders__lock = PTHREAD_MUTEX_INITIALIZER; @@ -1363,8 +1390,339 @@ out: return err; } +/* Returns if `sym` points to a kfunc set */ +static int is_sym_kfunc_set(GElf_Sym *sym, const char *name, Elf_Data *idlist, size_t idlist_addr) +{ + void *ptr = idlist->d_buf; + struct btf_id_set8 *set; + bool is_set8; + int off; + + /* kfuncs are only found in BTF_SET8's */ + is_set8 = !strncmp(name, BTF_ID_SET8_PFX, sizeof(BTF_ID_SET8_PFX) - 1); + if (!is_set8) + return false; + + off = sym->st_value - idlist_addr; + if (off >= idlist->d_size) { + fprintf(stderr, "%s: symbol '%s' out of bounds\n", __func__, name); + return false; + } + + /* Check the set8 flags to see if it was marked as kfunc */ + set = ptr + off; + return set->flags & BTF_SET8_KFUNCS; +} + +/* + * Parse BTF_ID symbol and return the func name. + * + * Returns: + * Caller-owned string containing func name if successful. + * NULL if !func or on error. + */ +static char *get_func_name(const char *sym) +{ + char *func, *end; + + if (strncmp(sym, BTF_ID_FUNC_PFX, sizeof(BTF_ID_FUNC_PFX) - 1)) + return NULL; + + /* Strip prefix */ + func = strdup(sym + sizeof(BTF_ID_FUNC_PFX) - 1); + if (strlen(func) < 2) { + free(func); + return NULL; + } + + /* Strip suffix */ + end = strrchr(func, '_'); + if (!end || *(end - 1) != '_') { + free(func); + return NULL; + } + *(end - 1) = '\0'; + + return func; +} + +static int btf_func_cmp(const void *_a, const void *_b) +{ + const struct btf_func *a = _a; + const struct btf_func *b = _b; + + return strcmp(a->name, b->name); +} + +/* + * Collects all functions described in BTF. + * Returns non-zero on error. + */ +static int btf_encoder__collect_btf_funcs(struct btf_encoder *encoder, struct gobuffer *funcs) +{ + struct btf *btf = encoder->btf; + int nr_types, type_id; + int err = -1; + + /* First collect all the func entries into an array */ + nr_types = btf__type_cnt(btf); + for (type_id = 1; type_id < nr_types; type_id++) { + const struct btf_type *type; + struct btf_func func = {}; + const char *name; + + type = btf__type_by_id(btf, type_id); + if (!type) { + fprintf(stderr, "%s: malformed BTF, can't resolve type for ID %d\n", + __func__, type_id); + err = -EINVAL; + goto out; + } + + if (!btf_is_func(type)) + continue; + + name = btf__name_by_offset(btf, type->name_off); + if (!name) { + fprintf(stderr, "%s: malformed BTF, can't resolve name for ID %d\n", + __func__, type_id); + err = -EINVAL; + goto out; + } + + func.name = name; + func.type_id = type_id; + err = gobuffer__add(funcs, &func, sizeof(func)); + if (err < 0) + goto out; + } + + /* Now that we've collected funcs, sort them by name */ + qsort((void *)gobuffer__entries(funcs), gobuffer__nr_entries(funcs), + sizeof(struct btf_func), btf_func_cmp); + + err = 0; +out: + return err; +} + +static int btf_encoder__tag_kfunc(struct btf_encoder *encoder, struct gobuffer *funcs, const char *kfunc) +{ + struct btf_func key = { .name = kfunc }; + struct btf *btf = encoder->btf; + struct btf_func *target; + const void *base; + unsigned int cnt; + int err = -1; + + base = gobuffer__entries(funcs); + cnt = gobuffer__nr_entries(funcs); + target = bsearch(&key, base, cnt, sizeof(key), btf_func_cmp); + if (!target) { + fprintf(stderr, "%s: failed to find kfunc '%s' in BTF\n", __func__, kfunc); + goto out; + } + + /* Note we are unconditionally adding the btf_decl_tag even + * though vmlinux may already contain btf_decl_tags for kfuncs. + * We are ok to do this b/c we will later btf__dedup() to remove + * any duplicates. + */ + err = btf__add_decl_tag(btf, BTF_KFUNC_TYPE_TAG, target->type_id, -1); + if (err < 0) { + fprintf(stderr, "%s: failed to insert kfunc decl tag for '%s': %d\n", + __func__, kfunc, err); + goto out; + } + + err = 0; +out: + return err; +} + +static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder) +{ + const char *filename = encoder->filename; + struct gobuffer btf_kfunc_ranges = {}; + struct gobuffer btf_funcs = {}; + Elf_Data *symbols = NULL; + Elf_Data *idlist = NULL; + Elf_Scn *symscn = NULL; + int symbols_shndx = -1; + size_t idlist_addr = 0; + int fd = -1, err = -1; + int idlist_shndx = -1; + size_t strtabidx = 0; + Elf_Scn *scn = NULL; + Elf *elf = NULL; + GElf_Shdr shdr; + size_t strndx; + char *secname; + int nr_syms; + int i = 0; + + fd = open(filename, O_RDONLY); + if (fd < 0) { + fprintf(stderr, "Cannot open %s\n", filename); + goto out; + } + + if (elf_version(EV_CURRENT) == EV_NONE) { + elf_error("Cannot set libelf version"); + goto out; + } + + elf = elf_begin(fd, ELF_C_READ, NULL); + if (elf == NULL) { + elf_error("Cannot update ELF file"); + goto out; + } + + /* Locate symbol table and .BTF_ids sections */ + if (elf_getshdrstrndx(elf, &strndx) < 0) + goto out; + while ((scn = elf_nextscn(elf, scn)) != NULL) { + Elf_Data *data; + + i++; + if (!gelf_getshdr(scn, &shdr)) { + elf_error("Failed to get ELF section(%d) hdr", i); + goto out; + } + + secname = elf_strptr(elf, strndx, shdr.sh_name); + if (!secname) { + elf_error("Failed to get ELF section(%d) hdr name", i); + goto out; + } + + data = elf_getdata(scn, 0); + if (!data) { + elf_error("Failed to get ELF section(%d) data", i); + goto out; + } + + if (shdr.sh_type == SHT_SYMTAB) { + symbols_shndx = i; + symscn = scn; + symbols = data; + strtabidx = shdr.sh_link; + } else if (!strcmp(secname, BTF_IDS_SECTION)) { + idlist_shndx = i; + idlist_addr = shdr.sh_addr; + idlist = data; + } + } + + /* Cannot resolve symbol or .BTF_ids sections. Nothing to do. */ + if (symbols_shndx == -1 || idlist_shndx == -1) { + err = 0; + goto out; + } + + if (!gelf_getshdr(symscn, &shdr)) { + elf_error("Failed to get ELF symbol table header"); + goto out; + } + nr_syms = shdr.sh_size / shdr.sh_entsize; + + err = btf_encoder__collect_btf_funcs(encoder, &btf_funcs); + if (err) { + fprintf(stderr, "%s: failed to collect BTF funcs\n", __func__); + goto out; + } + + /* First collect all kfunc set ranges. + * + * Note we choose not to sort these ranges and accept a linear + * search when doing lookups. Reasoning is that the number of + * sets is ~O(100) and not worth the additional code to optimize. + */ + for (i = 0; i < nr_syms; i++) { + struct btf_kfunc_set_range range = {}; + const char *name; + GElf_Sym sym; + + if (!gelf_getsym(symbols, i, &sym)) { + elf_error("Failed to get ELF symbol(%d)", i); + goto out; + } + + if (sym.st_shndx != idlist_shndx) + continue; + + name = elf_strptr(elf, strtabidx, sym.st_name); + if (!is_sym_kfunc_set(&sym, name, idlist, idlist_addr)) + continue; + + range.start = sym.st_value; + range.end = sym.st_value + sym.st_size; + gobuffer__add(&btf_kfunc_ranges, &range, sizeof(range)); + } + + /* Now inject BTF with kfunc decl tag for detected kfuncs */ + for (i = 0; i < nr_syms; i++) { + const struct btf_kfunc_set_range *ranges; + unsigned int ranges_cnt; + char *func, *name; + GElf_Sym sym; + bool found; + int err; + int j; + + if (!gelf_getsym(symbols, i, &sym)) { + elf_error("Failed to get ELF symbol(%d)", i); + goto out; + } + + if (sym.st_shndx != idlist_shndx) + continue; + + name = elf_strptr(elf, strtabidx, sym.st_name); + func = get_func_name(name); + if (!func) + continue; + + /* Check if function belongs to a kfunc set */ + ranges = gobuffer__entries(&btf_kfunc_ranges); + ranges_cnt = gobuffer__nr_entries(&btf_kfunc_ranges); + found = false; + for (j = 0; j < ranges_cnt; j++) { + size_t addr = sym.st_value; + + if (ranges[j].start <= addr && addr < ranges[j].end) { + found = true; + break; + } + } + if (!found) { + free(func); + continue; + } + + err = btf_encoder__tag_kfunc(encoder, &btf_funcs, func); + if (err) { + fprintf(stderr, "%s: failed to tag kfunc '%s'\n", __func__, func); + free(func); + goto out; + } + free(func); + } + + err = 0; +out: + __gobuffer__delete(&btf_funcs); + __gobuffer__delete(&btf_kfunc_ranges); + if (elf) + elf_end(elf); + if (fd != -1) + close(fd); + return err; +} + int btf_encoder__encode(struct btf_encoder *encoder) { + bool should_tag_kfuncs; int err; /* for single-threaded case, saved funcs are added here */ @@ -1377,6 +1735,15 @@ int btf_encoder__encode(struct btf_encoder *encoder) if (btf__type_cnt(encoder->btf) == 1) return 0; + /* Note vmlinux may already contain btf_decl_tag's for kfuncs. So + * take care to call this before btf_dedup(). + */ + should_tag_kfuncs = encoder->tag_kfuncs && !encoder->skip_encoding_decl_tag; + if (should_tag_kfuncs && btf_encoder__tag_kfuncs(encoder)) { + fprintf(stderr, "%s: failed to tag kfuncs!\n", __func__); + return -1; + } + if (btf__dedup(encoder->btf, NULL)) { fprintf(stderr, "%s: btf__dedup failed!\n", __func__); return -1; @@ -1660,6 +2027,7 @@ struct btf_encoder *btf_encoder__new(struct cu *cu, const char *detached_filenam encoder->force = conf_load->btf_encode_force; encoder->gen_floats = conf_load->btf_gen_floats; encoder->skip_encoding_vars = conf_load->skip_encoding_btf_vars; + encoder->skip_encoding_decl_tag = conf_load->skip_encoding_btf_decl_tag; encoder->tag_kfuncs = conf_load->btf_decl_tag_kfuncs; encoder->verbose = verbose; encoder->has_index_type = false;
This commit teaches pahole to parse symbols in .BTF_ids section in vmlinux and discover exported kfuncs. Pahole then takes the list of kfuncs and injects a BTF_KIND_DECL_TAG for each kfunc. Example of encoding: $ bpftool btf dump file .tmp_vmlinux.btf | rg "DECL_TAG 'bpf_kfunc'" | wc -l 121 $ bpftool btf dump file .tmp_vmlinux.btf | rg 56337 [56337] FUNC 'bpf_ct_change_timeout' type_id=56336 linkage=static [127861] DECL_TAG 'bpf_kfunc' type_id=56337 component_idx=-1 This enables downstream users and tools to dynamically discover which kfuncs are available on a system by parsing vmlinux or module BTF, both available in /sys/kernel/btf. This feature is enabled with --btf_features=decl_tag,decl_tag_kfuncs. Signed-off-by: Daniel Xu <dxu@dxuuu.xyz> --- btf_encoder.c | 368 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 368 insertions(+)