Message ID | 28e81ccf28d6dd33f6db50af6526dc1770502b8d.1707071969.git.dxu@dxuuu.xyz (mailing list archive) |
---|---|
State | Not Applicable |
Delegated to: | BPF |
Headers | show |
Series | pahole: Inject kfunc decl tags into BTF | expand |
On Sun, 2024-02-04 at 11:40 -0700, Daniel Xu wrote: [...] > +static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder) > +{ > + const char *filename = encoder->filename; > + struct gobuffer btf_kfunc_ranges = {}; > + struct gobuffer btf_funcs = {}; > + Elf_Scn *symscn = NULL; > + int symbols_shndx = -1; > + int fd = -1, err = -1; > + int idlist_shndx = -1; > + Elf_Scn *scn = NULL; > + size_t idlist_addr; > + Elf_Data *symbols; > + Elf_Data *idlist; > + size_t strtabidx; > + Elf *elf = NULL; > + GElf_Shdr shdr; > + size_t strndx; > + char *secname; > + int nr_syms; > + int i = 0; Note: when compiled in Release mode (e.g. using buildcmd.sh from the repo) there is a number of false-positive warnings reported by GCC 13.2.1: $ ./buildcmd.sh ... In function ‘is_sym_kfunc_set’, inlined from ‘btf_encoder__tag_kfuncs’ at /home/eddy/work/dwarves-fork/btf_encoder.c:1639:8, inlined from ‘btf_encoder__encode’ at /home/eddy/work/dwarves-fork/btf_encoder.c:1724:29: /home/eddy/work/dwarves-fork/btf_encoder.c:1395:29: warning: ‘idlist_addr’ may be used uninitialized [-Wmaybe-uninitialized] 1395 | off = sym->st_value - idlist_addr; | ~~~~~~~~~~~~~~^~~~~~~~~~~~~ /home/eddy/work/dwarves-fork/btf_encoder.c: In function ‘btf_encoder__encode’: /home/eddy/work/dwarves-fork/btf_encoder.c:1538:16: note: ‘idlist_addr’ was declared here 1538 | size_t idlist_addr; | ^~~~~~~~~~~ In function ‘btf_encoder__tag_kfuncs’, inlined from ‘btf_encoder__encode’ at /home/eddy/work/dwarves-fork/btf_encoder.c:1724:29: Same thing is reported for: - btf_encoder.c:1630:22: warning: ‘symbols’ may be used uninitialized - btf_encoder.c:1385:15: warning: ‘idlist’ may be used uninitialized - btf_encoder.c:1638:24: warning: ‘strtabidx’ may be used uninitialized GCC does not figure out that the variables above are guarded by -1 checks below. [...] > + while ((scn = elf_nextscn(elf, scn)) != NULL) { [...] > + if (shdr.sh_type == SHT_SYMTAB) { > + symbols_shndx = i; > + symscn = scn; > + symbols = data; > + strtabidx = shdr.sh_link; > + } else if (!strcmp(secname, BTF_IDS_SECTION)) { > + idlist_shndx = i; > + idlist_addr = shdr.sh_addr; > + idlist = data; > + } > + } > + > + /* Cannot resolve symbol or .BTF_ids sections. Nothing to do. */ > + if (symbols_shndx == -1 || idlist_shndx == -1) { > + err = 0; > + goto out; > + } [...]
On Sun, 2024-02-04 at 11:40 -0700, Daniel Xu wrote: > This commit teaches pahole to parse symbols in .BTF_ids section in > vmlinux and discover exported kfuncs. Pahole then takes the list of > kfuncs and injects a BTF_KIND_DECL_TAG for each kfunc. > > Example of encoding: > > $ bpftool btf dump file .tmp_vmlinux.btf | rg "DECL_TAG 'bpf_kfunc'" | wc -l > 121 > > $ bpftool btf dump file .tmp_vmlinux.btf | rg 56337 > [56337] FUNC 'bpf_ct_change_timeout' type_id=56336 linkage=static > [127861] DECL_TAG 'bpf_kfunc' type_id=56337 component_idx=-1 > > This enables downstream users and tools to dynamically discover which > kfuncs are available on a system by parsing vmlinux or module BTF, both > available in /sys/kernel/btf. > > Signed-off-by: Daniel Xu <dxu@dxuuu.xyz> > --- I've tested this patch-set using kernel built both with clang and gcc, on current bpf-next master (2d9a925d0fbf), both times get 124 kfunc definitions. Tested-by: Eduard Zingerman <eddyz87@gmail.com> Two nitpicks below. [...] > +static char *get_func_name(const char *sym) > +{ > + char *func, *end; > + > + if (strncmp(sym, BTF_ID_FUNC_PFX, sizeof(BTF_ID_FUNC_PFX) - 1)) > + return NULL; > + > + /* Strip prefix */ > + func = strdup(sym + sizeof(BTF_ID_FUNC_PFX) - 1); > + > + /* Strip suffix */ > + end = strrchr(func, '_'); > + if (!end || *(end - 1) != '_') { Nit: this would do out of bounds access on malformed input "__BTF_ID__func___" > + free(func); > + return NULL; > + } > + *(end - 1) = '\0'; > + > + return func; > +} [...] > +static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder) > +{ [...] > + elf = elf_begin(fd, ELF_C_READ, NULL); > + if (elf == NULL) { > + elf_error("Cannot update ELF file"); > + goto out; > + } > + > + /* Location symbol table and .BTF_ids sections */ > + elf_getshdrstrndx(elf, &strndx); Nit: in theory elf_getshdrstrndx() could fail and strndx would remain uninitialized. [...]
On 04/02/2024 18:40, Daniel Xu wrote: > This commit teaches pahole to parse symbols in .BTF_ids section in > vmlinux and discover exported kfuncs. Pahole then takes the list of > kfuncs and injects a BTF_KIND_DECL_TAG for each kfunc. > > Example of encoding: > > $ bpftool btf dump file .tmp_vmlinux.btf | rg "DECL_TAG 'bpf_kfunc'" | wc -l > 121 > > $ bpftool btf dump file .tmp_vmlinux.btf | rg 56337 > [56337] FUNC 'bpf_ct_change_timeout' type_id=56336 linkage=static > [127861] DECL_TAG 'bpf_kfunc' type_id=56337 component_idx=-1 > > This enables downstream users and tools to dynamically discover which > kfuncs are available on a system by parsing vmlinux or module BTF, both > available in /sys/kernel/btf. > > Signed-off-by: Daniel Xu <dxu@dxuuu.xyz> Looks good! A few suggestions below.. > --- > btf_encoder.c | 359 ++++++++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 359 insertions(+) > > diff --git a/btf_encoder.c b/btf_encoder.c > index e325f66..d6a561c 100644 > --- a/btf_encoder.c > +++ b/btf_encoder.c > @@ -34,6 +34,21 @@ > #include <pthread.h> > > #define BTF_ENCODER_MAX_PROTO 512 > +#define BTF_IDS_SECTION ".BTF_ids" > +#define BTF_ID_FUNC_PFX "__BTF_ID__func__" > +#define BTF_ID_SET8_PFX "__BTF_ID__set8__" > +#define BTF_SET8_KFUNCS (1 << 0) > +#define BTF_KFUNC_TYPE_TAG "bpf_kfunc" > + > +/* Adapted from include/linux/btf_ids.h */ > +struct btf_id_set8 { > + uint32_t cnt; > + uint32_t flags; > + struct { > + uint32_t id; > + uint32_t flags; > + } pairs[]; > +}; > > /* state used to do later encoding of saved functions */ > struct btf_encoder_state { > @@ -95,6 +110,17 @@ struct btf_encoder { > } functions; > }; > > +struct btf_func { > + const char *name; > + int type_id; > +}; > + > +/* Half open interval representing range of addresses containing kfuncs */ > +struct btf_kfunc_set_range { > + size_t start; > + size_t end; > +}; > + > static LIST_HEAD(encoders); > static pthread_mutex_t encoders__lock = PTHREAD_MUTEX_INITIALIZER; > > @@ -1353,6 +1379,331 @@ out: > return err; > } > > +/* Returns if `sym` points to a kfunc set */ > +static int is_sym_kfunc_set(GElf_Sym *sym, const char *name, Elf_Data *idlist, size_t idlist_addr) > +{ > + void *ptr = idlist->d_buf; > + struct btf_id_set8 *set; > + bool is_set8; > + int off; > + > + /* kfuncs are only found in BTF_SET8's */ > + is_set8 = !strncmp(name, BTF_ID_SET8_PFX, sizeof(BTF_ID_SET8_PFX) - 1); > + if (!is_set8) > + return false; > + > + off = sym->st_value - idlist_addr; > + if (off >= idlist->d_size) { > + fprintf(stderr, "%s: symbol '%s' out of bounds\n", __func__, name); > + return false; > + } > + > + /* Check the set8 flags to see if it was marked as kfunc */ > + set = ptr + off; > + return set->flags & BTF_SET8_KFUNCS; > +} > + > +/* > + * Parse BTF_ID symbol and return the func name. > + * > + * Returns: > + * Caller-owned string containing func name if successful. > + * NULL if !func or on error. > + */ > +static char *get_func_name(const char *sym) > +{ > + char *func, *end; > + > + if (strncmp(sym, BTF_ID_FUNC_PFX, sizeof(BTF_ID_FUNC_PFX) - 1)) > + return NULL; > + > + /* Strip prefix */ > + func = strdup(sym + sizeof(BTF_ID_FUNC_PFX) - 1); > + > + /* Strip suffix */ > + end = strrchr(func, '_'); > + if (!end || *(end - 1) != '_') { > + free(func); > + return NULL; > + } > + *(end - 1) = '\0'; > + > + return func; > +} > + > +static int btf_func_cmp(const void *_a, const void *_b) > +{ > + const struct btf_func *a = _a; > + const struct btf_func *b = _b; > + > + return strcmp(a->name, b->name); > +} > + > +/* > + * Collects all functions described in BTF. > + * Returns non-zero on error. > + */ > +static int btf_encoder__collect_btf_funcs(struct btf_encoder *encoder, struct gobuffer *funcs) > +{ > + struct btf *btf = encoder->btf; > + int nr_types, type_id; > + int err = -1; > + > + /* First collect all the func entries into an array */ > + nr_types = btf__type_cnt(btf); > + for (type_id = 1; type_id < nr_types; type_id++) { > + const struct btf_type *type; > + struct btf_func func = {}; > + const char *name; > + > + type = btf__type_by_id(btf, type_id); > + if (!type) { > + fprintf(stderr, "%s: malformed BTF, can't resolve type for ID %d\n", > + __func__, type_id); > + err = -EINVAL; > + goto out; > + } > + > + if (!btf_is_func(type)) > + continue; > + > + name = btf__name_by_offset(btf, type->name_off); > + if (!name) { > + fprintf(stderr, "%s: malformed BTF, can't resolve name for ID %d\n", > + __func__, type_id); > + err = -EINVAL; > + goto out; > + } > + > + func.name = name; > + func.type_id = type_id; > + err = gobuffer__add(funcs, &func, sizeof(func)); > + if (err < 0) > + goto out; > + } > + > + /* Now that we've collected funcs, sort them by name */ > + qsort((void *)gobuffer__entries(funcs), gobuffer__nr_entries(funcs), > + sizeof(struct btf_func), btf_func_cmp); > + > + err = 0; > +out: > + return err; > +} > + > +static int btf_encoder__tag_kfunc(struct btf_encoder *encoder, struct gobuffer *funcs, const char *kfunc) > +{ > + struct btf_func key = { .name = kfunc }; > + struct btf *btf = encoder->btf; > + struct btf_func *target; > + const void *base; > + unsigned int cnt; > + int err = -1; > + > + base = gobuffer__entries(funcs); > + cnt = gobuffer__nr_entries(funcs); > + target = bsearch(&key, base, cnt, sizeof(key), btf_func_cmp); > + if (!target) { > + fprintf(stderr, "%s: failed to find kfunc '%s' in BTF\n", __func__, kfunc); > + goto out; > + } > + > + /* Note we are unconditionally adding the btf_decl_tag even > + * though vmlinux may already contain btf_decl_tags for kfuncs. > + * We are ok to do this b/c we will later btf__dedup() to remove > + * any duplicates. > + */ > + err = btf__add_decl_tag(btf, BTF_KFUNC_TYPE_TAG, target->type_id, -1); > + if (err < 0) { > + fprintf(stderr, "%s: failed to insert kfunc decl tag for '%s': %d\n", > + __func__, kfunc, err); > + goto out; > + } > + > + err = 0; > +out: > + return err; > +} > + > +static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder) > +{ > + const char *filename = encoder->filename; > + struct gobuffer btf_kfunc_ranges = {}; > + struct gobuffer btf_funcs = {}; > + Elf_Scn *symscn = NULL; > + int symbols_shndx = -1; > + int fd = -1, err = -1; > + int idlist_shndx = -1; > + Elf_Scn *scn = NULL; > + size_t idlist_addr; > + Elf_Data *symbols; > + Elf_Data *idlist; > + size_t strtabidx; > + Elf *elf = NULL; > + GElf_Shdr shdr; > + size_t strndx; > + char *secname; > + int nr_syms; > + int i = 0; > + > + fd = open(filename, O_RDONLY); > + if (fd < 0) { > + fprintf(stderr, "Cannot open %s\n", filename); > + goto out; > + } > + > + if (elf_version(EV_CURRENT) == EV_NONE) { > + elf_error("Cannot set libelf version"); > + goto out; > + } > + > + elf = elf_begin(fd, ELF_C_READ, NULL); > + if (elf == NULL) { > + elf_error("Cannot update ELF file"); > + goto out; > + } > + > + /* Location symbol table and .BTF_ids sections */ > + elf_getshdrstrndx(elf, &strndx); > + while ((scn = elf_nextscn(elf, scn)) != NULL) { > + Elf_Data *data; > + > + i++; > + if (!gelf_getshdr(scn, &shdr)) { > + elf_error("Failed to get ELF section(%d) hdr", i); > + goto out; > + } > + > + secname = elf_strptr(elf, strndx, shdr.sh_name); > + if (!secname) { > + elf_error("Failed to get ELF section(%d) hdr name", i); > + goto out; > + } > + > + data = elf_getdata(scn, 0); > + if (!data) { > + elf_error("Failed to get ELF section(%d) data", i); > + goto out; > + } > + > + if (shdr.sh_type == SHT_SYMTAB) { > + symbols_shndx = i; > + symscn = scn; > + symbols = data; > + strtabidx = shdr.sh_link; > + } else if (!strcmp(secname, BTF_IDS_SECTION)) { > + idlist_shndx = i; > + idlist_addr = shdr.sh_addr; > + idlist = data; > + } > + } > + Can we use the existing list of ELF functions collected via btf_encoder__collect_symbols() for the above? We merge info across CUs about ELF functions. It _seems_ like there might be a way to re-use this info but I might be missing something; more below.. > + /* Cannot resolve symbol or .BTF_ids sections. Nothing to do. */ > + if (symbols_shndx == -1 || idlist_shndx == -1) { > + err = 0; > + goto out; > + } > + > + if (!gelf_getshdr(symscn, &shdr)) { > + elf_error("Failed to get ELF symbol table header"); > + goto out; > + } > + nr_syms = shdr.sh_size / shdr.sh_entsize; > + > + err = btf_encoder__collect_btf_funcs(encoder, &btf_funcs); > + if (err) { > + fprintf(stderr, "%s: failed to collect BTF funcs\n", __func__); > + goto out; > + } > + > + /* First collect all kfunc set ranges. > + * > + * Note we choose not to sort these ranges and accept a linear > + * search when doing lookups. Reasoning is that the number of > + * sets is ~O(100) and not worth the additional code to optimize. > + */ > + for (i = 0; i < nr_syms; i++) { > + struct btf_kfunc_set_range range = {}; > + const char *name; > + GElf_Sym sym; > + > + if (!gelf_getsym(symbols, i, &sym)) { > + elf_error("Failed to get ELF symbol(%d)", i); > + goto out; > + } > + > + if (sym.st_shndx != idlist_shndx) > + continue; > + > + name = elf_strptr(elf, strtabidx, sym.st_name); > + if (!is_sym_kfunc_set(&sym, name, idlist, idlist_addr)) > + continue; > + > + range.start = sym.st_value; > + range.end = sym.st_value + sym.st_size; we could potentially record this info when we collect symbols in btf_encoder__collect_function(). The reason I suggest this is that it is likely that to fully clarify which symbol a name refers to we will end up needing the address. So struct elf_function could record start and size, and that could be used by you later without having to parse ELF for symbols (you'd still need to for the BTF ids section). Then all you'd need to do is iterate over BTF functions, using btf_encoder__find_function() to get a function and associated ELF info by name. > + gobuffer__add(&btf_kfunc_ranges, &range, sizeof(range)); > + } > + > + /* Now inject BTF with kfunc decl tag for detected kfuncs */ > + for (i = 0; i < nr_syms; i++) { > + const struct btf_kfunc_set_range *ranges; > + unsigned int ranges_cnt; > + char *func, *name; > + GElf_Sym sym; > + bool found; > + int err; > + int j; > + > + if (!gelf_getsym(symbols, i, &sym)) { > + elf_error("Failed to get ELF symbol(%d)", i); > + goto out; > + } > + > + if (sym.st_shndx != idlist_shndx) > + continue; > + > + name = elf_strptr(elf, strtabidx, sym.st_name); > + func = get_func_name(name); > + if (!func) > + continue; > + > + /* Check if function belongs to a kfunc set */ > + ranges = gobuffer__entries(&btf_kfunc_ranges); > + ranges_cnt = gobuffer__nr_entries(&btf_kfunc_ranges); > + found = false; > + for (j = 0; j < ranges_cnt; j++) { > + size_t addr = sym.st_value; > + > + if (ranges[j].start <= addr && addr < ranges[j].end) { > + found = true; > + break; > + } > + } > + if (!found) { > + free(func); > + continue; > + } > + > + err = btf_encoder__tag_kfunc(encoder, &btf_funcs, func); > + if (err) { > + fprintf(stderr, "%s: failed to tag kfunc '%s'\n", __func__, func); > + free(func); > + goto out; > + } > + free(func); > + } > + > + err = 0; > +out: > + __gobuffer__delete(&btf_funcs); > + __gobuffer__delete(&btf_kfunc_ranges); > + if (elf) > + elf_end(elf); > + if (fd != -1) > + close(fd); > + return err; > +} > + > int btf_encoder__encode(struct btf_encoder *encoder) > { > int err; > @@ -1367,6 +1718,14 @@ int btf_encoder__encode(struct btf_encoder *encoder) > if (btf__type_cnt(encoder->btf) == 1) > return 0; > > + /* Note vmlinux may already contain btf_decl_tag's for kfuncs. So > + * take care to call this before btf_dedup(). > + */ sorry another thing I should have thought of here; if the user has asked to skip encoding declaration tags, we should respect that for this case also. So you'll probably need to set encoder->skip_encoding_btf_decl_tag = conf_load->skip_encoding_btf_decl_tag; ..earlier on, and bail here if encoder->skip_encoding_btf_decl_tag is true. We'd need to be consistent in that if the user asks not to encode declaration tags we don't do it for this case either. > + if (encoder->tag_kfuncs && btf_encoder__tag_kfuncs(encoder)) { > + fprintf(stderr, "%s: failed to tag kfuncs!\n", __func__); > + return -1; > + } > + > if (btf__dedup(encoder->btf, NULL)) { > fprintf(stderr, "%s: btf__dedup failed!\n", __func__); > return -1;
Hi Eduard, Apologies for long delay - life has been busy. On Tue, Feb 06, 2024 at 01:31:20AM +0200, Eduard Zingerman wrote: > On Sun, 2024-02-04 at 11:40 -0700, Daniel Xu wrote: > > This commit teaches pahole to parse symbols in .BTF_ids section in > > vmlinux and discover exported kfuncs. Pahole then takes the list of > > kfuncs and injects a BTF_KIND_DECL_TAG for each kfunc. > > > > Example of encoding: > > > > $ bpftool btf dump file .tmp_vmlinux.btf | rg "DECL_TAG 'bpf_kfunc'" | wc -l > > 121 > > > > $ bpftool btf dump file .tmp_vmlinux.btf | rg 56337 > > [56337] FUNC 'bpf_ct_change_timeout' type_id=56336 linkage=static > > [127861] DECL_TAG 'bpf_kfunc' type_id=56337 component_idx=-1 > > > > This enables downstream users and tools to dynamically discover which > > kfuncs are available on a system by parsing vmlinux or module BTF, both > > available in /sys/kernel/btf. > > > > Signed-off-by: Daniel Xu <dxu@dxuuu.xyz> > > --- > > I've tested this patch-set using kernel built both with clang and gcc, > on current bpf-next master (2d9a925d0fbf), both times get 124 kfunc definitions. > > Tested-by: Eduard Zingerman <eddyz87@gmail.com> > > Two nitpicks below. > > [...] > > > +static char *get_func_name(const char *sym) > > +{ > > + char *func, *end; > > + > > + if (strncmp(sym, BTF_ID_FUNC_PFX, sizeof(BTF_ID_FUNC_PFX) - 1)) > > + return NULL; > > + > > + /* Strip prefix */ > > + func = strdup(sym + sizeof(BTF_ID_FUNC_PFX) - 1); > > + > > + /* Strip suffix */ > > + end = strrchr(func, '_'); > > + if (!end || *(end - 1) != '_') { > > Nit: this would do out of bounds access on malformed input > "__BTF_ID__func___" I think this is actually ok. Reason is we have the strncmp() above so we know the prefix is there. Then the strdup() in the malformed cased returns empty string. And strrchr() will then return NULL, so we don't enter the branch. I tested it with: https://pastes.dxuuu.xyz/c3j4kk $ gcc test.c dxu@kashmir~/scratch $ ./a.out name=(null) > > > + free(func); > > + return NULL; > > + } > > + *(end - 1) = '\0'; > > + > > + return func; > > +} > > [...] > > > +static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder) > > +{ > > [...] > > > + elf = elf_begin(fd, ELF_C_READ, NULL); > > + if (elf == NULL) { > > + elf_error("Cannot update ELF file"); > > + goto out; > > + } > > + > > + /* Location symbol table and .BTF_ids sections */ > > + elf_getshdrstrndx(elf, &strndx); > > Nit: in theory elf_getshdrstrndx() could fail and strndx would remain > uninitialized. Sure, will fix. Also fixing typo (Location -> Locate). Thanks, Daniel
On Wed, 2024-02-28 at 09:07 -0700, Daniel Xu wrote: > Hi Eduard, > > Apologies for long delay - life has been busy. Hi Daniel, No problem, thank you for reaching back. [...] > > > +static char *get_func_name(const char *sym) > > > +{ > > > + char *func, *end; > > > + > > > + if (strncmp(sym, BTF_ID_FUNC_PFX, sizeof(BTF_ID_FUNC_PFX) - 1)) > > > + return NULL; > > > + > > > + /* Strip prefix */ > > > + func = strdup(sym + sizeof(BTF_ID_FUNC_PFX) - 1); > > > + > > > + /* Strip suffix */ > > > + end = strrchr(func, '_'); > > > + if (!end || *(end - 1) != '_') { > > > > Nit: this would do out of bounds access on malformed input > > "__BTF_ID__func___" > > I think this is actually ok. Reason is we have the strncmp() above > so we know the prefix is there. Then the strdup() in the malformed cased > returns empty string. And strrchr() will then return NULL, so we don't > enter the branch. > > I tested it with: https://pastes.dxuuu.xyz/c3j4kk > > $ gcc test.c > dxu@kashmir~/scratch $ ./a.out > name=(null) > The test is for "__BTF_ID__func__", but nitpick is for "__BTF_ID__func___" (three underscores in the end). E.g. here is a repro: --- 8< --------------------------------------------------------------- $ cat test.c #include <stdlib.h> #include <string.h> #include <stdio.h> #define BTF_ID_FUNC_PFX "__BTF_ID__func__" static char *get_func_name(const char *sym) { char *func, *end; if (strncmp(sym, BTF_ID_FUNC_PFX, sizeof(BTF_ID_FUNC_PFX) - 1)) return NULL; /* Strip prefix */ func = strdup(sym + sizeof(BTF_ID_FUNC_PFX) - 1); /* Strip suffix */ end = strrchr(func, '_'); if (!end || *(end - 1) != '_') { free(func); return NULL; } *(end - 1) = '\0'; return func; } int main(int argc, char *argv[]) { if (argc < 2) return -1; printf("name='%s;\n", get_func_name(argv[1])); return 0; } $ gcc -g test.c $ valgrind ./a.out __BTF_ID__func___ ==16856== Memcheck, a memory error detector ==16856== Copyright (C) 2002-2022, and GNU GPL'd, by Julian Seward et al. ==16856== Using Valgrind-3.22.0 and LibVEX; rerun with -h for copyright info ==16856== Command: ./a.out __BTF_ID__func___ ==16856== ==16856== Invalid read of size 1 ==16856== at 0x4011EB: get_func_name (test.c:19) ==16856== by 0x401244: main (test.c:32) ==16856== Address 0x4a7e03f is 1 bytes before a block of size 2 alloc'd ==16856== at 0x4845784: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so) ==16856== by 0x492176D: strdup (in /usr/lib64/libc.so.6) ==16856== by 0x4011C2: get_func_name (test.c:15) ==16856== by 0x401244: main (test.c:32) ==16856== name='(null); ==16856== ==16856== HEAP SUMMARY: ==16856== in use at exit: 0 bytes in 0 blocks ==16856== total heap usage: 2 allocs, 2 frees, 1,026 bytes allocated ==16856== ==16856== All heap blocks were freed -- no leaks are possible ==16856== ==16856== For lists of detected and suppressed errors, rerun with: -s ==16856== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0) --------------------------------------------------------------- >8 --- Thanks, Eduard
Hi Alan, On Thu, Feb 08, 2024 at 10:00:15AM +0000, Alan Maguire wrote: > On 04/02/2024 18:40, Daniel Xu wrote: [...] > > + > > + if (shdr.sh_type == SHT_SYMTAB) { > > + symbols_shndx = i; > > + symscn = scn; > > + symbols = data; > > + strtabidx = shdr.sh_link; > > + } else if (!strcmp(secname, BTF_IDS_SECTION)) { > > + idlist_shndx = i; > > + idlist_addr = shdr.sh_addr; > > + idlist = data; > > + } > > + } > > + > > Can we use the existing list of ELF functions collected via > btf_encoder__collect_symbols() for the above? We merge info across CUs > about ELF functions. It _seems_ like there might be a way to re-use this > info but I might be missing something; more below.. So the above two sections are only used to acquire information on the set8's. For collecting functions, this patch uses BTF (see btf_encoder__collect_btf_funcs()). > > > > + /* Cannot resolve symbol or .BTF_ids sections. Nothing to do. */ > > + if (symbols_shndx == -1 || idlist_shndx == -1) { > > + err = 0; > > + goto out; > > + } > > + > > + if (!gelf_getshdr(symscn, &shdr)) { > > + elf_error("Failed to get ELF symbol table header"); > > + goto out; > > + } > > + nr_syms = shdr.sh_size / shdr.sh_entsize; > > + > > + err = btf_encoder__collect_btf_funcs(encoder, &btf_funcs); > > + if (err) { > > + fprintf(stderr, "%s: failed to collect BTF funcs\n", __func__); > > + goto out; > > + } > > + > > + /* First collect all kfunc set ranges. > > + * > > + * Note we choose not to sort these ranges and accept a linear > > + * search when doing lookups. Reasoning is that the number of > > + * sets is ~O(100) and not worth the additional code to optimize. > > + */ > > + for (i = 0; i < nr_syms; i++) { > > + struct btf_kfunc_set_range range = {}; > > + const char *name; > > + GElf_Sym sym; > > + > > + if (!gelf_getsym(symbols, i, &sym)) { > > + elf_error("Failed to get ELF symbol(%d)", i); > > + goto out; > > + } > > + > > + if (sym.st_shndx != idlist_shndx) > > + continue; > > + > > + name = elf_strptr(elf, strtabidx, sym.st_name); > > + if (!is_sym_kfunc_set(&sym, name, idlist, idlist_addr)) > > + continue; > > + > > + range.start = sym.st_value; > > + range.end = sym.st_value + sym.st_size; > > we could potentially record this info when we collect symbols in > btf_encoder__collect_function(). The reason I suggest this is that it is > likely that to fully clarify which symbol a name refers to we will end > up needing the address. So struct elf_function could record start and > size, and that could be used by you later without having to parse ELF > for symbols (you'd still need to for the BTF ids section). This start/end pair is just for the set8's in .BTF_ids section. Which btf_encoder__collect_function() rightfully does not look at. So I think new code needs to be added for set8 collection anyways. I don't have any strong feelings about whether we should hook into btf_encoder__collect_symbols() or not -- IMHO it's a bit cleaner to have all the set8 stuff on its own codepath. What cases do you see needing the location + size for? Since kfuncs are exported, I would think it's not possible for a single object file to have multiple copies of the same kfunc. Nor that the compiler inline the kfunc. > > Then all you'd need to do is iterate over BTF functions, using > btf_encoder__find_function() to get a function and associated ELF info > by name. Didn't know about this, thanks. I'll take a look at if the patch can use the existing function metadata. That should get rid of btf_encoder__collect_btf_funcs() if it works. > > > > + gobuffer__add(&btf_kfunc_ranges, &range, sizeof(range)); > > + } > > + > > + /* Now inject BTF with kfunc decl tag for detected kfuncs */ > > + for (i = 0; i < nr_syms; i++) { > > + const struct btf_kfunc_set_range *ranges; > > + unsigned int ranges_cnt; > > + char *func, *name; > > + GElf_Sym sym; > > + bool found; > > + int err; > > + int j; > > + > > + if (!gelf_getsym(symbols, i, &sym)) { > > + elf_error("Failed to get ELF symbol(%d)", i); > > + goto out; > > + } > > + > > + if (sym.st_shndx != idlist_shndx) > > + continue; > > + > > + name = elf_strptr(elf, strtabidx, sym.st_name); > > + func = get_func_name(name); > > + if (!func) > > + continue; > > + > > + /* Check if function belongs to a kfunc set */ > > + ranges = gobuffer__entries(&btf_kfunc_ranges); > > + ranges_cnt = gobuffer__nr_entries(&btf_kfunc_ranges); > > + found = false; > > + for (j = 0; j < ranges_cnt; j++) { > > + size_t addr = sym.st_value; > > + > > + if (ranges[j].start <= addr && addr < ranges[j].end) { > > + found = true; > > + break; > > + } > > + } > > + if (!found) { > > + free(func); > > + continue; > > + } > > + > > + err = btf_encoder__tag_kfunc(encoder, &btf_funcs, func); > > + if (err) { > > + fprintf(stderr, "%s: failed to tag kfunc '%s'\n", __func__, func); > > + free(func); > > + goto out; > > + } > > + free(func); > > + } > > + > > + err = 0; > > +out: > > + __gobuffer__delete(&btf_funcs); > > + __gobuffer__delete(&btf_kfunc_ranges); > > + if (elf) > > + elf_end(elf); > > + if (fd != -1) > > + close(fd); > > + return err; > > +} > > + > > int btf_encoder__encode(struct btf_encoder *encoder) > > { > > int err; > > @@ -1367,6 +1718,14 @@ int btf_encoder__encode(struct btf_encoder *encoder) > > if (btf__type_cnt(encoder->btf) == 1) > > return 0; > > > > + /* Note vmlinux may already contain btf_decl_tag's for kfuncs. So > > + * take care to call this before btf_dedup(). > > + */ > > sorry another thing I should have thought of here; if the user has asked > to skip encoding declaration tags, we should respect that for this case > also. So you'll probably need to set > > encoder->skip_encoding_btf_decl_tag = > conf_load->skip_encoding_btf_decl_tag; > > ..earlier on, and bail here if encoder->skip_encoding_btf_decl_tag is > true. We'd need to be consistent in that if the user asks not to encode > declaration tags we don't do it for this case either. Yeah, good catch. Will do. [...] Thanks, Daniel
Hi Alan, On Wed, Feb 28, 2024 at 05:57:01PM -0700, Daniel Xu wrote: > Hi Alan, > > On Thu, Feb 08, 2024 at 10:00:15AM +0000, Alan Maguire wrote: > > On 04/02/2024 18:40, Daniel Xu wrote: [...] > > > > Then all you'd need to do is iterate over BTF functions, using > > btf_encoder__find_function() to get a function and associated ELF info > > by name. > > Didn't know about this, thanks. I'll take a look at if the patch can use > the existing function metadata. That should get rid of > btf_encoder__collect_btf_funcs() if it works. I experimented with this a bit and I'm not sure if it's a good approach. Here are the two commits: https://pastes.dxuuu.xyz/xo9jwk https://pastes.dxuuu.xyz/xmzew5 Basically my approach was to fan-in all the function info collected by the different threads. I don't think it'll work cuz some of the data (the name in particular) in struct elf_function belongs to the thread's struct btf_encoder instance. Which is all freed by btf_encoder__delete() before btf_encoder__encode(). It could probably be fixed, but doesn't seem very clean to me. So I think it'll be better to keep the code as-is. Unless you were thinking something different. Thanks, Daniel
On 13/03/2024 03:17, Daniel Xu wrote: > Hi Alan, > > On Wed, Feb 28, 2024 at 05:57:01PM -0700, Daniel Xu wrote: >> Hi Alan, >> >> On Thu, Feb 08, 2024 at 10:00:15AM +0000, Alan Maguire wrote: >>> On 04/02/2024 18:40, Daniel Xu wrote: > > [...] > >>> >>> Then all you'd need to do is iterate over BTF functions, using >>> btf_encoder__find_function() to get a function and associated ELF info >>> by name. >> >> Didn't know about this, thanks. I'll take a look at if the patch can use >> the existing function metadata. That should get rid of >> btf_encoder__collect_btf_funcs() if it works. > > I experimented with this a bit and I'm not sure if it's a good approach. > > Here are the two commits: > > https://pastes.dxuuu.xyz/xo9jwk > https://pastes.dxuuu.xyz/xmzew5 > > Basically my approach was to fan-in all the function info collected by > the different threads. I don't think it'll work cuz some of the data > (the name in particular) in struct elf_function belongs to the thread's > struct btf_encoder instance. Which is all freed by btf_encoder__delete() > before btf_encoder__encode(). > > It could probably be fixed, but doesn't seem very clean to me. So I > think it'll be better to keep the code as-is. Unless you were thinking > something different. > Thanks for trying! We may end up revisiting the freeing of ELF function/variable info in the future if we add address information from symbols into BTF, but since that's not there yet, it makes sense to do your collection separately. Alan
Hi Eduard, On Wed, Feb 28, 2024 at 11:33:28PM +0200, Eduard Zingerman wrote: > On Wed, 2024-02-28 at 09:07 -0700, Daniel Xu wrote: > > Hi Eduard, > > > > Apologies for long delay - life has been busy. > > Hi Daniel, > > No problem, thank you for reaching back. > > [...] > > > > > +static char *get_func_name(const char *sym) > > > > +{ > > > > + char *func, *end; > > > > + > > > > + if (strncmp(sym, BTF_ID_FUNC_PFX, sizeof(BTF_ID_FUNC_PFX) - 1)) > > > > + return NULL; > > > > + > > > > + /* Strip prefix */ > > > > + func = strdup(sym + sizeof(BTF_ID_FUNC_PFX) - 1); > > > > + > > > > + /* Strip suffix */ > > > > + end = strrchr(func, '_'); > > > > + if (!end || *(end - 1) != '_') { > > > > > > Nit: this would do out of bounds access on malformed input > > > "__BTF_ID__func___" > > > > I think this is actually ok. Reason is we have the strncmp() above > > so we know the prefix is there. Then the strdup() in the malformed cased > > returns empty string. And strrchr() will then return NULL, so we don't > > enter the branch. > > > > I tested it with: https://pastes.dxuuu.xyz/c3j4kk > > > > $ gcc test.c > > dxu@kashmir~/scratch $ ./a.out > > name=(null) > > > > The test is for "__BTF_ID__func__", but nitpick is for "__BTF_ID__func___" > (three underscores in the end). > Ha, got it. Didn't see the 3rd one. Fixed in v5. [...] Thanks, Daniel
diff --git a/btf_encoder.c b/btf_encoder.c index e325f66..d6a561c 100644 --- a/btf_encoder.c +++ b/btf_encoder.c @@ -34,6 +34,21 @@ #include <pthread.h> #define BTF_ENCODER_MAX_PROTO 512 +#define BTF_IDS_SECTION ".BTF_ids" +#define BTF_ID_FUNC_PFX "__BTF_ID__func__" +#define BTF_ID_SET8_PFX "__BTF_ID__set8__" +#define BTF_SET8_KFUNCS (1 << 0) +#define BTF_KFUNC_TYPE_TAG "bpf_kfunc" + +/* Adapted from include/linux/btf_ids.h */ +struct btf_id_set8 { + uint32_t cnt; + uint32_t flags; + struct { + uint32_t id; + uint32_t flags; + } pairs[]; +}; /* state used to do later encoding of saved functions */ struct btf_encoder_state { @@ -95,6 +110,17 @@ struct btf_encoder { } functions; }; +struct btf_func { + const char *name; + int type_id; +}; + +/* Half open interval representing range of addresses containing kfuncs */ +struct btf_kfunc_set_range { + size_t start; + size_t end; +}; + static LIST_HEAD(encoders); static pthread_mutex_t encoders__lock = PTHREAD_MUTEX_INITIALIZER; @@ -1353,6 +1379,331 @@ out: return err; } +/* Returns if `sym` points to a kfunc set */ +static int is_sym_kfunc_set(GElf_Sym *sym, const char *name, Elf_Data *idlist, size_t idlist_addr) +{ + void *ptr = idlist->d_buf; + struct btf_id_set8 *set; + bool is_set8; + int off; + + /* kfuncs are only found in BTF_SET8's */ + is_set8 = !strncmp(name, BTF_ID_SET8_PFX, sizeof(BTF_ID_SET8_PFX) - 1); + if (!is_set8) + return false; + + off = sym->st_value - idlist_addr; + if (off >= idlist->d_size) { + fprintf(stderr, "%s: symbol '%s' out of bounds\n", __func__, name); + return false; + } + + /* Check the set8 flags to see if it was marked as kfunc */ + set = ptr + off; + return set->flags & BTF_SET8_KFUNCS; +} + +/* + * Parse BTF_ID symbol and return the func name. + * + * Returns: + * Caller-owned string containing func name if successful. + * NULL if !func or on error. + */ +static char *get_func_name(const char *sym) +{ + char *func, *end; + + if (strncmp(sym, BTF_ID_FUNC_PFX, sizeof(BTF_ID_FUNC_PFX) - 1)) + return NULL; + + /* Strip prefix */ + func = strdup(sym + sizeof(BTF_ID_FUNC_PFX) - 1); + + /* Strip suffix */ + end = strrchr(func, '_'); + if (!end || *(end - 1) != '_') { + free(func); + return NULL; + } + *(end - 1) = '\0'; + + return func; +} + +static int btf_func_cmp(const void *_a, const void *_b) +{ + const struct btf_func *a = _a; + const struct btf_func *b = _b; + + return strcmp(a->name, b->name); +} + +/* + * Collects all functions described in BTF. + * Returns non-zero on error. + */ +static int btf_encoder__collect_btf_funcs(struct btf_encoder *encoder, struct gobuffer *funcs) +{ + struct btf *btf = encoder->btf; + int nr_types, type_id; + int err = -1; + + /* First collect all the func entries into an array */ + nr_types = btf__type_cnt(btf); + for (type_id = 1; type_id < nr_types; type_id++) { + const struct btf_type *type; + struct btf_func func = {}; + const char *name; + + type = btf__type_by_id(btf, type_id); + if (!type) { + fprintf(stderr, "%s: malformed BTF, can't resolve type for ID %d\n", + __func__, type_id); + err = -EINVAL; + goto out; + } + + if (!btf_is_func(type)) + continue; + + name = btf__name_by_offset(btf, type->name_off); + if (!name) { + fprintf(stderr, "%s: malformed BTF, can't resolve name for ID %d\n", + __func__, type_id); + err = -EINVAL; + goto out; + } + + func.name = name; + func.type_id = type_id; + err = gobuffer__add(funcs, &func, sizeof(func)); + if (err < 0) + goto out; + } + + /* Now that we've collected funcs, sort them by name */ + qsort((void *)gobuffer__entries(funcs), gobuffer__nr_entries(funcs), + sizeof(struct btf_func), btf_func_cmp); + + err = 0; +out: + return err; +} + +static int btf_encoder__tag_kfunc(struct btf_encoder *encoder, struct gobuffer *funcs, const char *kfunc) +{ + struct btf_func key = { .name = kfunc }; + struct btf *btf = encoder->btf; + struct btf_func *target; + const void *base; + unsigned int cnt; + int err = -1; + + base = gobuffer__entries(funcs); + cnt = gobuffer__nr_entries(funcs); + target = bsearch(&key, base, cnt, sizeof(key), btf_func_cmp); + if (!target) { + fprintf(stderr, "%s: failed to find kfunc '%s' in BTF\n", __func__, kfunc); + goto out; + } + + /* Note we are unconditionally adding the btf_decl_tag even + * though vmlinux may already contain btf_decl_tags for kfuncs. + * We are ok to do this b/c we will later btf__dedup() to remove + * any duplicates. + */ + err = btf__add_decl_tag(btf, BTF_KFUNC_TYPE_TAG, target->type_id, -1); + if (err < 0) { + fprintf(stderr, "%s: failed to insert kfunc decl tag for '%s': %d\n", + __func__, kfunc, err); + goto out; + } + + err = 0; +out: + return err; +} + +static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder) +{ + const char *filename = encoder->filename; + struct gobuffer btf_kfunc_ranges = {}; + struct gobuffer btf_funcs = {}; + Elf_Scn *symscn = NULL; + int symbols_shndx = -1; + int fd = -1, err = -1; + int idlist_shndx = -1; + Elf_Scn *scn = NULL; + size_t idlist_addr; + Elf_Data *symbols; + Elf_Data *idlist; + size_t strtabidx; + Elf *elf = NULL; + GElf_Shdr shdr; + size_t strndx; + char *secname; + int nr_syms; + int i = 0; + + fd = open(filename, O_RDONLY); + if (fd < 0) { + fprintf(stderr, "Cannot open %s\n", filename); + goto out; + } + + if (elf_version(EV_CURRENT) == EV_NONE) { + elf_error("Cannot set libelf version"); + goto out; + } + + elf = elf_begin(fd, ELF_C_READ, NULL); + if (elf == NULL) { + elf_error("Cannot update ELF file"); + goto out; + } + + /* Location symbol table and .BTF_ids sections */ + elf_getshdrstrndx(elf, &strndx); + while ((scn = elf_nextscn(elf, scn)) != NULL) { + Elf_Data *data; + + i++; + if (!gelf_getshdr(scn, &shdr)) { + elf_error("Failed to get ELF section(%d) hdr", i); + goto out; + } + + secname = elf_strptr(elf, strndx, shdr.sh_name); + if (!secname) { + elf_error("Failed to get ELF section(%d) hdr name", i); + goto out; + } + + data = elf_getdata(scn, 0); + if (!data) { + elf_error("Failed to get ELF section(%d) data", i); + goto out; + } + + if (shdr.sh_type == SHT_SYMTAB) { + symbols_shndx = i; + symscn = scn; + symbols = data; + strtabidx = shdr.sh_link; + } else if (!strcmp(secname, BTF_IDS_SECTION)) { + idlist_shndx = i; + idlist_addr = shdr.sh_addr; + idlist = data; + } + } + + /* Cannot resolve symbol or .BTF_ids sections. Nothing to do. */ + if (symbols_shndx == -1 || idlist_shndx == -1) { + err = 0; + goto out; + } + + if (!gelf_getshdr(symscn, &shdr)) { + elf_error("Failed to get ELF symbol table header"); + goto out; + } + nr_syms = shdr.sh_size / shdr.sh_entsize; + + err = btf_encoder__collect_btf_funcs(encoder, &btf_funcs); + if (err) { + fprintf(stderr, "%s: failed to collect BTF funcs\n", __func__); + goto out; + } + + /* First collect all kfunc set ranges. + * + * Note we choose not to sort these ranges and accept a linear + * search when doing lookups. Reasoning is that the number of + * sets is ~O(100) and not worth the additional code to optimize. + */ + for (i = 0; i < nr_syms; i++) { + struct btf_kfunc_set_range range = {}; + const char *name; + GElf_Sym sym; + + if (!gelf_getsym(symbols, i, &sym)) { + elf_error("Failed to get ELF symbol(%d)", i); + goto out; + } + + if (sym.st_shndx != idlist_shndx) + continue; + + name = elf_strptr(elf, strtabidx, sym.st_name); + if (!is_sym_kfunc_set(&sym, name, idlist, idlist_addr)) + continue; + + range.start = sym.st_value; + range.end = sym.st_value + sym.st_size; + gobuffer__add(&btf_kfunc_ranges, &range, sizeof(range)); + } + + /* Now inject BTF with kfunc decl tag for detected kfuncs */ + for (i = 0; i < nr_syms; i++) { + const struct btf_kfunc_set_range *ranges; + unsigned int ranges_cnt; + char *func, *name; + GElf_Sym sym; + bool found; + int err; + int j; + + if (!gelf_getsym(symbols, i, &sym)) { + elf_error("Failed to get ELF symbol(%d)", i); + goto out; + } + + if (sym.st_shndx != idlist_shndx) + continue; + + name = elf_strptr(elf, strtabidx, sym.st_name); + func = get_func_name(name); + if (!func) + continue; + + /* Check if function belongs to a kfunc set */ + ranges = gobuffer__entries(&btf_kfunc_ranges); + ranges_cnt = gobuffer__nr_entries(&btf_kfunc_ranges); + found = false; + for (j = 0; j < ranges_cnt; j++) { + size_t addr = sym.st_value; + + if (ranges[j].start <= addr && addr < ranges[j].end) { + found = true; + break; + } + } + if (!found) { + free(func); + continue; + } + + err = btf_encoder__tag_kfunc(encoder, &btf_funcs, func); + if (err) { + fprintf(stderr, "%s: failed to tag kfunc '%s'\n", __func__, func); + free(func); + goto out; + } + free(func); + } + + err = 0; +out: + __gobuffer__delete(&btf_funcs); + __gobuffer__delete(&btf_kfunc_ranges); + if (elf) + elf_end(elf); + if (fd != -1) + close(fd); + return err; +} + int btf_encoder__encode(struct btf_encoder *encoder) { int err; @@ -1367,6 +1718,14 @@ int btf_encoder__encode(struct btf_encoder *encoder) if (btf__type_cnt(encoder->btf) == 1) return 0; + /* Note vmlinux may already contain btf_decl_tag's for kfuncs. So + * take care to call this before btf_dedup(). + */ + if (encoder->tag_kfuncs && btf_encoder__tag_kfuncs(encoder)) { + fprintf(stderr, "%s: failed to tag kfuncs!\n", __func__); + return -1; + } + if (btf__dedup(encoder->btf, NULL)) { fprintf(stderr, "%s: btf__dedup failed!\n", __func__); return -1;
This commit teaches pahole to parse symbols in .BTF_ids section in vmlinux and discover exported kfuncs. Pahole then takes the list of kfuncs and injects a BTF_KIND_DECL_TAG for each kfunc. Example of encoding: $ bpftool btf dump file .tmp_vmlinux.btf | rg "DECL_TAG 'bpf_kfunc'" | wc -l 121 $ bpftool btf dump file .tmp_vmlinux.btf | rg 56337 [56337] FUNC 'bpf_ct_change_timeout' type_id=56336 linkage=static [127861] DECL_TAG 'bpf_kfunc' type_id=56337 component_idx=-1 This enables downstream users and tools to dynamically discover which kfuncs are available on a system by parsing vmlinux or module BTF, both available in /sys/kernel/btf. Signed-off-by: Daniel Xu <dxu@dxuuu.xyz> --- btf_encoder.c | 359 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 359 insertions(+)